diff --git a/logo.png b/-logo.png similarity index 100% rename from logo.png rename to -logo.png diff --git a/.Rbuildignore b/.Rbuildignore index 8a4b6628da..5663b1bade 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,6 +1,7 @@ ^.*\.Rproj$ ^\.Rproj\.user$ -^man-roxygen/ +^man-roxygen$ +^man-src$ ^resources/ ^.*\.hdf5$ ^README\.R?md$ @@ -11,6 +12,7 @@ ^runs$ ^issues$ ^external$ +^vignettes-src$ ^vignettes/examples$ ^vignettes/new-guides$ ^vignettes/learn\.Rmd$ @@ -43,3 +45,7 @@ ^\.vscode$ ^scratch$ ^CRAN-SUBMISSION$ +^revdep$ +^debug.py$ +^\.tether$ +^-logo.png$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000000..2d19fc766d --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/ISSUE_TEMPLATE/installation-issue.md b/.github/ISSUE_TEMPLATE/installation-issue.md index fce7ff1fc8..b29b9c1bf6 100644 --- a/.github/ISSUE_TEMPLATE/installation-issue.md +++ b/.github/ISSUE_TEMPLATE/installation-issue.md @@ -12,9 +12,9 @@ Many installation issues are resolved by running the following in a **fresh R se # issue is already fixed but not on CRAN yet. install.packages("remotes") remotes::install_github(sprintf("rstudio/%s", c("reticulate", "tensorflow", "keras"))) -reticulate::miniconda_uninstall() # start with a blank slate -reticulate::install_miniconda() -keras::install_keras() +if (is.null(reticulate::virtualenv_starter())) + reticulate::install_python() +keras3::install_keras() ``` Test to see if installation was successful. diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 4a4d02242c..7688a39c00 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,10 +1,11 @@ on: + workflow_dispatch: push: branches: - main pull_request: schedule: - - cron: '51 3 * * MON' + - cron: '51 3 * * Fri' name: R-CMD-check @@ -14,110 +15,68 @@ defaults: jobs: R-CMD-check: - name: ${{ matrix.os }}, tf-${{ matrix.tf }}, R-${{ matrix.r }} + name: ${{ matrix.os }}, py-${{ matrix.python }}, R-${{ matrix.r }} timeout-minutes: 30 strategy: fail-fast: false matrix: include: - - {os: 'ubuntu-20.04' , tf: 'default', r: 'release'} - - {os: 'windows-latest', tf: 'default', r: 'release'} - - {os: 'macOS-latest' , tf: 'default', r: 'release'} - - {os: 'ubuntu-20.04' , tf: 'default', r: 'oldrel'} - - {os: 'windows-latest', tf: 'default', r: 'oldrel'} - - {os: 'macOS-latest' , tf: 'default', r: 'oldrel'} + ## the happy path - default everything + - {os: 'ubuntu-latest' , python: '3.11', r: 'release'} + - {os: 'windows-latest', python: '3.11', r: 'release'} + - {os: 'macOS-latest' , python: '3.11', r: 'release'} - - {os: 'ubuntu-20.04' , tf: 'default', r: 'oldrel-1'} - - {os: 'ubuntu-20.04', tf: 'default', r: '3.6'} # default R in ubuntu-20.04 - - {os: 'ubuntu-20.04', tf: 'default', r: '3.5'} # + ## different python versions + - {os: 'ubuntu-latest' , python: '3.10', r: 'release'} + - {os: 'ubuntu-latest' , python: '3.9' , r: 'release'} + # - {os: 'ubuntu-20.04' , python: '3.8' , r: '3.6'} # default R in ubuntu-20.04 # install/pak failure? 
- # - {os: 'ubuntu-20.04' , tf: 'release', r: 'release'} - # - {os: 'windows-latest', tf: 'release', r: 'release'} - # - {os: 'macOS-latest' , tf: 'release', r: 'release'} - - - {os: 'ubuntu-20.04', tf: '2.8', r: 'release'} - - {os: 'ubuntu-20.04', tf: '2.7', r: 'release'} - - {os: 'ubuntu-20.04', tf: '2.6', r: 'release'} - - {os: 'ubuntu-20.04', tf: '2.5', r: 'release'} - - {os: 'ubuntu-20.04', tf: '2.4', r: 'release'} - - {os: 'ubuntu-20.04', tf: '2.3', r: 'release'} + ## older R versions + # to see the current oldrel-3 (4.0.5): + # https://api.r-hub.io/rversions/resolve/oldrel/3 + - {os: 'ubuntu-latest' , python: '3.9', r: 'oldrel-1'} + - {os: 'ubuntu-latest' , python: '3.9', r: 'oldrel-2'} + - {os: 'ubuntu-latest' , python: '3.9', r: 'oldrel-3'} # these are allowed to fail + # - {os: 'ubuntu-latest', tf: '2.14.0rc1', r: 'release'} # - {os: 'ubuntu-20.04', tf: 'default', r: 'devel'} - # - {os: 'ubuntu-20.04', tf: '2.7.0rc1', r: 'release'} # - {os: 'ubuntu-20.04', tf: 'nightly' , r: 'release'} runs-on: ${{ matrix.os }} - continue-on-error: ${{ matrix.tf == 'nightly' || contains(matrix.tf, 'rc') || matrix.r == 'devel' }} + # continue-on-error: ${{ matrix.tf == 'nightly' || contains(matrix.tf, 'rc') || matrix.r == 'devel' }} env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: 'true' - # R_COMPILE_AND_INSTALL_PACKAGES: 'never' # commented out until CRAN builds TF 2.8 + R_KEEP_PKG_SOURCE: yes + # R_REMOTES_NO_ERRORS_FROM_WARNINGS: 'true' GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 + + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + + - uses: r-lib/actions/setup-pandoc@v2 - uses: r-lib/actions/setup-r@v2 id: setup-r with: r-version: ${{ matrix.r }} - Ncpus: '2L' use-public-rspm: true + Ncpus: '2L' - - uses: r-lib/actions/setup-pandoc@v2 + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck, local::. + cache-version: 1 + upgrade: 'TRUE' - - name: Get Date - id: get-date - shell: bash - run: | - echo "::set-output name=year-week::$(date -u "+%Y-%U")" - echo "::set-output name=date::$(date -u "+%F")" + - name: Install Keras + run: keras3::install_keras(python_version = '${{ matrix.python }}') - - name: Restore R package cache - uses: actions/cache@v2 - id: r-package-cache + - uses: r-lib/actions/check-r-package@v2 with: - path: ${{ env.R_LIBS_USER }} - key: ${{ matrix.os }}-${{ steps.setup-r.outputs.installed-r-version }}-${{ steps.get-date.outputs.year-week }}-4 - - - name: Install remotes - if: steps.r-package-cache.outputs.cache-hit != 'true' - run: install.packages("remotes") - - - name: Install system dependencies - if: runner.os == 'Linux' - shell: bash - run: | - . 
/etc/os-release - while read -r cmd - do - echo "$cmd" - sudo $cmd - done < <(Rscript -e "writeLines(remotes::system_requirements('$ID-$VERSION_ID'))") - - - name: Install Package + deps - run: remotes::install_local(dependencies = TRUE, force = TRUE) - - - name: Install Miniconda - run: reticulate::install_miniconda() - - - name: Install Tensorflow + Keras deps - run: keras::install_keras(tensorflow = '${{ matrix.tf }}-cpu') - - - name: Install rcmdcheck - run: remotes::install_cran("rcmdcheck") - - - name: Check - run: rcmdcheck::rcmdcheck(args = '--no-manual', error_on = 'warning', check_dir = 'check') - - - name: Show testthat output - if: always() - shell: bash - run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true - - - name: Don't use tar from old Rtools to store the cache - if: ${{ runner.os == 'Windows' && startsWith(steps.install-r.outputs.installed-r-version, '3') }} - shell: bash - run: echo "C:/Program Files/Git/usr/bin" >> $GITHUB_PATH + upload-snapshots: true diff --git a/.github/workflows/pr-commands.yaml b/.github/workflows/pr-commands.yaml new file mode 100644 index 0000000000..71f335b3ea --- /dev/null +++ b/.github/workflows/pr-commands.yaml @@ -0,0 +1,79 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + issue_comment: + types: [created] + +name: Commands + +jobs: + document: + if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }} + name: document + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/pr-fetch@v2 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::roxygen2 + needs: pr-document + + - name: Document + run: roxygen2::roxygenise() + shell: Rscript {0} + + - name: commit + run: | + git config --local user.name "$GITHUB_ACTOR" + git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" + git add man/\* NAMESPACE + git commit -m 'Document' + + - uses: r-lib/actions/pr-push@v2 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + + style: + if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }} + name: style + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v3 + + - uses: r-lib/actions/pr-fetch@v2 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + + - uses: r-lib/actions/setup-r@v2 + + - name: Install dependencies + run: install.packages("styler") + shell: Rscript {0} + + - name: Style + run: styler::style_pkg() + shell: Rscript {0} + + - name: commit + run: | + git config --local user.name "$GITHUB_ACTOR" + git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" + git add \*.R + git commit -m 'Style' + + - uses: r-lib/actions/pr-push@v2 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index 94a6b30719..5201fb6581 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,11 @@ scratch inst/doc .Rprofile .Renviron +revdep +debug.py +man-src/*/translate.patch +*.keras 
+**.weights.h5 +.Rapp.history +*.csv +*.zip diff --git a/.tether/man/Callback.txt b/.tether/man/Callback.txt new file mode 100644 index 0000000000..1e583450aa --- /dev/null +++ b/.tether/man/Callback.txt @@ -0,0 +1,298 @@ +Help on class Callback in module keras.src.callbacks.callback: + +class Callback(builtins.object) + | Base class used to build new callbacks. + | + | Callbacks can be passed to keras methods such as `fit()`, `evaluate()`, and + | `predict()` in order to hook into the various stages of the model training, + | evaluation, and inference lifecycle. + | + | To create a custom callback, subclass `keras.callbacks.Callback` and + | override the method associated with the stage of interest. + | + | Example: + | + | >>> training_finished = False + | >>> class MyCallback(Callback): + | ... def on_train_end(self, logs=None): + | ... global training_finished + | ... training_finished = True + | >>> model = Sequential([ + | ... layers.Dense(1, input_shape=(1,))]) + | >>> model.compile(loss='mean_squared_error') + | >>> model.fit(np.array([[1.0]]), np.array([[1.0]]), + | ... callbacks=[MyCallback()]) + | >>> assert training_finished == True + | + | If you want to use `Callback` objects in a custom training loop: + | + | 1. You should pack all your callbacks into a single `callbacks.CallbackList` + | so they can all be called together. + | 2. You will need to manually call all the `on_*` methods at the appropriate + | locations in your loop. Like this: + | + | Example: + | + | ```python + | callbacks = keras.callbacks.CallbackList([...]) + | callbacks.append(...) + | callbacks.on_train_begin(...) + | for epoch in range(EPOCHS): + | callbacks.on_epoch_begin(epoch) + | for i, data in dataset.enumerate(): + | callbacks.on_train_batch_begin(i) + | batch_logs = model.train_step(data) + | callbacks.on_train_batch_end(i, batch_logs) + | epoch_logs = ... + | callbacks.on_epoch_end(epoch, epoch_logs) + | final_logs=... + | callbacks.on_train_end(final_logs) + | ``` + | + | Attributes: + | params: Dict. Training parameters + | (eg. verbosity, batch size, number of epochs...). + | model: Instance of `Model`. + | Reference of the model being trained. + | + | The `logs` dictionary that callback methods + | take as argument will contain keys for quantities relevant to + | the current batch or epoch (see method-specific docstrings). + | + | Methods defined here: + | + | __init__(self) + | Initialize self. See help(type(self)) for accurate signature. + | + | on_batch_begin( + | self, + | batch, + | logs=None + | ) + | A backwards compatibility alias for `on_train_batch_begin`. + | + | on_batch_end( + | self, + | batch, + | logs=None + | ) + | A backwards compatibility alias for `on_train_batch_end`. + | + | on_epoch_begin( + | self, + | epoch, + | logs=None + | ) + | Called at the start of an epoch. + | + | Subclasses should override for any actions to run. This function should + | only be called during TRAIN mode. + | + | Args: + | epoch: Integer, index of epoch. + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_epoch_end( + | self, + | epoch, + | logs=None + | ) + | Called at the end of an epoch. + | + | Subclasses should override for any actions to run. This function should + | only be called during TRAIN mode. + | + | Args: + | epoch: Integer, index of epoch. + | logs: Dict, metric results for this training epoch, and for the + | validation epoch if validation is performed. 
Validation result + | keys are prefixed with `val_`. For training epoch, the values of + | the `Model`'s metrics are returned. Example: + | `{'loss': 0.2, 'accuracy': 0.7}`. + | + | on_predict_batch_begin( + | self, + | batch, + | logs=None + | ) + | Called at the beginning of a batch in `predict` methods. + | + | Subclasses should override for any actions to run. + | + | Note that if the `steps_per_execution` argument to `compile` in + | `Model` is set to `N`, this method will only be called every + | `N` batches. + | + | Args: + | batch: Integer, index of batch within the current epoch. + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_predict_batch_end( + | self, + | batch, + | logs=None + | ) + | Called at the end of a batch in `predict` methods. + | + | Subclasses should override for any actions to run. + | + | Note that if the `steps_per_execution` argument to `compile` in + | `Model` is set to `N`, this method will only be called every + | `N` batches. + | + | Args: + | batch: Integer, index of batch within the current epoch. + | logs: Dict. Aggregated metric results up until this batch. + | + | on_predict_begin(self, logs=None) + | Called at the beginning of prediction. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_predict_end(self, logs=None) + | Called at the end of prediction. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_test_batch_begin( + | self, + | batch, + | logs=None + | ) + | Called at the beginning of a batch in `evaluate` methods. + | + | Also called at the beginning of a validation batch in the `fit` + | methods, if validation data is provided. + | + | Subclasses should override for any actions to run. + | + | Note that if the `steps_per_execution` argument to `compile` in + | `Model` is set to `N`, this method will only be called every + | `N` batches. + | + | Args: + | batch: Integer, index of batch within the current epoch. + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_test_batch_end( + | self, + | batch, + | logs=None + | ) + | Called at the end of a batch in `evaluate` methods. + | + | Also called at the end of a validation batch in the `fit` + | methods, if validation data is provided. + | + | Subclasses should override for any actions to run. + | + | Note that if the `steps_per_execution` argument to `compile` in + | `Model` is set to `N`, this method will only be called every + | `N` batches. + | + | Args: + | batch: Integer, index of batch within the current epoch. + | logs: Dict. Aggregated metric results up until this batch. + | + | on_test_begin(self, logs=None) + | Called at the beginning of evaluation or validation. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_test_end(self, logs=None) + | Called at the end of evaluation or validation. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. 
Currently the output of the last call to + | `on_test_batch_end()` is passed to this argument for this method + | but that may change in the future. + | + | on_train_batch_begin( + | self, + | batch, + | logs=None + | ) + | Called at the beginning of a training batch in `fit` methods. + | + | Subclasses should override for any actions to run. + | + | Note that if the `steps_per_execution` argument to `compile` in + | `Model` is set to `N`, this method will only be called every + | `N` batches. + | + | Args: + | batch: Integer, index of batch within the current epoch. + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_train_batch_end( + | self, + | batch, + | logs=None + | ) + | Called at the end of a training batch in `fit` methods. + | + | Subclasses should override for any actions to run. + | + | Note that if the `steps_per_execution` argument to `compile` in + | `Model` is set to `N`, this method will only be called every + | `N` batches. + | + | Args: + | batch: Integer, index of batch within the current epoch. + | logs: Dict. Aggregated metric results up until this batch. + | + | on_train_begin(self, logs=None) + | Called at the beginning of training. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_train_end(self, logs=None) + | Called at the end of training. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently the output of the last call to + | `on_epoch_end()` is passed to this argument for this method but + | that may change in the future. + | + | set_model(self, model) + | + | set_params(self, params) + | + | ---------------------------------------------------------------------- + | Readonly properties defined here: + | + | model + | + | ---------------------------------------------------------------------- + | Data descriptors defined here: + | + | __dict__ + | dictionary for instance variables + | + | __weakref__ + | list of weak references to the object + diff --git a/.tether/man/Constraint.txt b/.tether/man/Constraint.txt new file mode 100644 index 0000000000..5b167afc53 --- /dev/null +++ b/.tether/man/Constraint.txt @@ -0,0 +1,80 @@ +Help on class Constraint in module keras.src.constraints.constraints: + +class Constraint(builtins.object) + | Base class for weight constraints. + | + | A `Constraint` instance works like a stateless function. + | Users who subclass this + | class should override the `__call__()` method, which takes a single + | weight parameter and returns a projected version of that parameter + | (e.g. normalized or clipped). Constraints can be used with various Keras + | layers via the `kernel_constraint` or `bias_constraint` arguments. + | + | Here's a simple example of a non-negative weight constraint: + | + | >>> class NonNegative(keras.constraints.Constraint): + | ... + | ... def __call__(self, w): + | ... return w * ops.cast(ops.greater_equal(w, 0.), dtype=w.dtype) + | + | >>> weight = ops.convert_to_tensor((-1.0, 1.0)) + | >>> NonNegative()(weight) + | [0., 1.] + | + | Usage in a layer: + | + | >>> keras.layers.Dense(4, kernel_constraint=NonNegative()) + | + | Methods defined here: + | + | __call__(self, w) + | Applies the constraint to the input weight variable. + | + | By default, the input weight variable is not modified.
+ | Users should override this method to implement their own projection + | function. + | + | Args: + | w: Input weight variable. + | + | Returns: + | Projected variable (by default, returns unmodified inputs). + | + | get_config(self) + | Returns a Python dict of the object config. + | + | A constraint config is a Python dictionary (JSON-serializable) that can + | be used to reinstantiate the same object. + | + | Returns: + | Python dict containing the configuration of the constraint object. + | + | ---------------------------------------------------------------------- + | Class methods defined here: + | + | from_config(config) from builtins.type + | Instantiates a weight constraint from a configuration dictionary. + | + | Example: + | + | ```python + | constraint = UnitNorm() + | config = constraint.get_config() + | constraint = UnitNorm.from_config(config) + | ``` + | + | Args: + | config: A Python dictionary, the output of `get_config()`. + | + | Returns: + | A `keras.constraints.Constraint` instance. + | + | ---------------------------------------------------------------------- + | Data descriptors defined here: + | + | __dict__ + | dictionary for instance variables + | + | __weakref__ + | list of weak references to the object + diff --git a/.tether/man/InputLayer.txt b/.tether/man/InputLayer.txt new file mode 100644 index 0000000000..4212133814 --- /dev/null +++ b/.tether/man/InputLayer.txt @@ -0,0 +1,45 @@ +Help on class InputLayer in module keras.src.layers.core.input_layer: + +class InputLayer(keras.src.layers.layer.Layer) + | InputLayer(shape=None, batch_size=None, dtype=None, sparse=None, batch_shape=None, input_tensor=None, name=None, **kwargs) + | + | Method resolution order: + | InputLayer + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | shape=None, + | batch_size=None, + | dtype=None, + | sparse=None, + | batch_shape=None, + | input_tensor=None, + | name=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + | ---------------------------------------------------------------------- + | Readonly properties defined here: + | + | dtype + | Alias of `layer.variable_dtype`. + | + diff --git a/.tether/man/Layer.txt b/.tether/man/Layer.txt new file mode 100644 index 0000000000..db0e84e96c --- /dev/null +++ b/.tether/man/Layer.txt @@ -0,0 +1,504 @@ +Help on class Layer in module keras.src.layers.layer: + +class Layer(keras.src.backend.tensorflow.layer.TFLayer, keras.src.ops.operation.Operation) + | Layer(*args, **kwargs) + | + | This is the class from which all layers inherit. + | + | A layer is a callable object that takes as input one or more tensors and + | that outputs one or more tensors. It involves *computation*, defined + | in the `call()` method, and a *state* (weight variables). 
State can be + | created: + | + | * in `__init__()`, for instance via `self.add_weight()`; + | * in the optional `build()` method, which is invoked by the first + | `__call__()` to the layer, and supplies the shape(s) of the input(s), + | which may not have been known at initialization time. + | + | Layers are recursively composable: If you assign a Layer instance as an + | attribute of another Layer, the outer layer will start tracking the weights + | created by the inner layer. Nested layers should be instantiated in the + | `__init__()` method or `build()` method. + | + | Users will just instantiate a layer and then treat it as a callable. + | + | Args: + | trainable: Boolean, whether the layer's variables should be trainable. + | name: String name of the layer. + | dtype: The dtype of the layer's computations and weights. Can also be a + | `keras.DTypePolicy`, + | which allows the computation and + | weight dtype to differ. Defaults to `None`. `None` means to use + | `keras.config.dtype_policy()`, + | which is a `float32` policy unless set to a different value + | (via `keras.config.set_dtype_policy()`). + | + | Attributes: + | name: The name of the layer (string). + | dtype: Dtype of the layer's weights. Alias of `layer.variable_dtype`. + | variable_dtype: Dtype of the layer's weights. + | compute_dtype: The dtype of the layer's computations. + | Layers automatically cast inputs to this dtype, which causes + | the computations and output to also be in this dtype. + | When mixed precision is used with a + | `keras.DTypePolicy`, this will be different + | than `variable_dtype`. + | trainable_weights: List of variables to be included in backprop. + | non_trainable_weights: List of variables that should not be + | included in backprop. + | weights: The concatenation of the lists trainable_weights and + | non_trainable_weights (in this order). + | trainable: Whether the layer should be trained (boolean), i.e. + | whether its potentially-trainable weights should be returned + | as part of `layer.trainable_weights`. + | input_spec: Optional (list of) `InputSpec` object(s) specifying the + | constraints on inputs that can be accepted by the layer. + | + | We recommend that descendants of `Layer` implement the following methods: + | + | * `__init__()`: Defines custom layer attributes, and creates layer weights + | that do not depend on input shapes, using `add_weight()`, + | or other state. + | * `build(self, input_shape)`: This method can be used to create weights that + | depend on the shape(s) of the input(s), using `add_weight()`, or other + | state. `__call__()` will automatically build the layer + | (if it has not been built yet) by calling `build()`. + | * `call(self, *args, **kwargs)`: Called in `__call__` after making + | sure `build()` has been called. `call()` performs the logic of applying + | the layer to the input arguments. + | Two reserved keyword arguments you can optionally use in `call()` are: + | 1. `training` (boolean, whether the call is in inference mode or + | training mode). + | 2. `mask` (boolean tensor encoding masked timesteps in the input, + | used e.g. in RNN layers). + | A typical signature for this method is `call(self, inputs)`, and users + | could optionally add `training` and `mask` if the layer needs them. + | * `get_config(self)`: Returns a dictionary containing the configuration + | used to initialize this layer. If the keys differ from the arguments + | in `__init__()`, then override `from_config(self)` as well. + | This method is used when saving + | the layer or a model that contains this layer.
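To make that last point concrete, here is a minimal sketch of a layer implementing the `get_config()` contract described above, returning its constructor arguments merged into the base config (the `ScaledDense` layer itself is hypothetical; only the `Layer` API documented here is assumed):

```python
import keras
from keras import ops


class ScaledDense(keras.layers.Layer):
    """Hypothetical layer: a Dense-like kernel with an extra `scale` argument."""

    def __init__(self, units, scale=1.0, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.scale = scale

    def build(self, input_shape):
        # Weight creation is deferred to build(), where the input shape is known.
        self.kernel = self.add_weight(
            shape=(input_shape[-1], self.units),
            initializer="glorot_uniform",
            name="kernel",
        )

    def call(self, inputs):
        return ops.matmul(inputs, self.kernel) * self.scale

    def get_config(self):
        # Return the constructor arguments so from_config() can rebuild
        # an identical (unbuilt) layer when a saved model is loaded.
        config = super().get_config()
        config.update({"units": self.units, "scale": self.scale})
        return config
```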
+ | + | Examples: + | + | Here's a basic example: a layer with two variables, `w` and `b`, + | that returns `y = w . x + b`. + | It shows how to implement `build()` and `call()`. + | Variables set as attributes of a layer are tracked as weights + | of the layer (in `layer.weights`). + | + | ```python + | class SimpleDense(Layer): + | def __init__(self, units=32): + | super().__init__() + | self.units = units + | + | # Create the state of the layer (weights) + | def build(self, input_shape): + | self.kernel = self.add_weight( + | shape=(input_shape[-1], self.units), + | initializer="glorot_uniform", + | trainable=True, + | name="kernel", + | ) + | self.bias = self.add_weight( + | shape=(self.units,), + | initializer="zeros", + | trainable=True, + | name="bias", + | ) + | + | # Defines the computation + | def call(self, inputs): + | return ops.matmul(inputs, self.kernel) + self.bias + | + | # Instantiates the layer. + | linear_layer = SimpleDense(4) + | + | # This will also call `build(input_shape)` and create the weights. + | y = linear_layer(ops.ones((2, 2))) + | assert len(linear_layer.weights) == 2 + | + | # These weights are trainable, so they're listed in `trainable_weights`: + | assert len(linear_layer.trainable_weights) == 2 + | ``` + | + | Besides trainable weights, updated via backpropagation during training, + | layers can also have non-trainable weights. These weights are meant to + | be updated manually during `call()`. Here's an example layer that computes + | the running sum of its inputs: + | + | ```python + | class ComputeSum(Layer): + | + | def __init__(self, input_dim): + | super(ComputeSum, self).__init__() + | # Create a non-trainable weight. + | self.total = self.add_weight( + | shape=(), + | initializer="zeros", + | trainable=False, + | name="total", + | ) + | + | def call(self, inputs): + | self.total.assign(self.total + ops.sum(inputs)) + | return self.total + | + | my_sum = ComputeSum(2) + | x = ops.ones((2, 2)) + | y = my_sum(x) + | + | assert my_sum.weights == [my_sum.total] + | assert my_sum.non_trainable_weights == [my_sum.total] + | assert my_sum.trainable_weights == [] + | ``` + | + | Method resolution order: + | Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __call__( + | self, + | *args, + | **kwargs + | ) + | Call self as a function. + | + | __init__( + | self, + | *, + | activity_regularizer=None, + | trainable=True, + | dtype=None, + | autocast=True, + | name=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | __repr__(self) + | Return repr(self). + | + | __setattr__( + | self, + | name, + | value + | ) + | Support self.foo = trackable syntax. + | + | __str__(self) + | Return str(self). + | + | add_loss(self, loss) + | Can be called inside of the `call()` method to add a scalar loss. + | + | Example: + | + | ```python + | class MyLayer(Layer): + | ...
+ | def call(self, x): + | self.add_loss(ops.sum(x)) + | return x + | ``` + | + | add_metric(self) + | + | add_variable( + | self, + | shape, + | initializer, + | dtype=None, + | trainable=True, + | autocast=True, + | regularizer=None, + | constraint=None, + | name=None + | ) + | Add a weight variable to the layer. + | + | Alias of `add_weight()`. + | + | add_weight( + | self, + | shape=None, + | initializer=None, + | dtype=None, + | trainable=True, + | autocast=True, + | regularizer=None, + | constraint=None, + | aggregation='mean', + | name=None + | ) + | Add a weight variable to the layer. + | + | Args: + | shape: Shape tuple for the variable. Must be fully-defined + | (no `None` entries). Defaults to `()` (scalar) if unspecified. + | initializer: Initializer object to use to populate the initial + | variable value, or string name of a built-in initializer + | (e.g. `"random_normal"`). If unspecified, defaults to + | `"glorot_uniform"` for floating-point variables and to `"zeros"` + | for all other types (e.g. int, bool). + | dtype: Dtype of the variable to create, e.g. `"float32"`. If + | unspecified, defaults to the layer's variable dtype + | (which itself defaults to `"float32"` if unspecified). + | trainable: Boolean, whether the variable should be trainable via + | backprop or whether its updates are managed manually. Defaults + | to `True`. + | autocast: Boolean, whether to autocast the layer's variables when + | accessing them. Defaults to `True`. + | regularizer: Regularizer object to call to apply penalty on the + | weight. These penalties are summed into the loss function + | during optimization. Defaults to `None`. + | constraint: Constraint object to call on the variable after any + | optimizer update, or string name of a built-in constraint. + | Defaults to `None`. + | aggregation: String, one of `'mean'`, `'sum'`, + | `'only_first_replica'`. Annotates the variable with the type + | of multi-replica aggregation to be used for this variable + | when writing custom data parallel training loops. + | name: String name of the variable. Useful for debugging purposes. + | + | build(self, input_shape) + | + | build_from_config(self, config) + | Builds the layer's states with the supplied config dict. + | + | By default, this method calls the `build(config["input_shape"])` method, + | which creates weights based on the layer's input shape in the supplied + | config. If your config contains other information needed to load the + | layer's state, you should override this method. + | + | Args: + | config: Dict containing the input shape associated with this layer. + | + | call( + | self, + | *args, + | **kwargs + | ) + | + | compute_mask( + | self, + | inputs, + | previous_mask + | ) + | + | compute_output_shape( + | self, + | *args, + | **kwargs + | ) + | + | compute_output_spec( + | self, + | *args, + | **kwargs + | ) + | + | count_params(self) + | Count the total number of scalars composing the weights. + | + | Returns: + | An integer count. + | + | get_build_config(self) + | Returns a dictionary with the layer's input shape. + | + | This method returns a config dict that can be used by + | `build_from_config(config)` to create all states (e.g. Variables and + | Lookup tables) needed by the layer. + | + | By default, the config only contains the input shape that the layer + | was built with.
If you're writing a custom layer that creates state in + | an unusual way, you should override this method to make sure this state + | is already created when Keras attempts to load its value upon model + | loading. + | + | Returns: + | A dict containing the input shape associated with the layer. + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + | get_weights(self) + | Return the values of `layer.weights` as a list of NumPy arrays. + | + | load_own_variables(self, store) + | Loads the state of the layer. + | + | You can override this method to take full control of how the state of + | the layer is loaded upon calling `keras.models.load_model()`. + | + | Args: + | store: Dict from which the state of the model will be loaded. + | + | quantize(self, mode) + | + | quantized_call( + | self, + | *args, + | **kwargs + | ) + | + | save_own_variables(self, store) + | Saves the state of the layer. + | + | You can override this method to take full control of how the state of + | the layer is saved upon calling `model.save()`. + | + | Args: + | store: Dict where the state of the model will be saved. + | + | set_weights(self, weights) + | Sets the values of `layer.weights` from a list of NumPy arrays. + | + | stateless_call( + | self, + | trainable_variables, + | non_trainable_variables, + | *args, + | return_losses=False, + | **kwargs + | ) + | Call the layer without any side effects. + | + | Args: + | trainable_variables: List of trainable variables of the model. + | non_trainable_variables: List of non-trainable variables of the + | model. + | *args: Positional arguments to be passed to `call()`. + | return_losses: If `True`, `stateless_call()` will return the list of + | losses created during `call()` as part of its return values. + | **kwargs: Keyword arguments to be passed to `call()`. + | + | Returns: + | A tuple. By default, returns `(outputs, non_trainable_variables)`. + | If `return_losses = True`, then returns + | `(outputs, non_trainable_variables, losses)`. + | + | Note: `non_trainable_variables` include not only non-trainable weights + | such as `BatchNormalization` statistics, but also RNG seed state + | (if there are any random operations part of the layer, such as dropout), + | and `Metric` state (if there are any metrics attached to the layer). + | These are all elements of state of the layer. + | + | Example: + | + | ```python + | model = ... + | data = ... + | trainable_variables = model.trainable_variables + | non_trainable_variables = model.non_trainable_variables + | # Call the model with zero side effects + | outputs, non_trainable_variables = model.stateless_call( + | trainable_variables, + | non_trainable_variables, + | data, + | ) + | # Attach the updated state to the model + | # (until you do this, the model is still in its pre-call state). + | for ref_var, value in zip( + | model.non_trainable_variables, non_trainable_variables + | ): + | ref_var.assign(value) + | ``` + | + | ---------------------------------------------------------------------- + | Static methods defined here: + | + | __new__( + | cls, + | *args, + | **kwargs + | ) + | Create and return a new object. See help(type) for accurate signature. + | + | ---------------------------------------------------------------------- + | Readonly properties defined here: + | + | compute_dtype + | The dtype of the computations performed by the layer. 
+ | + | dtype + | Alias of `layer.variable_dtype`. + | + | input_dtype + | The dtype layer inputs should be converted to. + | + | losses + | List of scalar losses from `add_loss`, regularizers and sublayers. + | + | metrics + | List of all metrics. + | + | metrics_variables + | List of all metric variables. + | + | non_trainable_variables + | List of all non-trainable layer state. + | + | This extends `layer.non_trainable_weights` to include all state used by + | the layer including state for metrics and `SeedGenerator`s. + | + | non_trainable_weights + | List of all non-trainable weight variables of the layer. + | + | These are the weights that should not be updated by the optimizer during + | training. Unlike `layer.non_trainable_variables`, this excludes metric + | state and random seeds. + | + | trainable_variables + | List of all trainable layer state. + | + | This is equivalent to `layer.trainable_weights`. + | + | trainable_weights + | List of all trainable weight variables of the layer. + | + | These are the weights that get updated by the optimizer during training. + | + | variable_dtype + | The dtype of the state (weights) of the layer. + | + | variables + | List of all layer state, including random seeds. + | + | This extends `layer.weights` to include all state used by the layer + | including `SeedGenerator`s. + | + | Note that metrics variables are not included here; use + | `metrics_variables` to visit all the metric variables. + | + | weights + | List of all weight variables of the layer. + | + | Unlike `layer.variables`, this excludes metric state and random seeds. + | + | ---------------------------------------------------------------------- + | Data descriptors defined here: + | + | dtype_policy + | + | input_spec + | + | supports_masking + | Whether this layer supports computing a mask using `compute_mask`. + | + | trainable + | Settable boolean, whether this layer should be trainable or not. + | diff --git a/.tether/man/LearningRateSchedule.txt b/.tether/man/LearningRateSchedule.txt new file mode 100644 index 0000000000..612dc89c80 --- /dev/null +++ b/.tether/man/LearningRateSchedule.txt @@ -0,0 +1,72 @@ +Help on class LearningRateSchedule in module keras.src.optimizers.schedules.learning_rate_schedule: + +class LearningRateSchedule(builtins.object) + | The learning rate schedule base class. + | + | You can use a learning rate schedule to modulate how the learning rate + | of your optimizer changes over time. + | + | Several built-in learning rate schedules are available, such as + | `keras.optimizers.schedules.ExponentialDecay` or + | `keras.optimizers.schedules.PiecewiseConstantDecay`: + | + | ```python + | lr_schedule = keras.optimizers.schedules.ExponentialDecay( + | initial_learning_rate=1e-2, + | decay_steps=10000, + | decay_rate=0.9) + | optimizer = keras.optimizers.SGD(learning_rate=lr_schedule) + | ``` + | + | A `LearningRateSchedule` instance can be passed in as the `learning_rate` + | argument of any optimizer. + | + | To implement your own schedule object, you should implement the `__call__` + | method, which takes a `step` argument (scalar integer tensor, the + | current training step count). + | Like for any other Keras object, you can also optionally + | make your object serializable by implementing the `get_config` + | and `from_config` methods.
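Complementing the built-in example that follows, here is a hedged sketch of a schedule that also implements `get_config()` so it survives a save/load round trip (the decay rule is illustrative, not a built-in):

```python
import keras


class MyInverseTimeDecay(keras.optimizers.schedules.LearningRateSchedule):
    """Illustrative schedule: lr = initial / (1 + decay_rate * step)."""

    def __init__(self, initial_learning_rate, decay_rate=0.01):
        self.initial_learning_rate = initial_learning_rate
        self.decay_rate = decay_rate

    def __call__(self, step):
        # `step` arrives as a scalar integer tensor; cast before dividing.
        step = keras.ops.cast(step, "float32")
        return self.initial_learning_rate / (1.0 + self.decay_rate * step)

    def get_config(self):
        # Returning the constructor arguments is what makes the
        # schedule serializable via from_config().
        return {
            "initial_learning_rate": self.initial_learning_rate,
            "decay_rate": self.decay_rate,
        }


optimizer = keras.optimizers.SGD(learning_rate=MyInverseTimeDecay(0.1))
```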
+ | + | Example: + | + | ```python + | class MyLRSchedule(keras.optimizers.schedules.LearningRateSchedule): + | + | def __init__(self, initial_learning_rate): + | self.initial_learning_rate = initial_learning_rate + | + | def __call__(self, step): + | return self.initial_learning_rate / (step + 1) + | + | optimizer = keras.optimizers.SGD(learning_rate=MyLRSchedule(0.1)) + | ``` + | + | Methods defined here: + | + | __call__(self, step) + | Call self as a function. + | + | get_config(self) + | + | ---------------------------------------------------------------------- + | Class methods defined here: + | + | from_config(config) from builtins.type + | Instantiates a `LearningRateSchedule` from its config. + | + | Args: + | config: Output of `get_config()`. + | + | Returns: + | A `LearningRateSchedule` instance. + | + | ---------------------------------------------------------------------- + | Data descriptors defined here: + | + | __dict__ + | dictionary for instance variables + | + | __weakref__ + | list of weak references to the object + diff --git a/.tether/man/Loss.txt b/.tether/man/Loss.txt new file mode 100644 index 0000000000..7ed9bfde50 --- /dev/null +++ b/.tether/man/Loss.txt @@ -0,0 +1,64 @@ +Help on class Loss in module keras.src.losses.loss: + +class Loss(builtins.object) + | Loss( + | name=None, + | reduction='sum_over_batch_size', + | dtype=None + | ) + | + | Loss base class. + | + | To be implemented by subclasses: + | + | * `call()`: Contains the logic for loss calculation using `y_true`, + | `y_pred`. + | + | Example subclass implementation: + | + | ```python + | class MeanSquaredError(Loss): + | def call(self, y_true, y_pred): + | return ops.mean(ops.square(y_pred - y_true), axis=-1) + | ``` + | + | Methods defined here: + | + | __call__( + | self, + | y_true, + | y_pred, + | sample_weight=None + | ) + | Call self as a function. + | + | __init__( + | self, + | name=None, + | reduction='sum_over_batch_size', + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call( + | self, + | y_true, + | y_pred + | ) + | + | get_config(self) + | + | ---------------------------------------------------------------------- + | Class methods defined here: + | + | from_config(config) from builtins.type + | + | ---------------------------------------------------------------------- + | Data descriptors defined here: + | + | __dict__ + | dictionary for instance variables + | + | __weakref__ + | list of weak references to the object + diff --git a/.tether/man/Metric.txt b/.tether/man/Metric.txt new file mode 100644 index 0000000000..b443dddf50 --- /dev/null +++ b/.tether/man/Metric.txt @@ -0,0 +1,179 @@ +Help on class Metric in module keras.src.metrics.metric: + +class Metric(builtins.object) + | Metric(dtype=None, name=None) + | + | Encapsulates metric logic and state. + | + | Args: + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Example: + | + | ```python + | m = SomeMetric(...) 
+ | for input in ...: + | m.update_state(input) + | print('Final result: ', m.result()) + | ``` + | + | Usage with `compile()` API: + | + | ```python + | model = keras.Sequential() + | model.add(keras.layers.Dense(64, activation='relu')) + | model.add(keras.layers.Dense(64, activation='relu')) + | model.add(keras.layers.Dense(10, activation='softmax')) + | + | model.compile(optimizer=keras.optimizers.RMSprop(0.01), + | loss=keras.losses.CategoricalCrossentropy(), + | metrics=[keras.metrics.CategoricalAccuracy()]) + | + | data = np.random.random((1000, 32)) + | labels = np.random.random((1000, 10)) + | + | model.fit(data, labels, epochs=10) + | ``` + | + | To be implemented by subclasses: + | + | * `__init__()`: All state variables should be created in this method by + | calling `self.add_variable()` like: `self.var = self.add_variable(...)` + | * `update_state()`: Has all updates to the state variables like: + | `self.var.assign(...)`. + | * `result()`: Computes and returns a scalar value or a dict of scalar values + | for the metric from the state variables. + | + | Example subclass implementation: + | + | ```python + | class BinaryTruePositives(Metric): + | + | def __init__(self, name='binary_true_positives', **kwargs): + | super().__init__(name=name, **kwargs) + | self.true_positives = self.add_variable( + | shape=(), + | initializer='zeros', + | name='true_positives' + | ) + | + | def update_state(self, y_true, y_pred, sample_weight=None): + | y_true = ops.cast(y_true, "bool") + | y_pred = ops.cast(y_pred, "bool") + | + | values = ops.logical_and( + | ops.equal(y_true, True), ops.equal(y_pred, True)) + | values = ops.cast(values, self.dtype) + | if sample_weight is not None: + | sample_weight = ops.cast(sample_weight, self.dtype) + | sample_weight = ops.broadcast_to( + | sample_weight, ops.shape(values) + | ) + | values = ops.multiply(values, sample_weight) + | self.true_positives.assign(self.true_positives + ops.sum(values)) + | + | def result(self): + | return self.true_positives + | ``` + | + | Methods defined here: + | + | __call__( + | self, + | *args, + | **kwargs + | ) + | Call self as a function. + | + | __init__( + | self, + | dtype=None, + | name=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | __repr__(self) + | Return repr(self). + | + | __setattr__( + | self, + | name, + | value + | ) + | Implement setattr(self, name, value). + | + | __str__(self) + | Return str(self). + | + | add_variable( + | self, + | shape, + | initializer, + | dtype=None, + | aggregation='sum', + | name=None + | ) + | + | add_weight( + | self, + | shape=(), + | initializer=None, + | dtype=None, + | name=None + | ) + | + | get_config(self) + | Return the serializable config of the metric. + | + | reset_state(self) + | Reset all of the metric state variables. + | + | This function is called between epochs/steps, + | when a metric is evaluated during training. + | + | result(self) + | Compute the current metric value. + | + | Returns: + | A scalar tensor, or a dictionary of scalar tensors. + | + | stateless_reset_state(self) + | + | stateless_result(self, metric_variables) + | + | stateless_update_state( + | self, + | metric_variables, + | *args, + | **kwargs + | ) + | + | update_state( + | self, + | *args, + | **kwargs + | ) + | Accumulate statistics for the metric. 
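The `stateless_*` methods above support functional-style training loops (e.g. on the JAX backend), where metric state is carried explicitly instead of mutated in place. A sketch of that workflow with the built-in `keras.metrics.Mean` (the printed value assumes these inputs):

```python
import numpy as np
import keras

metric = keras.metrics.Mean()
variables = metric.variables  # carry the state explicitly

# Each call returns a *new* list of variable values; the metric
# object itself is never mutated.
variables = metric.stateless_update_state(variables, np.array([1.0, 2.0, 3.0]))
variables = metric.stateless_update_state(variables, np.array([4.0]))

result = metric.stateless_result(variables)
print(keras.ops.convert_to_numpy(result))  # -> 2.5
```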
+ | + | ---------------------------------------------------------------------- + | Class methods defined here: + | + | from_config(config) from builtins.type + | + | ---------------------------------------------------------------------- + | Readonly properties defined here: + | + | dtype + | + | variables + | + | ---------------------------------------------------------------------- + | Data descriptors defined here: + | + | __dict__ + | dictionary for instance variables + | + | __weakref__ + | list of weak references to the object + diff --git a/.tether/man/activation_elu.txt b/.tether/man/activation_elu.txt new file mode 100644 index 0000000000..7c33b75ba0 --- /dev/null +++ b/.tether/man/activation_elu.txt @@ -0,0 +1,25 @@ +__signature__ +keras.activations.elu(x, alpha=1.0) +__doc__ +Exponential Linear Unit. + +The exponential linear unit (ELU) with `alpha > 0` is defined as: + +- `x` if `x > 0` +- `alpha * (exp(x) - 1)` if `x < 0` + +ELUs have negative values which push the mean of the activations +closer to zero. + +Mean activations that are closer to zero enable faster learning as they +bring the gradient closer to the natural gradient. +ELUs saturate to a negative value when the argument gets smaller. +Saturation means a small derivative which decreases the variation +and the information that is propagated to the next layer. + +Args: + x: Input tensor. + +Reference: + +- [Clevert et al., 2016](https://arxiv.org/abs/1511.07289) diff --git a/.tether/man/activation_exponential.txt b/.tether/man/activation_exponential.txt new file mode 100644 index 0000000000..b7f546173d --- /dev/null +++ b/.tether/man/activation_exponential.txt @@ -0,0 +1,7 @@ +__signature__ +keras.activations.exponential(x) +__doc__ +Exponential activation function. + +Args: + x: Input tensor. diff --git a/.tether/man/activation_gelu.txt b/.tether/man/activation_gelu.txt new file mode 100644 index 0000000000..0c8f5ef84b --- /dev/null +++ b/.tether/man/activation_gelu.txt @@ -0,0 +1,20 @@ +__signature__ +keras.activations.gelu(x, approximate=False) +__doc__ +Gaussian error linear unit (GELU) activation function. + +The Gaussian error linear unit (GELU) is defined as: + +`gelu(x) = x * P(X <= x)` where `P(X) ~ N(0, 1)`, +i.e. `gelu(x) = 0.5 * x * (1 + erf(x / sqrt(2)))`. + +GELU weights inputs by their value, rather than gating +inputs by their sign as in ReLU. + +Args: + x: Input tensor. + approximate: A `bool`, whether to enable approximation. + +Reference: + +- [Hendrycks et al., 2016](https://arxiv.org/abs/1606.08415) diff --git a/.tether/man/activation_hard_sigmoid.txt b/.tether/man/activation_hard_sigmoid.txt new file mode 100644 index 0000000000..7df083818a --- /dev/null +++ b/.tether/man/activation_hard_sigmoid.txt @@ -0,0 +1,21 @@ +__signature__ +keras.activations.hard_sigmoid(x) +__doc__ +Hard sigmoid activation function. + +The hard sigmoid activation is defined as: + +- `0` if `x <= -3` +- `1` if `x >= 3` +- `(x/6) + 0.5` if `-3 < x < 3` + +It's a faster, piecewise linear approximation +of the sigmoid activation. + +Args: + x: Input tensor. + +Reference: + +- [Wikipedia "Hard sigmoid"](https://en.wikipedia.org/wiki/Hard_sigmoid) +
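A quick numeric check of the piecewise definition above against the smooth sigmoid (output values shown are approximate):

```python
import numpy as np
import keras

x = np.array([-5.0, -1.0, 0.0, 1.0, 5.0])

# Piecewise-linear approximation: clipped to [0, 1], linear in between.
print(keras.ops.convert_to_numpy(keras.activations.hard_sigmoid(x)))
# -> [0.    0.333 0.5   0.667 1.   ]  (approximately)

print(keras.ops.convert_to_numpy(keras.activations.sigmoid(x)))
# -> [0.007 0.269 0.5   0.731 0.993]  (approximately)
```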
diff --git a/.tether/man/activation_hard_silu.txt b/.tether/man/activation_hard_silu.txt new file mode 100644 index 0000000000..143fa10c9a --- /dev/null +++ b/.tether/man/activation_hard_silu.txt @@ -0,0 +1,20 @@ +__signature__ +keras.activations.hard_silu(x) +__doc__ +Hard SiLU activation function, also known as Hard Swish. + +It is defined as: + +- `0` if `x < -3` +- `x` if `x > 3` +- `x * (x + 3) / 6` if `-3 <= x <= 3` + +It's a faster, piecewise linear approximation of the silu activation. + +Args: + x: Input tensor. + +Reference: + +- [A Howard, 2019](https://arxiv.org/abs/1905.02244) + diff --git a/.tether/man/activation_leaky_relu.txt b/.tether/man/activation_leaky_relu.txt new file mode 100644 index 0000000000..53b1b007f6 --- /dev/null +++ b/.tether/man/activation_leaky_relu.txt @@ -0,0 +1,9 @@ +__signature__ +keras.activations.leaky_relu(x, negative_slope=0.2) +__doc__ +Leaky relu activation function. + +Args: + x: Input tensor. + negative_slope: A `float` that controls the slope + for values lower than the threshold. diff --git a/.tether/man/activation_linear.txt b/.tether/man/activation_linear.txt new file mode 100644 index 0000000000..7d8a0ee7a3 --- /dev/null +++ b/.tether/man/activation_linear.txt @@ -0,0 +1,10 @@ +__signature__ +keras.activations.linear(x) +__doc__ +Linear activation function (pass-through). + +A "linear" activation is an identity function: +it returns the input, unmodified. + +Args: + x: Input tensor. diff --git a/.tether/man/activation_log_softmax.txt b/.tether/man/activation_log_softmax.txt new file mode 100644 index 0000000000..b74b689dd1 --- /dev/null +++ b/.tether/man/activation_log_softmax.txt @@ -0,0 +1,12 @@ +__signature__ +keras.activations.log_softmax(x, axis=-1) +__doc__ +Log-Softmax activation function. + +Each input vector is handled independently. +The `axis` argument sets which axis of the input the function +is applied along. + +Args: + x: Input tensor. + axis: Integer, axis along which the softmax is applied. diff --git a/.tether/man/activation_mish.txt b/.tether/man/activation_mish.txt new file mode 100644 index 0000000000..f077a88a55 --- /dev/null +++ b/.tether/man/activation_mish.txt @@ -0,0 +1,19 @@ +__signature__ +keras.activations.mish(x) +__doc__ +Mish activation function. + +It is defined as: + +`mish(x) = x * tanh(softplus(x))` + +where `softplus` is defined as: + +`softplus(x) = log(exp(x) + 1)` + +Args: + x: Input tensor. + +Reference: + +- [Misra, 2019](https://arxiv.org/abs/1908.08681) diff --git a/.tether/man/activation_relu.txt b/.tether/man/activation_relu.txt new file mode 100644 index 0000000000..36514dd8ff --- /dev/null +++ b/.tether/man/activation_relu.txt @@ -0,0 +1,40 @@ +__signature__ +keras.activations.relu( + x, + negative_slope=0.0, + max_value=None, + threshold=0.0 +) +__doc__ +Applies the rectified linear unit activation function. + +With default values, this returns the standard ReLU activation: +`max(x, 0)`, the element-wise maximum of 0 and the input tensor. + +Modifying default parameters allows you to use non-zero thresholds, +change the max value of the activation, +and to use a non-zero multiple of the input for values below the threshold. + +Examples: + +>>> x = [-10, -5, 0.0, 5, 10] +>>> keras.activations.relu(x) +[ 0., 0., 0., 5., 10.] +>>> keras.activations.relu(x, negative_slope=0.5) +[-5. , -2.5, 0. , 5. , 10. ] +>>> keras.activations.relu(x, max_value=5.) +[0., 0., 0., 5., 5.] +>>> keras.activations.relu(x, threshold=5.) +[-0., -0., 0., 0., 10.] + +Args: + x: Input tensor. + negative_slope: A `float` that controls the slope + for values lower than the threshold. + max_value: A `float` that sets the saturation threshold (the largest + value the function will return). + threshold: A `float` giving the threshold value of the activation + function below which values will be damped or set to zero.
+ +Returns: + A tensor with the same shape and dtype as input `x`. diff --git a/.tether/man/activation_relu6.txt b/.tether/man/activation_relu6.txt new file mode 100644 index 0000000000..483776dcd4 --- /dev/null +++ b/.tether/man/activation_relu6.txt @@ -0,0 +1,9 @@ +__signature__ +keras.activations.relu6(x) +__doc__ +Relu6 activation function. + +It's the ReLU function, but truncated to a maximum value of 6. + +Args: + x: Input tensor. diff --git a/.tether/man/activation_selu.txt b/.tether/man/activation_selu.txt new file mode 100644 index 0000000000..a9ce7b35e9 --- /dev/null +++ b/.tether/man/activation_selu.txt @@ -0,0 +1,37 @@ +__signature__ +keras.activations.selu(x) +__doc__ +Scaled Exponential Linear Unit (SELU). + +The Scaled Exponential Linear Unit (SELU) activation function is defined as: + +- `scale * x` if `x > 0` +- `scale * alpha * (exp(x) - 1)` if `x < 0` + +where `alpha` and `scale` are pre-defined constants +(`alpha=1.67326324` and `scale=1.05070098`). + +Basically, the SELU activation function multiplies `scale` (> 1) with the +output of the `keras.activations.elu` function to ensure a slope larger +than one for positive inputs. + +The values of `alpha` and `scale` are +chosen so that the mean and variance of the inputs are preserved +between two consecutive layers as long as the weights are initialized +correctly (see `keras.initializers.LecunNormal` initializer) +and the number of input units is "large enough" +(see reference paper for more information). + +Args: + x: Input tensor. + +Notes: + +- To be used together with the + `keras.initializers.LecunNormal` initializer. +- To be used together with the dropout variant + `keras.layers.AlphaDropout` (rather than regular dropout). + +Reference: + +- [Klambauer et al., 2017](https://arxiv.org/abs/1706.02515) diff --git a/.tether/man/activation_sigmoid.txt b/.tether/man/activation_sigmoid.txt new file mode 100644 index 0000000000..90438bc7df --- /dev/null +++ b/.tether/man/activation_sigmoid.txt @@ -0,0 +1,17 @@ +__signature__ +keras.activations.sigmoid(x) +__doc__ +Sigmoid activation function. + +It is defined as: `sigmoid(x) = 1 / (1 + exp(-x))`. + +For small values (<-5), +`sigmoid` returns a value close to zero, and for large values (>5) +the result of the function gets close to 1. + +Sigmoid is equivalent to a 2-element softmax, where the second element is +assumed to be zero. The sigmoid function always returns a value between +0 and 1. + +Args: + x: Input tensor. diff --git a/.tether/man/activation_silu.txt b/.tether/man/activation_silu.txt new file mode 100644 index 0000000000..98b9bccceb --- /dev/null +++ b/.tether/man/activation_silu.txt @@ -0,0 +1,17 @@ +__signature__ +keras.activations.silu(x) +__doc__ +Swish (or Silu) activation function. + +It is defined as: `swish(x) = x * sigmoid(x)`. + +The Swish (or Silu) activation function is a smooth, +non-monotonic function that is unbounded above and +bounded below. + +Args: + x: Input tensor. + +Reference: + +- [Ramachandran et al., 2017](https://arxiv.org/abs/1710.05941) diff --git a/.tether/man/activation_softmax.txt b/.tether/man/activation_softmax.txt new file mode 100644 index 0000000000..499106a53b --- /dev/null +++ b/.tether/man/activation_softmax.txt @@ -0,0 +1,23 @@ +__signature__ +keras.activations.softmax(x, axis=-1) +__doc__ +Softmax converts a vector of values to a probability distribution. + +The elements of the output vector are in range `[0, 1]` and sum to 1. + +Each input vector is handled independently. 
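For example, a quick check that softmax outputs form a probability distribution (values rounded):

```python
import numpy as np
import keras

x = np.array([[1.0, 2.0, 3.0]])
probs = keras.activations.softmax(x)

print(keras.ops.convert_to_numpy(probs))
# -> [[0.090 0.245 0.665]]  (approximately)
print(keras.ops.convert_to_numpy(keras.ops.sum(probs)))
# -> 1.0
```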
+The `axis` argument sets which axis of the input the function
+is applied along.
+
+Softmax is often used as the activation for the last
+layer of a classification network because the result could be interpreted as
+a probability distribution.
+
+The softmax of each vector x is computed as
+`exp(x) / sum(exp(x))`.
+
+The input values are the log-odds of the resulting probability.
+
+Args:
+    x: Input tensor.
+    axis: Integer, axis along which the softmax is applied.
diff --git a/.tether/man/activation_softplus.txt b/.tether/man/activation_softplus.txt
new file mode 100644
index 0000000000..9df09019a8
--- /dev/null
+++ b/.tether/man/activation_softplus.txt
@@ -0,0 +1,9 @@
+__signature__
+keras.activations.softplus(x)
+__doc__
+Softplus activation function.
+
+It is defined as: `softplus(x) = log(exp(x) + 1)`.
+
+Args:
+    x: Input tensor.
diff --git a/.tether/man/activation_softsign.txt b/.tether/man/activation_softsign.txt
new file mode 100644
index 0000000000..149b0294d4
--- /dev/null
+++ b/.tether/man/activation_softsign.txt
@@ -0,0 +1,9 @@
+__signature__
+keras.activations.softsign(x)
+__doc__
+Softsign activation function.
+
+Softsign is defined as: `softsign(x) = x / (abs(x) + 1)`.
+
+Args:
+    x: Input tensor.
diff --git a/.tether/man/activation_tanh.txt b/.tether/man/activation_tanh.txt
new file mode 100644
index 0000000000..722997efb6
--- /dev/null
+++ b/.tether/man/activation_tanh.txt
@@ -0,0 +1,11 @@
+__signature__
+keras.activations.tanh(x)
+__doc__
+Hyperbolic tangent activation function.
+
+It is defined as:
+`tanh(x) = sinh(x) / cosh(x)`, i.e.
+`tanh(x) = ((exp(x) - exp(-x)) / (exp(x) + exp(-x)))`.
+
+Args:
+    x: Input tensor.
diff --git a/.tether/man/application_convnext_base.txt b/.tether/man/application_convnext_base.txt
new file mode 100644
index 0000000000..f612065e0b
--- /dev/null
+++ b/.tether/man/application_convnext_base.txt
@@ -0,0 +1,78 @@
+__signature__
+keras.applications.ConvNeXtBase(
+    model_name='convnext_base',
+    include_top=True,
+    include_preprocessing=True,
+    weights='imagenet',
+    input_tensor=None,
+    input_shape=None,
+    pooling=None,
+    classes=1000,
+    classifier_activation='softmax'
+)
+__doc__
+Instantiates the ConvNeXtBase architecture.
+
+References:
+- [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545)
+(CVPR 2022)
+
+For image classification use cases, see
+[this page for detailed examples](
+https://keras.io/api/applications/#usage-examples-for-image-classification-models).
+For transfer learning use cases, make sure to read the
+[guide to transfer learning & fine-tuning](
+https://keras.io/guides/transfer_learning/).
+
+The `base`, `large`, and `xlarge` models were first pre-trained on the
+ImageNet-21k dataset and then fine-tuned on the ImageNet-1k dataset. The
+pre-trained parameters of the models were assembled from the
+[official repository](https://github.com/facebookresearch/ConvNeXt). To get a
+sense of how these parameters were converted to Keras compatible parameters,
+please refer to
+[this repository](https://github.com/sayakpaul/keras-convnext-conversion).
+
+Note: Each Keras Application expects a specific kind of input preprocessing.
+For ConvNeXt, preprocessing is included in the model using a `Normalization`
+layer. ConvNeXt models expect their inputs to be float or uint8 tensors of
+pixels with values in the [0-255] range.
+
+When calling the `summary()` method after instantiating a ConvNeXt model,
+prefer setting the `expand_nested` argument of `summary()` to `True` to better
+investigate the instantiated model.
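A minimal sketch of the two notes above (built-in preprocessing and `expand_nested`), assuming random weights (`weights=None`) and the default 224x224 input size; raw `[0, 255]` pixels go straight into the model:

```
import numpy as np
import keras

# Preprocessing is bundled in the model as a Normalization layer,
# so raw [0, 255] pixel tensors are passed in directly.
model = keras.applications.ConvNeXtBase(weights=None)  # random init; skips the weight download
model.summary(expand_nested=True)  # expand_nested reveals the nested ConvNeXt blocks

images = np.random.uniform(0, 255, size=(1, 224, 224, 3)).astype("float32")
print(model.predict(images).shape)  # (1, 1000)
```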
+ +Args: + include_top: Whether to include the fully-connected + layer at the top of the network. Defaults to `True`. + weights: One of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet-1k), or the path to the weights + file to be loaded. Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: Optional shape tuple, only to be specified + if `include_top` is `False`. + It should have exactly 3 inputs channels. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. Defaults to None. + - `None` means that the output of the model will be + the 4D tensor output of the last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. Defaults to 1000 (number of + ImageNet classes). + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + Defaults to `"softmax"`. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_convnext_large.txt b/.tether/man/application_convnext_large.txt new file mode 100644 index 0000000000..bcee39e4e1 --- /dev/null +++ b/.tether/man/application_convnext_large.txt @@ -0,0 +1,78 @@ +__signature__ +keras.applications.ConvNeXtLarge( + model_name='convnext_large', + include_top=True, + include_preprocessing=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +__doc__ +Instantiates the ConvNeXtLarge architecture. + +References: +- [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) +(CVPR 2022) + +For image classification use cases, see +[this page for detailed examples]( +https://keras.io/api/applications/#usage-examples-for-image-classification-models). +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( +https://keras.io/guides/transfer_learning/). + +The `base`, `large`, and `xlarge` models were first pre-trained on the +ImageNet-21k dataset and then fine-tuned on the ImageNet-1k dataset. The +pre-trained parameters of the models were assembled from the +[official repository](https://github.com/facebookresearch/ConvNeXt). To get a +sense of how these parameters were converted to Keras compatible parameters, +please refer to +[this repository](https://github.com/sayakpaul/keras-convnext-conversion). + +Note: Each Keras Application expects a specific kind of input preprocessing. +For ConvNeXt, preprocessing is included in the model using a `Normalization` +layer. ConvNeXt models expect their inputs to be float or uint8 tensors of +pixels with values in the [0-255] range. + +When calling the `summary()` method after instantiating a ConvNeXt model, +prefer setting the `expand_nested` argument `summary()` to `True` to better +investigate the instantiated model. + +Args: + include_top: Whether to include the fully-connected + layer at the top of the network. Defaults to `True`. 
+ weights: One of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet-1k), or the path to the weights + file to be loaded. Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: Optional shape tuple, only to be specified + if `include_top` is `False`. + It should have exactly 3 inputs channels. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. Defaults to None. + - `None` means that the output of the model will be + the 4D tensor output of the last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. Defaults to 1000 (number of + ImageNet classes). + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + Defaults to `"softmax"`. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_convnext_small.txt b/.tether/man/application_convnext_small.txt new file mode 100644 index 0000000000..48303a2f5f --- /dev/null +++ b/.tether/man/application_convnext_small.txt @@ -0,0 +1,78 @@ +__signature__ +keras.applications.ConvNeXtSmall( + model_name='convnext_small', + include_top=True, + include_preprocessing=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +__doc__ +Instantiates the ConvNeXtSmall architecture. + +References: +- [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) +(CVPR 2022) + +For image classification use cases, see +[this page for detailed examples]( +https://keras.io/api/applications/#usage-examples-for-image-classification-models). +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( +https://keras.io/guides/transfer_learning/). + +The `base`, `large`, and `xlarge` models were first pre-trained on the +ImageNet-21k dataset and then fine-tuned on the ImageNet-1k dataset. The +pre-trained parameters of the models were assembled from the +[official repository](https://github.com/facebookresearch/ConvNeXt). To get a +sense of how these parameters were converted to Keras compatible parameters, +please refer to +[this repository](https://github.com/sayakpaul/keras-convnext-conversion). + +Note: Each Keras Application expects a specific kind of input preprocessing. +For ConvNeXt, preprocessing is included in the model using a `Normalization` +layer. ConvNeXt models expect their inputs to be float or uint8 tensors of +pixels with values in the [0-255] range. + +When calling the `summary()` method after instantiating a ConvNeXt model, +prefer setting the `expand_nested` argument `summary()` to `True` to better +investigate the instantiated model. + +Args: + include_top: Whether to include the fully-connected + layer at the top of the network. Defaults to `True`. 
+ weights: One of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet-1k), or the path to the weights + file to be loaded. Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: Optional shape tuple, only to be specified + if `include_top` is `False`. + It should have exactly 3 inputs channels. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. Defaults to None. + - `None` means that the output of the model will be + the 4D tensor output of the last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. Defaults to 1000 (number of + ImageNet classes). + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + Defaults to `"softmax"`. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_convnext_tiny.txt b/.tether/man/application_convnext_tiny.txt new file mode 100644 index 0000000000..dd9e1f4b59 --- /dev/null +++ b/.tether/man/application_convnext_tiny.txt @@ -0,0 +1,78 @@ +__signature__ +keras.applications.ConvNeXtTiny( + model_name='convnext_tiny', + include_top=True, + include_preprocessing=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +__doc__ +Instantiates the ConvNeXtTiny architecture. + +References: +- [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) +(CVPR 2022) + +For image classification use cases, see +[this page for detailed examples]( +https://keras.io/api/applications/#usage-examples-for-image-classification-models). +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( +https://keras.io/guides/transfer_learning/). + +The `base`, `large`, and `xlarge` models were first pre-trained on the +ImageNet-21k dataset and then fine-tuned on the ImageNet-1k dataset. The +pre-trained parameters of the models were assembled from the +[official repository](https://github.com/facebookresearch/ConvNeXt). To get a +sense of how these parameters were converted to Keras compatible parameters, +please refer to +[this repository](https://github.com/sayakpaul/keras-convnext-conversion). + +Note: Each Keras Application expects a specific kind of input preprocessing. +For ConvNeXt, preprocessing is included in the model using a `Normalization` +layer. ConvNeXt models expect their inputs to be float or uint8 tensors of +pixels with values in the [0-255] range. + +When calling the `summary()` method after instantiating a ConvNeXt model, +prefer setting the `expand_nested` argument `summary()` to `True` to better +investigate the instantiated model. + +Args: + include_top: Whether to include the fully-connected + layer at the top of the network. Defaults to `True`. 
+ weights: One of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet-1k), or the path to the weights + file to be loaded. Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: Optional shape tuple, only to be specified + if `include_top` is `False`. + It should have exactly 3 inputs channels. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. Defaults to None. + - `None` means that the output of the model will be + the 4D tensor output of the last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. Defaults to 1000 (number of + ImageNet classes). + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + Defaults to `"softmax"`. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_convnext_xlarge.txt b/.tether/man/application_convnext_xlarge.txt new file mode 100644 index 0000000000..ccbd4def0e --- /dev/null +++ b/.tether/man/application_convnext_xlarge.txt @@ -0,0 +1,78 @@ +__signature__ +keras.applications.ConvNeXtXLarge( + model_name='convnext_xlarge', + include_top=True, + include_preprocessing=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +__doc__ +Instantiates the ConvNeXtXLarge architecture. + +References: +- [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) +(CVPR 2022) + +For image classification use cases, see +[this page for detailed examples]( +https://keras.io/api/applications/#usage-examples-for-image-classification-models). +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( +https://keras.io/guides/transfer_learning/). + +The `base`, `large`, and `xlarge` models were first pre-trained on the +ImageNet-21k dataset and then fine-tuned on the ImageNet-1k dataset. The +pre-trained parameters of the models were assembled from the +[official repository](https://github.com/facebookresearch/ConvNeXt). To get a +sense of how these parameters were converted to Keras compatible parameters, +please refer to +[this repository](https://github.com/sayakpaul/keras-convnext-conversion). + +Note: Each Keras Application expects a specific kind of input preprocessing. +For ConvNeXt, preprocessing is included in the model using a `Normalization` +layer. ConvNeXt models expect their inputs to be float or uint8 tensors of +pixels with values in the [0-255] range. + +When calling the `summary()` method after instantiating a ConvNeXt model, +prefer setting the `expand_nested` argument `summary()` to `True` to better +investigate the instantiated model. + +Args: + include_top: Whether to include the fully-connected + layer at the top of the network. Defaults to `True`. 
+ weights: One of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet-1k), or the path to the weights + file to be loaded. Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: Optional shape tuple, only to be specified + if `include_top` is `False`. + It should have exactly 3 inputs channels. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. Defaults to None. + - `None` means that the output of the model will be + the 4D tensor output of the last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. Defaults to 1000 (number of + ImageNet classes). + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + Defaults to `"softmax"`. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_densenet121.txt b/.tether/man/application_densenet121.txt new file mode 100644 index 0000000000..24db06c180 --- /dev/null +++ b/.tether/man/application_densenet121.txt @@ -0,0 +1,64 @@ +__signature__ +keras.applications.DenseNet121( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +__doc__ +Instantiates the Densenet121 architecture. + +Reference: +- [Densely Connected Convolutional Networks]( + https://arxiv.org/abs/1608.06993) (CVPR 2017) + +Optionally loads weights pre-trained on ImageNet. +Note that the data format convention used by the model is +the one specified in your Keras config at `~/.keras/keras.json`. + +Note: each Keras Application expects a specific kind of input preprocessing. +For DenseNet, call `keras.applications.densenet.preprocess_input` +on your inputs before passing them to the model. + +Args: + include_top: whether to include the fully-connected + layer at the top of the network. + weights: one of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), + or the path to the weights file to be loaded. + input_tensor: optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(224, 224, 3)` (with `'channels_last'` data format) + or `(3, 224, 224)` (with `'channels_first'` data format). + It should have exactly 3 inputs channels, + and width and height should be no smaller than 32. + E.g. `(200, 200, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional block. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional block, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. 
+ classes: optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. + classifier_activation: A `str` or callable. + The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits + of the "top" layer. When loading pretrained weights, + `classifier_activation` can only be `None` or `"softmax"`. + +Returns: + A Keras model instance. diff --git a/.tether/man/application_densenet169.txt b/.tether/man/application_densenet169.txt new file mode 100644 index 0000000000..234ce93f15 --- /dev/null +++ b/.tether/man/application_densenet169.txt @@ -0,0 +1,64 @@ +__signature__ +keras.applications.DenseNet169( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +__doc__ +Instantiates the Densenet169 architecture. + +Reference: +- [Densely Connected Convolutional Networks]( + https://arxiv.org/abs/1608.06993) (CVPR 2017) + +Optionally loads weights pre-trained on ImageNet. +Note that the data format convention used by the model is +the one specified in your Keras config at `~/.keras/keras.json`. + +Note: each Keras Application expects a specific kind of input preprocessing. +For DenseNet, call `keras.applications.densenet.preprocess_input` +on your inputs before passing them to the model. + +Args: + include_top: whether to include the fully-connected + layer at the top of the network. + weights: one of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), + or the path to the weights file to be loaded. + input_tensor: optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(224, 224, 3)` (with `'channels_last'` data format) + or `(3, 224, 224)` (with `'channels_first'` data format). + It should have exactly 3 inputs channels, + and width and height should be no smaller than 32. + E.g. `(200, 200, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional block. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional block, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. + classifier_activation: A `str` or callable. + The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits + of the "top" layer. When loading pretrained weights, + `classifier_activation` can only be `None` or `"softmax"`. + +Returns: + A Keras model instance. 
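Unlike ConvNeXt, the DenseNet variants above do not bundle preprocessing, so `keras.applications.densenet.preprocess_input` must be applied first. A hedged sketch, assuming random weights and the default `(224, 224, 3)` input:

```
import numpy as np
import keras
from keras.applications import densenet

model = densenet.DenseNet169(weights=None)  # random init; default (224, 224, 3) input
x = np.random.uniform(0, 255, size=(1, 224, 224, 3))
x = densenet.preprocess_input(x)  # scale and normalize channels as DenseNet expects
print(model.predict(x).shape)  # (1, 1000)
```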
diff --git a/.tether/man/application_densenet201.txt b/.tether/man/application_densenet201.txt new file mode 100644 index 0000000000..491c76670b --- /dev/null +++ b/.tether/man/application_densenet201.txt @@ -0,0 +1,64 @@ +__signature__ +keras.applications.DenseNet201( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +__doc__ +Instantiates the Densenet201 architecture. + +Reference: +- [Densely Connected Convolutional Networks]( + https://arxiv.org/abs/1608.06993) (CVPR 2017) + +Optionally loads weights pre-trained on ImageNet. +Note that the data format convention used by the model is +the one specified in your Keras config at `~/.keras/keras.json`. + +Note: each Keras Application expects a specific kind of input preprocessing. +For DenseNet, call `keras.applications.densenet.preprocess_input` +on your inputs before passing them to the model. + +Args: + include_top: whether to include the fully-connected + layer at the top of the network. + weights: one of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), + or the path to the weights file to be loaded. + input_tensor: optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(224, 224, 3)` (with `'channels_last'` data format) + or `(3, 224, 224)` (with `'channels_first'` data format). + It should have exactly 3 inputs channels, + and width and height should be no smaller than 32. + E.g. `(200, 200, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional block. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional block, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. + classifier_activation: A `str` or callable. + The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits + of the "top" layer. When loading pretrained weights, + `classifier_activation` can only be `None` or `"softmax"`. + +Returns: + A Keras model instance. diff --git a/.tether/man/application_efficientnet_b0.txt b/.tether/man/application_efficientnet_b0.txt new file mode 100644 index 0000000000..b1c8ddf40a --- /dev/null +++ b/.tether/man/application_efficientnet_b0.txt @@ -0,0 +1,73 @@ +__signature__ +keras.applications.EfficientNetB0( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + **kwargs +) +__doc__ +Instantiates the EfficientNetB0 architecture. + +Reference: +- [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks]( + https://arxiv.org/abs/1905.11946) (ICML 2019) + +This function returns a Keras image classification model, +optionally loaded with weights pre-trained on ImageNet. + +For image classification use cases, see +[this page for detailed examples]( +https://keras.io/api/applications/#usage-examples-for-image-classification-models). 
+ +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( +https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. +For EfficientNet, input preprocessing is included as part of the model +(as a `Rescaling` layer), and thus +`keras.applications.efficientnet.preprocess_input` is actually a +pass-through function. EfficientNet models expect their inputs to be float +tensors of pixels with values in the `[0-255]` range. + +Args: + include_top: Whether to include the fully-connected + layer at the top of the network. Defaults to `True`. + weights: One of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), + or the path to the weights file to be loaded. + Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: Optional shape tuple, only to be specified + if `include_top` is False. + It should have exactly 3 inputs channels. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. Defaults to `None`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. 1000 is how many + ImageNet classes there are. Defaults to `1000`. + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + Defaults to `'softmax'`. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_efficientnet_b1.txt b/.tether/man/application_efficientnet_b1.txt new file mode 100644 index 0000000000..5f07fd8106 --- /dev/null +++ b/.tether/man/application_efficientnet_b1.txt @@ -0,0 +1,73 @@ +__signature__ +keras.applications.EfficientNetB1( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + **kwargs +) +__doc__ +Instantiates the EfficientNetB1 architecture. + +Reference: +- [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks]( + https://arxiv.org/abs/1905.11946) (ICML 2019) + +This function returns a Keras image classification model, +optionally loaded with weights pre-trained on ImageNet. + +For image classification use cases, see +[this page for detailed examples]( +https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( +https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. +For EfficientNet, input preprocessing is included as part of the model +(as a `Rescaling` layer), and thus +`keras.applications.efficientnet.preprocess_input` is actually a +pass-through function. 
EfficientNet models expect their inputs to be float +tensors of pixels with values in the `[0-255]` range. + +Args: + include_top: Whether to include the fully-connected + layer at the top of the network. Defaults to `True`. + weights: One of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), + or the path to the weights file to be loaded. + Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: Optional shape tuple, only to be specified + if `include_top` is False. + It should have exactly 3 inputs channels. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. Defaults to `None`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. 1000 is how many + ImageNet classes there are. Defaults to `1000`. + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + Defaults to `'softmax'`. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_efficientnet_b2.txt b/.tether/man/application_efficientnet_b2.txt new file mode 100644 index 0000000000..d46d5cc50b --- /dev/null +++ b/.tether/man/application_efficientnet_b2.txt @@ -0,0 +1,73 @@ +__signature__ +keras.applications.EfficientNetB2( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + **kwargs +) +__doc__ +Instantiates the EfficientNetB2 architecture. + +Reference: +- [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks]( + https://arxiv.org/abs/1905.11946) (ICML 2019) + +This function returns a Keras image classification model, +optionally loaded with weights pre-trained on ImageNet. + +For image classification use cases, see +[this page for detailed examples]( +https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( +https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. +For EfficientNet, input preprocessing is included as part of the model +(as a `Rescaling` layer), and thus +`keras.applications.efficientnet.preprocess_input` is actually a +pass-through function. EfficientNet models expect their inputs to be float +tensors of pixels with values in the `[0-255]` range. + +Args: + include_top: Whether to include the fully-connected + layer at the top of the network. Defaults to `True`. + weights: One of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), + or the path to the weights file to be loaded. + Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. 
+ input_shape: Optional shape tuple, only to be specified + if `include_top` is False. + It should have exactly 3 inputs channels. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. Defaults to `None`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. 1000 is how many + ImageNet classes there are. Defaults to `1000`. + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + Defaults to `'softmax'`. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_efficientnet_b3.txt b/.tether/man/application_efficientnet_b3.txt new file mode 100644 index 0000000000..4882fa48ba --- /dev/null +++ b/.tether/man/application_efficientnet_b3.txt @@ -0,0 +1,73 @@ +__signature__ +keras.applications.EfficientNetB3( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + **kwargs +) +__doc__ +Instantiates the EfficientNetB3 architecture. + +Reference: +- [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks]( + https://arxiv.org/abs/1905.11946) (ICML 2019) + +This function returns a Keras image classification model, +optionally loaded with weights pre-trained on ImageNet. + +For image classification use cases, see +[this page for detailed examples]( +https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( +https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. +For EfficientNet, input preprocessing is included as part of the model +(as a `Rescaling` layer), and thus +`keras.applications.efficientnet.preprocess_input` is actually a +pass-through function. EfficientNet models expect their inputs to be float +tensors of pixels with values in the `[0-255]` range. + +Args: + include_top: Whether to include the fully-connected + layer at the top of the network. Defaults to `True`. + weights: One of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), + or the path to the weights file to be loaded. + Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: Optional shape tuple, only to be specified + if `include_top` is False. + It should have exactly 3 inputs channels. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. Defaults to `None`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. 
+ - `max` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. 1000 is how many + ImageNet classes there are. Defaults to `1000`. + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + Defaults to `'softmax'`. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_efficientnet_b4.txt b/.tether/man/application_efficientnet_b4.txt new file mode 100644 index 0000000000..9f9ae69c33 --- /dev/null +++ b/.tether/man/application_efficientnet_b4.txt @@ -0,0 +1,73 @@ +__signature__ +keras.applications.EfficientNetB4( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + **kwargs +) +__doc__ +Instantiates the EfficientNetB4 architecture. + +Reference: +- [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks]( + https://arxiv.org/abs/1905.11946) (ICML 2019) + +This function returns a Keras image classification model, +optionally loaded with weights pre-trained on ImageNet. + +For image classification use cases, see +[this page for detailed examples]( +https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( +https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. +For EfficientNet, input preprocessing is included as part of the model +(as a `Rescaling` layer), and thus +`keras.applications.efficientnet.preprocess_input` is actually a +pass-through function. EfficientNet models expect their inputs to be float +tensors of pixels with values in the `[0-255]` range. + +Args: + include_top: Whether to include the fully-connected + layer at the top of the network. Defaults to `True`. + weights: One of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), + or the path to the weights file to be loaded. + Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: Optional shape tuple, only to be specified + if `include_top` is False. + It should have exactly 3 inputs channels. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. Defaults to `None`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. 1000 is how many + ImageNet classes there are. Defaults to `1000`. + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + Defaults to `'softmax'`. 
+ When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_efficientnet_b5.txt b/.tether/man/application_efficientnet_b5.txt new file mode 100644 index 0000000000..4bf093aeb1 --- /dev/null +++ b/.tether/man/application_efficientnet_b5.txt @@ -0,0 +1,73 @@ +__signature__ +keras.applications.EfficientNetB5( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + **kwargs +) +__doc__ +Instantiates the EfficientNetB5 architecture. + +Reference: +- [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks]( + https://arxiv.org/abs/1905.11946) (ICML 2019) + +This function returns a Keras image classification model, +optionally loaded with weights pre-trained on ImageNet. + +For image classification use cases, see +[this page for detailed examples]( +https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( +https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. +For EfficientNet, input preprocessing is included as part of the model +(as a `Rescaling` layer), and thus +`keras.applications.efficientnet.preprocess_input` is actually a +pass-through function. EfficientNet models expect their inputs to be float +tensors of pixels with values in the `[0-255]` range. + +Args: + include_top: Whether to include the fully-connected + layer at the top of the network. Defaults to `True`. + weights: One of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), + or the path to the weights file to be loaded. + Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: Optional shape tuple, only to be specified + if `include_top` is False. + It should have exactly 3 inputs channels. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. Defaults to `None`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. 1000 is how many + ImageNet classes there are. Defaults to `1000`. + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + Defaults to `'softmax'`. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A model instance. 
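The `pooling` argument repeated across the EfficientNet docs above is easiest to see in code. A sketch, assuming the B0 variant with random weights: with `include_top=False`, `pooling="avg"` collapses the 4D feature map into a 2D `(batch, channels)` tensor.

```
import numpy as np
import keras

backbone = keras.applications.EfficientNetB0(
    include_top=False, weights=None, pooling="avg"
)
# EfficientNet embeds a Rescaling layer, so raw [0, 255] floats are valid input.
x = np.random.uniform(0, 255, size=(2, 224, 224, 3)).astype("float32")
print(backbone.predict(x).shape)  # (2, 1280): 2D because pooling="avg"
```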
diff --git a/.tether/man/application_efficientnet_b6.txt b/.tether/man/application_efficientnet_b6.txt new file mode 100644 index 0000000000..928b0c8d52 --- /dev/null +++ b/.tether/man/application_efficientnet_b6.txt @@ -0,0 +1,73 @@ +__signature__ +keras.applications.EfficientNetB6( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + **kwargs +) +__doc__ +Instantiates the EfficientNetB6 architecture. + +Reference: +- [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks]( + https://arxiv.org/abs/1905.11946) (ICML 2019) + +This function returns a Keras image classification model, +optionally loaded with weights pre-trained on ImageNet. + +For image classification use cases, see +[this page for detailed examples]( +https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( +https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. +For EfficientNet, input preprocessing is included as part of the model +(as a `Rescaling` layer), and thus +`keras.applications.efficientnet.preprocess_input` is actually a +pass-through function. EfficientNet models expect their inputs to be float +tensors of pixels with values in the `[0-255]` range. + +Args: + include_top: Whether to include the fully-connected + layer at the top of the network. Defaults to `True`. + weights: One of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), + or the path to the weights file to be loaded. + Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: Optional shape tuple, only to be specified + if `include_top` is False. + It should have exactly 3 inputs channels. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. Defaults to `None`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. 1000 is how many + ImageNet classes there are. Defaults to `1000`. + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + Defaults to `'softmax'`. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_efficientnet_b7.txt b/.tether/man/application_efficientnet_b7.txt new file mode 100644 index 0000000000..e0c1b48cfb --- /dev/null +++ b/.tether/man/application_efficientnet_b7.txt @@ -0,0 +1,73 @@ +__signature__ +keras.applications.EfficientNetB7( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + **kwargs +) +__doc__ +Instantiates the EfficientNetB7 architecture. 
+ +Reference: +- [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks]( + https://arxiv.org/abs/1905.11946) (ICML 2019) + +This function returns a Keras image classification model, +optionally loaded with weights pre-trained on ImageNet. + +For image classification use cases, see +[this page for detailed examples]( +https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( +https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. +For EfficientNet, input preprocessing is included as part of the model +(as a `Rescaling` layer), and thus +`keras.applications.efficientnet.preprocess_input` is actually a +pass-through function. EfficientNet models expect their inputs to be float +tensors of pixels with values in the `[0-255]` range. + +Args: + include_top: Whether to include the fully-connected + layer at the top of the network. Defaults to `True`. + weights: One of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), + or the path to the weights file to be loaded. + Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: Optional shape tuple, only to be specified + if `include_top` is False. + It should have exactly 3 inputs channels. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. Defaults to `None`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. 1000 is how many + ImageNet classes there are. Defaults to `1000`. + classifier_activation: A `str` or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + Defaults to `'softmax'`. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_efficientnet_v2b0.txt b/.tether/man/application_efficientnet_v2b0.txt new file mode 100644 index 0000000000..e75cbe3f1a --- /dev/null +++ b/.tether/man/application_efficientnet_v2b0.txt @@ -0,0 +1,76 @@ +__signature__ +keras.applications.EfficientNetV2B0( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + include_preprocessing=True +) +__doc__ +Instantiates the EfficientNetV2B0 architecture. + +Reference: +- [EfficientNetV2: Smaller Models and Faster Training]( + https://arxiv.org/abs/2104.00298) (ICML 2021) + +This function returns a Keras image classification model, +optionally loaded with weights pre-trained on ImageNet. + +For image classification use cases, see +[this page for detailed examples]( +https://keras.io/api/applications/#usage-examples-for-image-classification-models). 
+ +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( +https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. +For EfficientNetV2, by default input preprocessing is included as a part of +the model (as a `Rescaling` layer), and thus +`keras.applications.efficientnet_v2.preprocess_input` is actually a +pass-through function. In this use case, EfficientNetV2 models expect their +inputs to be float tensors of pixels with values in the `[0, 255]` range. +At the same time, preprocessing as a part of the model (i.e. `Rescaling` +layer) can be disabled by setting `include_preprocessing` argument to `False`. +With preprocessing disabled EfficientNetV2 models expect their inputs to be +float tensors of pixels with values in the `[-1, 1]` range. + +Args: + include_top: Boolean, whether to include the fully-connected + layer at the top of the network. Defaults to `True`. + weights: One of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), + or the path to the weights file to be loaded. Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: Optional shape tuple, only to be specified + if `include_top` is `False`. + It should have exactly 3 inputs channels. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. Defaults to None. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `"avg"` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `"max"` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. Defaults to 1000 (number of + ImageNet classes). + classifier_activation: A string or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + Defaults to `"softmax"`. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_efficientnet_v2b1.txt b/.tether/man/application_efficientnet_v2b1.txt new file mode 100644 index 0000000000..e61a40a69b --- /dev/null +++ b/.tether/man/application_efficientnet_v2b1.txt @@ -0,0 +1,76 @@ +__signature__ +keras.applications.EfficientNetV2B1( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + include_preprocessing=True +) +__doc__ +Instantiates the EfficientNetV2B1 architecture. + +Reference: +- [EfficientNetV2: Smaller Models and Faster Training]( + https://arxiv.org/abs/2104.00298) (ICML 2021) + +This function returns a Keras image classification model, +optionally loaded with weights pre-trained on ImageNet. + +For image classification use cases, see +[this page for detailed examples]( +https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( +https://keras.io/guides/transfer_learning/). 
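The `include_preprocessing` switch described in the EfficientNetV2 note above changes the expected input range. A hedged sketch, assuming the V2B0 variant with random weights: with preprocessing disabled, inputs must be rescaled to `[-1, 1]` by hand.

```
import numpy as np
import keras

model = keras.applications.EfficientNetV2B0(
    include_top=False, weights=None, pooling="avg", include_preprocessing=False
)
x = np.random.uniform(0, 255, size=(1, 224, 224, 3)).astype("float32")
x = x / 127.5 - 1.0  # manual rescale to [-1, 1], standing in for the disabled Rescaling layer
print(model.predict(x).shape)  # (1, 1280)
```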
+ +Note: each Keras Application expects a specific kind of input preprocessing. +For EfficientNetV2, by default input preprocessing is included as a part of +the model (as a `Rescaling` layer), and thus +`keras.applications.efficientnet_v2.preprocess_input` is actually a +pass-through function. In this use case, EfficientNetV2 models expect their +inputs to be float tensors of pixels with values in the `[0, 255]` range. +At the same time, preprocessing as a part of the model (i.e. `Rescaling` +layer) can be disabled by setting `include_preprocessing` argument to `False`. +With preprocessing disabled EfficientNetV2 models expect their inputs to be +float tensors of pixels with values in the `[-1, 1]` range. + +Args: + include_top: Boolean, whether to include the fully-connected + layer at the top of the network. Defaults to `True`. + weights: One of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), + or the path to the weights file to be loaded. Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: Optional shape tuple, only to be specified + if `include_top` is `False`. + It should have exactly 3 inputs channels. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. Defaults to None. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `"avg"` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `"max"` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. Defaults to 1000 (number of + ImageNet classes). + classifier_activation: A string or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + Defaults to `"softmax"`. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_efficientnet_v2b2.txt b/.tether/man/application_efficientnet_v2b2.txt new file mode 100644 index 0000000000..edbd5e7041 --- /dev/null +++ b/.tether/man/application_efficientnet_v2b2.txt @@ -0,0 +1,76 @@ +__signature__ +keras.applications.EfficientNetV2B2( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + include_preprocessing=True +) +__doc__ +Instantiates the EfficientNetV2B2 architecture. + +Reference: +- [EfficientNetV2: Smaller Models and Faster Training]( + https://arxiv.org/abs/2104.00298) (ICML 2021) + +This function returns a Keras image classification model, +optionally loaded with weights pre-trained on ImageNet. + +For image classification use cases, see +[this page for detailed examples]( +https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( +https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. 
+For EfficientNetV2, by default input preprocessing is included as a part of +the model (as a `Rescaling` layer), and thus +`keras.applications.efficientnet_v2.preprocess_input` is actually a +pass-through function. In this use case, EfficientNetV2 models expect their +inputs to be float tensors of pixels with values in the `[0, 255]` range. +At the same time, preprocessing as a part of the model (i.e. `Rescaling` +layer) can be disabled by setting `include_preprocessing` argument to `False`. +With preprocessing disabled EfficientNetV2 models expect their inputs to be +float tensors of pixels with values in the `[-1, 1]` range. + +Args: + include_top: Boolean, whether to include the fully-connected + layer at the top of the network. Defaults to `True`. + weights: One of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), + or the path to the weights file to be loaded. Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: Optional shape tuple, only to be specified + if `include_top` is `False`. + It should have exactly 3 inputs channels. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. Defaults to None. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `"avg"` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `"max"` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. Defaults to 1000 (number of + ImageNet classes). + classifier_activation: A string or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + Defaults to `"softmax"`. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_efficientnet_v2b3.txt b/.tether/man/application_efficientnet_v2b3.txt new file mode 100644 index 0000000000..651433155c --- /dev/null +++ b/.tether/man/application_efficientnet_v2b3.txt @@ -0,0 +1,76 @@ +__signature__ +keras.applications.EfficientNetV2B3( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + include_preprocessing=True +) +__doc__ +Instantiates the EfficientNetV2B3 architecture. + +Reference: +- [EfficientNetV2: Smaller Models and Faster Training]( + https://arxiv.org/abs/2104.00298) (ICML 2021) + +This function returns a Keras image classification model, +optionally loaded with weights pre-trained on ImageNet. + +For image classification use cases, see +[this page for detailed examples]( +https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( +https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. 
+For EfficientNetV2, by default input preprocessing is included as a part of +the model (as a `Rescaling` layer), and thus +`keras.applications.efficientnet_v2.preprocess_input` is actually a +pass-through function. In this use case, EfficientNetV2 models expect their +inputs to be float tensors of pixels with values in the `[0, 255]` range. +At the same time, preprocessing as a part of the model (i.e. `Rescaling` +layer) can be disabled by setting `include_preprocessing` argument to `False`. +With preprocessing disabled EfficientNetV2 models expect their inputs to be +float tensors of pixels with values in the `[-1, 1]` range. + +Args: + include_top: Boolean, whether to include the fully-connected + layer at the top of the network. Defaults to `True`. + weights: One of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), + or the path to the weights file to be loaded. Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: Optional shape tuple, only to be specified + if `include_top` is `False`. + It should have exactly 3 inputs channels. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. Defaults to None. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `"avg"` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `"max"` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. Defaults to 1000 (number of + ImageNet classes). + classifier_activation: A string or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + Defaults to `"softmax"`. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_efficientnet_v2l.txt b/.tether/man/application_efficientnet_v2l.txt new file mode 100644 index 0000000000..fa1044efbb --- /dev/null +++ b/.tether/man/application_efficientnet_v2l.txt @@ -0,0 +1,76 @@ +__signature__ +keras.applications.EfficientNetV2L( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + include_preprocessing=True +) +__doc__ +Instantiates the EfficientNetV2L architecture. + +Reference: +- [EfficientNetV2: Smaller Models and Faster Training]( + https://arxiv.org/abs/2104.00298) (ICML 2021) + +This function returns a Keras image classification model, +optionally loaded with weights pre-trained on ImageNet. + +For image classification use cases, see +[this page for detailed examples]( +https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( +https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. 
+For EfficientNetV2, by default input preprocessing is included as a part of +the model (as a `Rescaling` layer), and thus +`keras.applications.efficientnet_v2.preprocess_input` is actually a +pass-through function. In this use case, EfficientNetV2 models expect their +inputs to be float tensors of pixels with values in the `[0, 255]` range. +At the same time, preprocessing as a part of the model (i.e. `Rescaling` +layer) can be disabled by setting `include_preprocessing` argument to `False`. +With preprocessing disabled EfficientNetV2 models expect their inputs to be +float tensors of pixels with values in the `[-1, 1]` range. + +Args: + include_top: Boolean, whether to include the fully-connected + layer at the top of the network. Defaults to `True`. + weights: One of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), + or the path to the weights file to be loaded. Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: Optional shape tuple, only to be specified + if `include_top` is `False`. + It should have exactly 3 inputs channels. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. Defaults to None. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `"avg"` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `"max"` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. Defaults to 1000 (number of + ImageNet classes). + classifier_activation: A string or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + Defaults to `"softmax"`. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_efficientnet_v2m.txt b/.tether/man/application_efficientnet_v2m.txt new file mode 100644 index 0000000000..c81200662f --- /dev/null +++ b/.tether/man/application_efficientnet_v2m.txt @@ -0,0 +1,76 @@ +__signature__ +keras.applications.EfficientNetV2M( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + include_preprocessing=True +) +__doc__ +Instantiates the EfficientNetV2M architecture. + +Reference: +- [EfficientNetV2: Smaller Models and Faster Training]( + https://arxiv.org/abs/2104.00298) (ICML 2021) + +This function returns a Keras image classification model, +optionally loaded with weights pre-trained on ImageNet. + +For image classification use cases, see +[this page for detailed examples]( +https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( +https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. 
+For EfficientNetV2, by default input preprocessing is included as a part of +the model (as a `Rescaling` layer), and thus +`keras.applications.efficientnet_v2.preprocess_input` is actually a +pass-through function. In this use case, EfficientNetV2 models expect their +inputs to be float tensors of pixels with values in the `[0, 255]` range. +At the same time, preprocessing as a part of the model (i.e. `Rescaling` +layer) can be disabled by setting `include_preprocessing` argument to `False`. +With preprocessing disabled EfficientNetV2 models expect their inputs to be +float tensors of pixels with values in the `[-1, 1]` range. + +Args: + include_top: Boolean, whether to include the fully-connected + layer at the top of the network. Defaults to `True`. + weights: One of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), + or the path to the weights file to be loaded. Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: Optional shape tuple, only to be specified + if `include_top` is `False`. + It should have exactly 3 inputs channels. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. Defaults to None. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `"avg"` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `"max"` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. Defaults to 1000 (number of + ImageNet classes). + classifier_activation: A string or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + Defaults to `"softmax"`. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_efficientnet_v2s.txt b/.tether/man/application_efficientnet_v2s.txt new file mode 100644 index 0000000000..eb0a2e95a7 --- /dev/null +++ b/.tether/man/application_efficientnet_v2s.txt @@ -0,0 +1,76 @@ +__signature__ +keras.applications.EfficientNetV2S( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + include_preprocessing=True +) +__doc__ +Instantiates the EfficientNetV2S architecture. + +Reference: +- [EfficientNetV2: Smaller Models and Faster Training]( + https://arxiv.org/abs/2104.00298) (ICML 2021) + +This function returns a Keras image classification model, +optionally loaded with weights pre-trained on ImageNet. + +For image classification use cases, see +[this page for detailed examples]( +https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( +https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. 
+For EfficientNetV2, by default input preprocessing is included as a part of +the model (as a `Rescaling` layer), and thus +`keras.applications.efficientnet_v2.preprocess_input` is actually a +pass-through function. In this use case, EfficientNetV2 models expect their +inputs to be float tensors of pixels with values in the `[0, 255]` range. +At the same time, preprocessing as a part of the model (i.e. `Rescaling` +layer) can be disabled by setting `include_preprocessing` argument to `False`. +With preprocessing disabled EfficientNetV2 models expect their inputs to be +float tensors of pixels with values in the `[-1, 1]` range. + +Args: + include_top: Boolean, whether to include the fully-connected + layer at the top of the network. Defaults to `True`. + weights: One of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), + or the path to the weights file to be loaded. Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: Optional shape tuple, only to be specified + if `include_top` is `False`. + It should have exactly 3 inputs channels. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. Defaults to None. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `"avg"` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `"max"` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. Defaults to 1000 (number of + ImageNet classes). + classifier_activation: A string or callable. The activation function to use + on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + Defaults to `"softmax"`. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_inception_resnet_v2.txt b/.tether/man/application_inception_resnet_v2.txt new file mode 100644 index 0000000000..e9c3b0b361 --- /dev/null +++ b/.tether/man/application_inception_resnet_v2.txt @@ -0,0 +1,74 @@ +__signature__ +keras.applications.InceptionResNetV2( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +__doc__ +Instantiates the Inception-ResNet v2 architecture. + +Reference: +- [Inception-v4, Inception-ResNet and the Impact of + Residual Connections on Learning](https://arxiv.org/abs/1602.07261) + (AAAI 2017) + +This function returns a Keras image classification model, +optionally loaded with weights pre-trained on ImageNet. + +For image classification use cases, see +[this page for detailed examples]( + https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( + https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of +input preprocessing. For InceptionResNetV2, call +`keras.applications.inception_resnet_v2.preprocess_input` +on your inputs before passing them to the model. 
+`inception_resnet_v2.preprocess_input`
+will scale input pixels between -1 and 1.
+
+Args:
+    include_top: whether to include the fully-connected
+        layer at the top of the network.
+    weights: one of `None` (random initialization),
+        `"imagenet"` (pre-training on ImageNet),
+        or the path to the weights file to be loaded.
+    input_tensor: optional Keras tensor
+        (i.e. output of `layers.Input()`)
+        to use as image input for the model.
+    input_shape: optional shape tuple, only to be specified
+        if `include_top` is `False` (otherwise the input shape
+        has to be `(299, 299, 3)`
+        (with `'channels_last'` data format)
+        or `(3, 299, 299)` (with `'channels_first'` data format)).
+        It should have exactly 3 input channels,
+        and width and height should be no smaller than 75.
+        E.g. `(150, 150, 3)` would be one valid value.
+    pooling: Optional pooling mode for feature extraction
+        when `include_top` is `False`.
+        - `None` means that the output of the model will be
+          the 4D tensor output of the last convolutional block.
+        - `'avg'` means that global average pooling
+          will be applied to the output of the
+          last convolutional block, and thus
+          the output of the model will be a 2D tensor.
+        - `'max'` means that global max pooling will be applied.
+    classes: optional number of classes to classify images
+        into, only to be specified if `include_top` is `True`,
+        and if no `weights` argument is specified.
+    classifier_activation: A `str` or callable.
+        The activation function to use on the "top" layer.
+        Ignored unless `include_top=True`.
+        Set `classifier_activation=None` to return the logits
+        of the "top" layer. When loading pretrained weights,
+        `classifier_activation` can only be `None` or `"softmax"`.
+
+Returns:
+    A model instance.
diff --git a/.tether/man/application_inception_v3.txt b/.tether/man/application_inception_v3.txt
new file mode 100644
index 0000000000..e01a75a374
--- /dev/null
+++ b/.tether/man/application_inception_v3.txt
@@ -0,0 +1,74 @@
+__signature__
+keras.applications.InceptionV3(
+    include_top=True,
+    weights='imagenet',
+    input_tensor=None,
+    input_shape=None,
+    pooling=None,
+    classes=1000,
+    classifier_activation='softmax'
+)
+__doc__
+Instantiates the Inception v3 architecture.
+
+Reference:
+- [Rethinking the Inception Architecture for Computer Vision](
+    http://arxiv.org/abs/1512.00567) (CVPR 2016)
+
+This function returns a Keras image classification model,
+optionally loaded with weights pre-trained on ImageNet.
+
+For image classification use cases, see
+[this page for detailed examples](
+    https://keras.io/api/applications/#usage-examples-for-image-classification-models).
+
+For transfer learning use cases, make sure to read the
+[guide to transfer learning & fine-tuning](
+    https://keras.io/guides/transfer_learning/).
+
+Note: each Keras Application expects a specific kind of input preprocessing.
+For `InceptionV3`, call
+`keras.applications.inception_v3.preprocess_input` on your inputs
+before passing them to the model.
+`inception_v3.preprocess_input` will scale input pixels between -1 and 1.
+
+Args:
+    include_top: Boolean, whether to include the fully-connected
+        layer at the top, as the last layer of the network.
+        Defaults to `True`.
+    weights: One of `None` (random initialization),
+        `"imagenet"` (pre-training on ImageNet),
+        or the path to the weights file to be loaded.
+        Defaults to `"imagenet"`.
+    input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`)
+        to use as image input for the model.
`input_tensor` is useful for + sharing inputs between multiple different networks. + Defaults to `None`. + input_shape: Optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(299, 299, 3)` (with `channels_last` data format) + or `(3, 299, 299)` (with `channels_first` data format). + It should have exactly 3 inputs channels, + and width and height should be no smaller than 75. + E.g. `(150, 150, 3)` would be one valid value. + `input_shape` will be ignored if the `input_tensor` is provided. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` (default) means that the output of the model will be + the 4D tensor output of the last convolutional block. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional block, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. Defaults to 1000. + classifier_activation: A `str` or callable. The activation function + to use on the "top" layer. Ignored unless `include_top=True`. + Set `classifier_activation=None` to return the logits of the "top" + layer. When loading pretrained weights, `classifier_activation` + can only be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_mobilenet.txt b/.tether/man/application_mobilenet.txt new file mode 100644 index 0000000000..095e3e674d --- /dev/null +++ b/.tether/man/application_mobilenet.txt @@ -0,0 +1,87 @@ +__signature__ +keras.applications.MobileNet( + input_shape=None, + alpha=1.0, + depth_multiplier=1, + dropout=0.001, + include_top=True, + weights='imagenet', + input_tensor=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +__doc__ +Instantiates the MobileNet architecture. + +Reference: +- [MobileNets: Efficient Convolutional Neural Networks + for Mobile Vision Applications]( + https://arxiv.org/abs/1704.04861) + +This function returns a Keras image classification model, +optionally loaded with weights pre-trained on ImageNet. + +For image classification use cases, see +[this page for detailed examples]( +https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( +https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. +For MobileNet, call `keras.applications.mobilenet.preprocess_input` +on your inputs before passing them to the model. +`mobilenet.preprocess_input` will scale input pixels between -1 and 1. + +Args: + input_shape: Optional shape tuple, only to be specified if `include_top` + is `False` (otherwise the input shape has to be `(224, 224, 3)` + (with `"channels_last"` data format) or `(3, 224, 224)` + (with `"channels_first"` data format). + It should have exactly 3 inputs channels, and width and + height should be no smaller than 32. E.g. `(200, 200, 3)` would + be one valid value. Defaults to `None`. + `input_shape` will be ignored if the `input_tensor` is provided. + alpha: Controls the width of the network. This is known as the width + multiplier in the MobileNet paper. + - If `alpha < 1.0`, proportionally decreases the number + of filters in each layer. 
+ - If `alpha > 1.0`, proportionally increases the number + of filters in each layer. + - If `alpha == 1`, default number of filters from the paper + are used at each layer. Defaults to `1.0`. + depth_multiplier: Depth multiplier for depthwise convolution. + This is called the resolution multiplier in the MobileNet paper. + Defaults to `1.0`. + dropout: Dropout rate. Defaults to `0.001`. + include_top: Boolean, whether to include the fully-connected layer + at the top of the network. Defaults to `True`. + weights: One of `None` (random initialization), `"imagenet"` + (pre-training on ImageNet), or the path to the weights file + to be loaded. Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) + to use as image input for the model. `input_tensor` is useful + for sharing inputs between multiple different networks. + Defaults to `None`. + pooling: Optional pooling mode for feature extraction when `include_top` + is `False`. + - `None` (default) means that the output of the model will be + the 4D tensor output of the last convolutional block. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional block, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will be applied. + classes: Optional number of classes to classify images into, + only to be specified if `include_top` is `True`, and if + no `weights` argument is specified. Defaults to `1000`. + classifier_activation: A `str` or callable. The activation function + to use on the "top" layer. Ignored unless `include_top=True`. + Set `classifier_activation=None` to return the logits of the "top" + layer. When loading pretrained weights, `classifier_activation` + can only be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_mobilenet_v2.txt b/.tether/man/application_mobilenet_v2.txt new file mode 100644 index 0000000000..8c804cbe03 --- /dev/null +++ b/.tether/man/application_mobilenet_v2.txt @@ -0,0 +1,89 @@ +__signature__ +keras.applications.MobileNetV2( + input_shape=None, + alpha=1.0, + include_top=True, + weights='imagenet', + input_tensor=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +__doc__ +Instantiates the MobileNetV2 architecture. + +MobileNetV2 is very similar to the original MobileNet, +except that it uses inverted residual blocks with +bottlenecking features. It has a drastically lower +parameter count than the original MobileNet. +MobileNets support any input size greater +than 32 x 32, with larger image sizes +offering better performance. + +Reference: +- [MobileNetV2: Inverted Residuals and Linear Bottlenecks]( + https://arxiv.org/abs/1801.04381) (CVPR 2018) + +This function returns a Keras image classification model, +optionally loaded with weights pre-trained on ImageNet. + +For image classification use cases, see +[this page for detailed examples]( + https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( + https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. +For MobileNetV2, call +`keras.applications.mobilenet_v2.preprocess_input` +on your inputs before passing them to the model. +`mobilenet_v2.preprocess_input` will scale input pixels between -1 and 1. 
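+
+For example, a minimal sketch of that preprocessing contract (random
+pixels stand in for a real image batch):
+
+```python
+import numpy as np
+import keras
+
+# MobileNetV2 has no built-in Rescaling layer, so inputs must be
+# preprocessed explicitly: preprocess_input maps [0, 255] to [-1, 1].
+model = keras.applications.MobileNetV2(weights="imagenet")
+
+x = np.random.uniform(0, 255, size=(1, 224, 224, 3)).astype("float32")
+x = keras.applications.mobilenet_v2.preprocess_input(x)
+
+preds = model.predict(x)
+print(preds.shape)  # (1, 1000)
+```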
+ +Args: + input_shape: Optional shape tuple, only to be specified if `include_top` + is `False` (otherwise the input shape has to be `(224, 224, 3)` + (with `"channels_last"` data format) or `(3, 224, 224)` + (with `"channels_first"` data format). + It should have exactly 3 inputs channels, and width and + height should be no smaller than 32. E.g. `(200, 200, 3)` would + be one valid value. Defaults to `None`. + `input_shape` will be ignored if the `input_tensor` is provided. + alpha: Controls the width of the network. This is known as the width + multiplier in the MobileNet paper. + - If `alpha < 1.0`, proportionally decreases the number + of filters in each layer. + - If `alpha > 1.0`, proportionally increases the number + of filters in each layer. + - If `alpha == 1`, default number of filters from the paper + are used at each layer. Defaults to `1.0`. + include_top: Boolean, whether to include the fully-connected layer + at the top of the network. Defaults to `True`. + weights: One of `None` (random initialization), `"imagenet"` + (pre-training on ImageNet), or the path to the weights file + to be loaded. Defaults to `"imagenet"`. + input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`) + to use as image input for the model. `input_tensor` is useful + for sharing inputs between multiple different networks. + Defaults to `None`. + pooling: Optional pooling mode for feature extraction when `include_top` + is `False`. + - `None` (default) means that the output of the model will be + the 4D tensor output of the last convolutional block. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional block, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will be applied. + classes: Optional number of classes to classify images into, + only to be specified if `include_top` is `True`, and if + no `weights` argument is specified. Defaults to `1000`. + classifier_activation: A `str` or callable. The activation function + to use on the "top" layer. Ignored unless `include_top=True`. + Set `classifier_activation=None` to return the logits of the "top" + layer. When loading pretrained weights, `classifier_activation` + can only be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_mobilenet_v3_large.txt b/.tether/man/application_mobilenet_v3_large.txt new file mode 100644 index 0000000000..542d5a7674 --- /dev/null +++ b/.tether/man/application_mobilenet_v3_large.txt @@ -0,0 +1,120 @@ +__signature__ +keras.applications.MobileNetV3Large( + input_shape=None, + alpha=1.0, + minimalistic=False, + include_top=True, + weights='imagenet', + input_tensor=None, + classes=1000, + pooling=None, + dropout_rate=0.2, + classifier_activation='softmax', + include_preprocessing=True +) +__doc__ +Instantiates the MobileNetV3Large architecture. 
+
+Reference:
+- [Searching for MobileNetV3](
+    https://arxiv.org/pdf/1905.02244.pdf) (ICCV 2019)
+
+The following table describes the performance of MobileNets v3
+(MACs stands for Multiply Adds):
+
+|Classification Checkpoint|MACs(M)|Parameters(M)|Top1 Accuracy|Pixel1 CPU(ms)|
+|---|---|---|---|---|
+| mobilenet_v3_large_1.0_224 | 217 | 5.4 | 75.6 | 51.2 |
+| mobilenet_v3_large_0.75_224 | 155 | 4.0 | 73.3 | 39.8 |
+| mobilenet_v3_large_minimalistic_1.0_224 | 209 | 3.9 | 72.3 | 44.1 |
+| mobilenet_v3_small_1.0_224 | 66 | 2.9 | 68.1 | 15.8 |
+| mobilenet_v3_small_0.75_224 | 44 | 2.4 | 65.4 | 12.8 |
+| mobilenet_v3_small_minimalistic_1.0_224 | 65 | 2.0 | 61.9 | 12.2 |
+
+For image classification use cases, see
+[this page for detailed examples](
+https://keras.io/api/applications/#usage-examples-for-image-classification-models).
+
+For transfer learning use cases, make sure to read the
+[guide to transfer learning & fine-tuning](
+https://keras.io/guides/transfer_learning/).
+
+Note: each Keras Application expects a specific kind of input preprocessing.
+For MobileNetV3, by default input preprocessing is included as a part of the
+model (as a `Rescaling` layer), and thus
+`keras.applications.mobilenet_v3.preprocess_input` is actually a
+pass-through function. In this use case, MobileNetV3 models expect their
+inputs to be float tensors of pixels with values in the `[0, 255]` range.
+At the same time, preprocessing as a part of the model (i.e. the `Rescaling`
+layer) can be disabled by setting the `include_preprocessing` argument to
+`False`. With preprocessing disabled, MobileNetV3 models expect their inputs
+to be float tensors of pixels with values in the `[-1, 1]` range.
+
+Args:
+    input_shape: Optional shape tuple, to be specified if you would
+        like to use a model with an input image resolution that is not
+        `(224, 224, 3)`.
+        It should have exactly 3 input channels.
+        You can also omit this option if you would like
+        to infer `input_shape` from an `input_tensor`.
+        If you choose to include both `input_tensor` and `input_shape`,
+        then `input_shape` will be used if they match; if the shapes
+        do not match, an error is thrown.
+        E.g. `(160, 160, 3)` would be one valid value.
+    alpha: Controls the width of the network. This is known as the
+        depth multiplier in the MobileNetV3 paper, but the name is kept
+        for consistency with MobileNetV1 in Keras.
+        - If `alpha < 1.0`, proportionally decreases the number
+          of filters in each layer.
+        - If `alpha > 1.0`, proportionally increases the number
+          of filters in each layer.
+        - If `alpha == 1`, the default number of filters from the paper
+          is used at each layer.
+    minimalistic: In addition to large and small models, this module
+        also contains so-called minimalistic models. These models have
+        the same per-layer dimensions as MobileNetV3; however, they
+        don't utilize any of the advanced blocks (squeeze-and-excite
+        units, hard-swish, and 5x5 convolutions).
+        While these models are less efficient on CPU, they
+        are much more performant on GPU/DSP.
+    include_top: Boolean, whether to include the fully-connected
+        layer at the top of the network. Defaults to `True`.
+    weights: String, one of `None` (random initialization),
+        `"imagenet"` (pre-training on ImageNet),
+        or the path to the weights file to be loaded.
+    input_tensor: Optional Keras tensor (i.e. output of
+        `layers.Input()`) to use as image input for the model.
+    pooling: String, optional pooling mode for feature extraction
+        when `include_top` is `False`.
+        - `None` means that the output of the model
+          will be the 4D tensor output of the
+          last convolutional block.
+        - `avg` means that global average pooling
+          will be applied to the output of the
+          last convolutional block, and thus
+          the output of the model will be a
+          2D tensor.
+        - `max` means that global max pooling will
+          be applied.
+    classes: Integer, optional number of classes to classify images
+        into, only to be specified if `include_top` is `True`, and
+        if no `weights` argument is specified.
+    dropout_rate: Fraction of the input units to drop on the last layer.
+    classifier_activation: A `str` or callable. The activation function
+        to use on the "top" layer. Ignored unless `include_top=True`.
+        Set `classifier_activation=None` to return the logits of the
+        "top" layer. When loading pretrained weights,
+        `classifier_activation` can only be `None` or `"softmax"`.
+    include_preprocessing: Boolean, whether to include the preprocessing
+        layer (`Rescaling`) at the bottom of the network. Defaults to
+        `True`.
+
+Call arguments:
+    inputs: A floating point `numpy.array` or backend-native tensor,
+        4D with 3 color channels, with values in the range `[0, 255]`
+        if `include_preprocessing` is `True` and in the range `[-1, 1]`
+        otherwise.
+
+Returns:
+    A model instance.
diff --git a/.tether/man/application_mobilenet_v3_small.txt b/.tether/man/application_mobilenet_v3_small.txt
new file mode 100644
index 0000000000..8cb001c8d9
--- /dev/null
+++ b/.tether/man/application_mobilenet_v3_small.txt
@@ -0,0 +1,120 @@
+__signature__
+keras.applications.MobileNetV3Small(
+    input_shape=None,
+    alpha=1.0,
+    minimalistic=False,
+    include_top=True,
+    weights='imagenet',
+    input_tensor=None,
+    classes=1000,
+    pooling=None,
+    dropout_rate=0.2,
+    classifier_activation='softmax',
+    include_preprocessing=True
+)
+__doc__
+Instantiates the MobileNetV3Small architecture.
+
+Reference:
+- [Searching for MobileNetV3](
+    https://arxiv.org/pdf/1905.02244.pdf) (ICCV 2019)
+
+The following table describes the performance of MobileNets v3
+(MACs stands for Multiply Adds):
+
+|Classification Checkpoint|MACs(M)|Parameters(M)|Top1 Accuracy|Pixel1 CPU(ms)|
+|---|---|---|---|---|
+| mobilenet_v3_large_1.0_224 | 217 | 5.4 | 75.6 | 51.2 |
+| mobilenet_v3_large_0.75_224 | 155 | 4.0 | 73.3 | 39.8 |
+| mobilenet_v3_large_minimalistic_1.0_224 | 209 | 3.9 | 72.3 | 44.1 |
+| mobilenet_v3_small_1.0_224 | 66 | 2.9 | 68.1 | 15.8 |
+| mobilenet_v3_small_0.75_224 | 44 | 2.4 | 65.4 | 12.8 |
+| mobilenet_v3_small_minimalistic_1.0_224 | 65 | 2.0 | 61.9 | 12.2 |
+
+For image classification use cases, see
+[this page for detailed examples](
+https://keras.io/api/applications/#usage-examples-for-image-classification-models).
+
+For transfer learning use cases, make sure to read the
+[guide to transfer learning & fine-tuning](
+https://keras.io/guides/transfer_learning/).
+
+Note: each Keras Application expects a specific kind of input preprocessing.
+For MobileNetV3, by default input preprocessing is included as a part of the
+model (as a `Rescaling` layer), and thus
+`keras.applications.mobilenet_v3.preprocess_input` is actually a
+pass-through function. In this use case, MobileNetV3 models expect their
+inputs to be float tensors of pixels with values in the `[0, 255]` range.
+At the same time, preprocessing as a part of the model (i.e. the `Rescaling`
+layer) can be disabled by setting the `include_preprocessing` argument to
+`False`. With preprocessing disabled, MobileNetV3 models expect their inputs
+to be float tensors of pixels with values in the `[-1, 1]` range.
+
+Args:
+    input_shape: Optional shape tuple, to be specified if you would
+        like to use a model with an input image resolution that is not
+        `(224, 224, 3)`.
+        It should have exactly 3 input channels.
+        You can also omit this option if you would like
+        to infer `input_shape` from an `input_tensor`.
+        If you choose to include both `input_tensor` and `input_shape`,
+        then `input_shape` will be used if they match; if the shapes
+        do not match, an error is thrown.
+        E.g. `(160, 160, 3)` would be one valid value.
+    alpha: Controls the width of the network. This is known as the
+        depth multiplier in the MobileNetV3 paper, but the name is kept
+        for consistency with MobileNetV1 in Keras.
+        - If `alpha < 1.0`, proportionally decreases the number
+          of filters in each layer.
+        - If `alpha > 1.0`, proportionally increases the number
+          of filters in each layer.
+        - If `alpha == 1`, the default number of filters from the paper
+          is used at each layer.
+    minimalistic: In addition to large and small models, this module
+        also contains so-called minimalistic models. These models have
+        the same per-layer dimensions as MobileNetV3; however, they
+        don't utilize any of the advanced blocks (squeeze-and-excite
+        units, hard-swish, and 5x5 convolutions).
+        While these models are less efficient on CPU, they
+        are much more performant on GPU/DSP.
+    include_top: Boolean, whether to include the fully-connected
+        layer at the top of the network. Defaults to `True`.
+    weights: String, one of `None` (random initialization),
+        `"imagenet"` (pre-training on ImageNet),
+        or the path to the weights file to be loaded.
+    input_tensor: Optional Keras tensor (i.e. output of
+        `layers.Input()`) to use as image input for the model.
+    pooling: String, optional pooling mode for feature extraction
+        when `include_top` is `False`.
+        - `None` means that the output of the model
+          will be the 4D tensor output of the
+          last convolutional block.
+        - `avg` means that global average pooling
+          will be applied to the output of the
+          last convolutional block, and thus
+          the output of the model will be a
+          2D tensor.
+        - `max` means that global max pooling will
+          be applied.
+    classes: Integer, optional number of classes to classify images
+        into, only to be specified if `include_top` is `True`, and
+        if no `weights` argument is specified.
+    dropout_rate: Fraction of the input units to drop on the last layer.
+    classifier_activation: A `str` or callable. The activation function
+        to use on the "top" layer. Ignored unless `include_top=True`.
+        Set `classifier_activation=None` to return the logits of the
+        "top" layer. When loading pretrained weights,
+        `classifier_activation` can only be `None` or `"softmax"`.
+    include_preprocessing: Boolean, whether to include the preprocessing
+        layer (`Rescaling`) at the bottom of the network. Defaults to
+        `True`.
+
+Call arguments:
+    inputs: A floating point `numpy.array` or backend-native tensor,
+        4D with 3 color channels, with values in the range `[0, 255]`
+        if `include_preprocessing` is `True` and in the range `[-1, 1]`
+        otherwise.
+
+Returns:
+    A model instance.
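+
+For example, a minimal feature-extraction sketch (the 576-wide output
+shown in the comment assumes `alpha=1.0`; other widths differ):
+
+```python
+import numpy as np
+import keras
+
+# Headless MobileNetV3Small as a feature extractor. With the default
+# include_preprocessing=True, raw [0, 255] pixels are accepted.
+base = keras.applications.MobileNetV3Small(include_top=False, pooling="avg")
+
+x = np.random.uniform(0, 255, size=(2, 224, 224, 3)).astype("float32")
+features = base.predict(x)
+print(features.shape)  # (2, 576) for alpha=1.0
+```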
diff --git a/.tether/man/application_nasnetlarge.txt b/.tether/man/application_nasnetlarge.txt new file mode 100644 index 0000000000..e5123c3f80 --- /dev/null +++ b/.tether/man/application_nasnetlarge.txt @@ -0,0 +1,63 @@ +__signature__ +keras.applications.NASNetLarge( + input_shape=None, + include_top=True, + weights='imagenet', + input_tensor=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +__doc__ +Instantiates a NASNet model in ImageNet mode. + +Reference: +- [Learning Transferable Architectures for Scalable Image Recognition]( + https://arxiv.org/abs/1707.07012) (CVPR 2018) + +Optionally loads weights pre-trained on ImageNet. +Note that the data format convention used by the model is +the one specified in your Keras config at `~/.keras/keras.json`. + +Note: each Keras Application expects a specific kind of input preprocessing. +For NASNet, call `keras.applications.nasnet.preprocess_input` on your +inputs before passing them to the model. + +Args: + input_shape: Optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(331, 331, 3)` for NASNetLarge. + It should have exactly 3 inputs channels, + and width and height should be no smaller than 32. + E.g. `(224, 224, 3)` would be one valid value. + include_top: Whether to include the fully-connected + layer at the top of the network. + weights: `None` (random initialization) or + `imagenet` (ImageNet weights). For loading `imagenet` weights, + `input_shape` should be (331, 331, 3) + input_tensor: Optional Keras tensor (i.e. output of + `layers.Input()`) + to use as image input for the model. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model + will be the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a + 2D tensor. + - `max` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. + classifier_activation: A `str` or callable. The activation function to + use on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" + layer. When loading pretrained weights, `classifier_activation` + can only be `None` or `"softmax"`. + +Returns: + A Keras model instance. diff --git a/.tether/man/application_nasnetmobile.txt b/.tether/man/application_nasnetmobile.txt new file mode 100644 index 0000000000..bec8825517 --- /dev/null +++ b/.tether/man/application_nasnetmobile.txt @@ -0,0 +1,63 @@ +__signature__ +keras.applications.NASNetMobile( + input_shape=None, + include_top=True, + weights='imagenet', + input_tensor=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +__doc__ +Instantiates a Mobile NASNet model in ImageNet mode. + +Reference: +- [Learning Transferable Architectures for Scalable Image Recognition]( + https://arxiv.org/abs/1707.07012) (CVPR 2018) + +Optionally loads weights pre-trained on ImageNet. +Note that the data format convention used by the model is +the one specified in your Keras config at `~/.keras/keras.json`. + +Note: each Keras Application expects a specific kind of input preprocessing. 
+For NASNet, call `keras.applications.nasnet.preprocess_input` on your +inputs before passing them to the model. + +Args: + input_shape: Optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(224, 224, 3)` for NASNetMobile + It should have exactly 3 inputs channels, + and width and height should be no smaller than 32. + E.g. `(224, 224, 3)` would be one valid value. + include_top: Whether to include the fully-connected + layer at the top of the network. + weights: `None` (random initialization) or + `imagenet` (ImageNet weights). For loading `imagenet` weights, + `input_shape` should be (224, 224, 3) + input_tensor: Optional Keras tensor (i.e. output of + `layers.Input()`) + to use as image input for the model. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model + will be the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a + 2D tensor. + - `max` means that global max pooling will + be applied. + classes: Optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. + classifier_activation: A `str` or callable. The activation function to + use on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" + layer. When loading pretrained weights, `classifier_activation` can + only be `None` or `"softmax"`. + +Returns: + A Keras model instance. diff --git a/.tether/man/application_resnet101.txt b/.tether/man/application_resnet101.txt new file mode 100644 index 0000000000..3ffe21d7ad --- /dev/null +++ b/.tether/man/application_resnet101.txt @@ -0,0 +1,63 @@ +__signature__ +keras.applications.ResNet101( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +__doc__ +Instantiates the ResNet101 architecture. + +Reference: +- [Deep Residual Learning for Image Recognition]( + https://arxiv.org/abs/1512.03385) (CVPR 2015) + +For image classification use cases, see [this page for detailed examples]( + https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( + https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. +For ResNet, call `keras.applications.resnet.preprocess_input` on your +inputs before passing them to the model. `resnet.preprocess_input` will convert +the input images from RGB to BGR, then will zero-center each color channel with +respect to the ImageNet dataset, without scaling. + +Args: + include_top: whether to include the fully-connected + layer at the top of the network. + weights: one of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), or the path to the weights + file to be loaded. + input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) + to use as image input for the model. 
+ input_shape: optional shape tuple, only to be specified if `include_top` + is `False` (otherwise the input shape has to be `(224, 224, 3)` + (with `"channels_last"` data format) or `(3, 224, 224)` + (with `"channels_first"` data format). It should have exactly 3 + inputs channels, and width and height should be no smaller than 32. + E.g. `(200, 200, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction when `include_top` + is `False`. + - `None` means that the output of the model will be the 4D tensor + output of the last convolutional block. + - `avg` means that global average pooling will be applied to the output + of the last convolutional block, and thus the output of the + model will be a 2D tensor. + - `max` means that global max pooling will be applied. + classes: optional number of classes to classify images into, only to be + specified if `include_top` is `True`, and if no `weights` argument is + specified. + classifier_activation: A `str` or callable. The activation function to + use on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A Model instance. diff --git a/.tether/man/application_resnet101_v2.txt b/.tether/man/application_resnet101_v2.txt new file mode 100644 index 0000000000..8a9026de1a --- /dev/null +++ b/.tether/man/application_resnet101_v2.txt @@ -0,0 +1,62 @@ +__signature__ +keras.applications.ResNet101V2( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +__doc__ +Instantiates the ResNet101V2 architecture. + +Reference: +- [Identity Mappings in Deep Residual Networks]( + https://arxiv.org/abs/1603.05027) (CVPR 2016) + +For image classification use cases, see [this page for detailed examples]( + https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( + https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. +For ResNet, call `keras.applications.resnet_v2.preprocess_input` on your +inputs before passing them to the model. `resnet_v2.preprocess_input` will +scale input pixels between -1 and 1. + +Args: + include_top: whether to include the fully-connected + layer at the top of the network. + weights: one of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), or the path to the weights + file to be loaded. + input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: optional shape tuple, only to be specified if `include_top` + is `False` (otherwise the input shape has to be `(224, 224, 3)` + (with `"channels_last"` data format) or `(3, 224, 224)` + (with `"channels_first"` data format). It should have exactly 3 + inputs channels, and width and height should be no smaller than 32. + E.g. `(200, 200, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction when `include_top` + is `False`. + - `None` means that the output of the model will be the 4D tensor + output of the last convolutional block. 
+ - `avg` means that global average pooling will be applied to the output + of the last convolutional block, and thus the output of the + model will be a 2D tensor. + - `max` means that global max pooling will be applied. + classes: optional number of classes to classify images into, only to be + specified if `include_top` is `True`, and if no `weights` argument is + specified. + classifier_activation: A `str` or callable. The activation function to + use on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A Model instance. diff --git a/.tether/man/application_resnet152.txt b/.tether/man/application_resnet152.txt new file mode 100644 index 0000000000..bf7ff708cf --- /dev/null +++ b/.tether/man/application_resnet152.txt @@ -0,0 +1,63 @@ +__signature__ +keras.applications.ResNet152( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +__doc__ +Instantiates the ResNet152 architecture. + +Reference: +- [Deep Residual Learning for Image Recognition]( + https://arxiv.org/abs/1512.03385) (CVPR 2015) + +For image classification use cases, see [this page for detailed examples]( + https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( + https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. +For ResNet, call `keras.applications.resnet.preprocess_input` on your +inputs before passing them to the model. `resnet.preprocess_input` will convert +the input images from RGB to BGR, then will zero-center each color channel with +respect to the ImageNet dataset, without scaling. + +Args: + include_top: whether to include the fully-connected + layer at the top of the network. + weights: one of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), or the path to the weights + file to be loaded. + input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: optional shape tuple, only to be specified if `include_top` + is `False` (otherwise the input shape has to be `(224, 224, 3)` + (with `"channels_last"` data format) or `(3, 224, 224)` + (with `"channels_first"` data format). It should have exactly 3 + inputs channels, and width and height should be no smaller than 32. + E.g. `(200, 200, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction when `include_top` + is `False`. + - `None` means that the output of the model will be the 4D tensor + output of the last convolutional block. + - `avg` means that global average pooling will be applied to the output + of the last convolutional block, and thus the output of the + model will be a 2D tensor. + - `max` means that global max pooling will be applied. + classes: optional number of classes to classify images into, only to be + specified if `include_top` is `True`, and if no `weights` argument is + specified. + classifier_activation: A `str` or callable. The activation function to + use on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. 
+ When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A Model instance. diff --git a/.tether/man/application_resnet152_v2.txt b/.tether/man/application_resnet152_v2.txt new file mode 100644 index 0000000000..3fd8ceb419 --- /dev/null +++ b/.tether/man/application_resnet152_v2.txt @@ -0,0 +1,62 @@ +__signature__ +keras.applications.ResNet152V2( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +__doc__ +Instantiates the ResNet152V2 architecture. + +Reference: +- [Identity Mappings in Deep Residual Networks]( + https://arxiv.org/abs/1603.05027) (CVPR 2016) + +For image classification use cases, see [this page for detailed examples]( + https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( + https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. +For ResNet, call `keras.applications.resnet_v2.preprocess_input` on your +inputs before passing them to the model. `resnet_v2.preprocess_input` will +scale input pixels between -1 and 1. + +Args: + include_top: whether to include the fully-connected + layer at the top of the network. + weights: one of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), or the path to the weights + file to be loaded. + input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: optional shape tuple, only to be specified if `include_top` + is `False` (otherwise the input shape has to be `(224, 224, 3)` + (with `"channels_last"` data format) or `(3, 224, 224)` + (with `"channels_first"` data format). It should have exactly 3 + inputs channels, and width and height should be no smaller than 32. + E.g. `(200, 200, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction when `include_top` + is `False`. + - `None` means that the output of the model will be the 4D tensor + output of the last convolutional block. + - `avg` means that global average pooling will be applied to the output + of the last convolutional block, and thus the output of the + model will be a 2D tensor. + - `max` means that global max pooling will be applied. + classes: optional number of classes to classify images into, only to be + specified if `include_top` is `True`, and if no `weights` argument is + specified. + classifier_activation: A `str` or callable. The activation function to + use on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A Model instance. diff --git a/.tether/man/application_resnet50.txt b/.tether/man/application_resnet50.txt new file mode 100644 index 0000000000..1793958cd3 --- /dev/null +++ b/.tether/man/application_resnet50.txt @@ -0,0 +1,63 @@ +__signature__ +keras.applications.ResNet50( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +__doc__ +Instantiates the ResNet50 architecture. 
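+
+For example, a minimal transfer-learning sketch in the spirit of the
+guide linked below (the 10-class head and random data are illustrative
+stand-ins for a real dataset):
+
+```python
+import numpy as np
+import keras
+
+# Frozen ResNet50 backbone plus a small trainable classification head.
+backbone = keras.applications.ResNet50(include_top=False, pooling="avg")
+backbone.trainable = False
+
+model = keras.Sequential([
+    keras.Input(shape=(224, 224, 3)),
+    backbone,
+    keras.layers.Dense(10, activation="softmax"),
+])
+model.compile(optimizer="adam", loss="categorical_crossentropy")
+
+# resnet.preprocess_input converts RGB to BGR and zero-centers each
+# channel with respect to ImageNet (see the note below).
+x = keras.applications.resnet.preprocess_input(
+    np.random.uniform(0, 255, size=(4, 224, 224, 3)).astype("float32"))
+y = keras.utils.to_categorical(np.random.randint(0, 10, size=4), 10)
+model.fit(x, y, epochs=1, verbose=0)
+```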
+ +Reference: +- [Deep Residual Learning for Image Recognition]( + https://arxiv.org/abs/1512.03385) (CVPR 2016) + +For image classification use cases, see [this page for detailed examples]( + https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( + https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. +For ResNet, call `keras.applications.resnet.preprocess_input` on your +inputs before passing them to the model. `resnet.preprocess_input` will convert +the input images from RGB to BGR, then will zero-center each color channel with +respect to the ImageNet dataset, without scaling. + +Args: + include_top: whether to include the fully-connected + layer at the top of the network. + weights: one of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), or the path to the weights + file to be loaded. + input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: optional shape tuple, only to be specified if `include_top` + is `False` (otherwise the input shape has to be `(224, 224, 3)` + (with `"channels_last"` data format) or `(3, 224, 224)` + (with `"channels_first"` data format)). It should have exactly 3 + input channels, and width and height should be no smaller than 32. + E.g. `(200, 200, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction when `include_top` + is `False`. + - `None` means that the output of the model will be the 4D tensor + output of the last convolutional block. + - `avg` means that global average pooling will be applied to the output + of the last convolutional block, and thus the output of the + model will be a 2D tensor. + - `max` means that global max pooling will be applied. + classes: optional number of classes to classify images into, only to be + specified if `include_top` is `True`, and if no `weights` argument is + specified. + classifier_activation: A `str` or callable. The activation function to + use on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A Model instance. diff --git a/.tether/man/application_resnet50_v2.txt b/.tether/man/application_resnet50_v2.txt new file mode 100644 index 0000000000..2abfa6708d --- /dev/null +++ b/.tether/man/application_resnet50_v2.txt @@ -0,0 +1,62 @@ +__signature__ +keras.applications.ResNet50V2( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +__doc__ +Instantiates the ResNet50V2 architecture. + +Reference: +- [Identity Mappings in Deep Residual Networks]( + https://arxiv.org/abs/1603.05027) (ECCV 2016) + +For image classification use cases, see [this page for detailed examples]( + https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( + https://keras.io/guides/transfer_learning/). + +Note: each Keras Application expects a specific kind of input preprocessing. +For ResNet, call `keras.applications.resnet_v2.preprocess_input` on your +inputs before passing them to the model.
`resnet_v2.preprocess_input` will +scale input pixels between -1 and 1. + +Args: + include_top: whether to include the fully-connected + layer at the top of the network. + weights: one of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), or the path to the weights + file to be loaded. + input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: optional shape tuple, only to be specified if `include_top` + is `False` (otherwise the input shape has to be `(224, 224, 3)` + (with `"channels_last"` data format) or `(3, 224, 224)` + (with `"channels_first"` data format)). It should have exactly 3 + input channels, and width and height should be no smaller than 32. + E.g. `(200, 200, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction when `include_top` + is `False`. + - `None` means that the output of the model will be the 4D tensor + output of the last convolutional block. + - `avg` means that global average pooling will be applied to the output + of the last convolutional block, and thus the output of the + model will be a 2D tensor. + - `max` means that global max pooling will be applied. + classes: optional number of classes to classify images into, only to be + specified if `include_top` is `True`, and if no `weights` argument is + specified. + classifier_activation: A `str` or callable. The activation function to + use on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" layer. + When loading pretrained weights, `classifier_activation` can only + be `None` or `"softmax"`. + +Returns: + A Model instance. diff --git a/.tether/man/application_vgg16.txt b/.tether/man/application_vgg16.txt new file mode 100644 index 0000000000..8d054c8382 --- /dev/null +++ b/.tether/man/application_vgg16.txt @@ -0,0 +1,73 @@ +__signature__ +keras.applications.VGG16( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +__doc__ +Instantiates the VGG16 model. + +Reference: +- [Very Deep Convolutional Networks for Large-Scale Image Recognition]( +https://arxiv.org/abs/1409.1556) (ICLR 2015) + +For image classification use cases, see +[this page for detailed examples]( + https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( + https://keras.io/guides/transfer_learning/). + +The default input size for this model is 224x224. + +Note: each Keras Application expects a specific kind of input preprocessing. +For VGG16, call `keras.applications.vgg16.preprocess_input` on your +inputs before passing them to the model. +`vgg16.preprocess_input` will convert the input images from RGB to BGR, +then will zero-center each color channel with respect to the ImageNet +dataset, without scaling. + +Args: + include_top: whether to include the 3 fully-connected + layers at the top of the network. + weights: one of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), + or the path to the weights file to be loaded. + input_tensor: optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model.
+ input_shape: optional shape tuple, only to be specified + if `include_top` is `False` (otherwise the input shape + has to be `(224, 224, 3)` + (with `"channels_last"` data format) or + `(3, 224, 224)` (with `"channels_first"` data format)). + It should have exactly 3 input channels, + and width and height should be no smaller than 32. + E.g. `(200, 200, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional block. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional block, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. + classifier_activation: A `str` or callable. The activation function to + use on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" + layer. When loading pretrained weights, `classifier_activation` + can only be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_vgg19.txt b/.tether/man/application_vgg19.txt new file mode 100644 index 0000000000..143db7bb74 --- /dev/null +++ b/.tether/man/application_vgg19.txt @@ -0,0 +1,73 @@ +__signature__ +keras.applications.VGG19( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +__doc__ +Instantiates the VGG19 model. + +Reference: +- [Very Deep Convolutional Networks for Large-Scale Image Recognition]( +https://arxiv.org/abs/1409.1556) (ICLR 2015) + +For image classification use cases, see +[this page for detailed examples]( + https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( + https://keras.io/guides/transfer_learning/). + +The default input size for this model is 224x224. + +Note: each Keras Application expects a specific kind of input preprocessing. +For VGG19, call `keras.applications.vgg19.preprocess_input` on your +inputs before passing them to the model. +`vgg19.preprocess_input` will convert the input images from RGB to BGR, +then will zero-center each color channel with respect to the ImageNet +dataset, without scaling. + +Args: + include_top: whether to include the 3 fully-connected + layers at the top of the network. + weights: one of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), + or the path to the weights file to be loaded. + input_tensor: optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: optional shape tuple, only to be specified + if `include_top` is `False` (otherwise the input shape + has to be `(224, 224, 3)` + (with `"channels_last"` data format) or + `(3, 224, 224)` (with `"channels_first"` data format)). + It should have exactly 3 input channels, + and width and height should be no smaller than 32. + E.g. `(200, 200, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional block.
+ - `avg` means that global average pooling + will be applied to the output of the + last convolutional block, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. + classifier_activation: A `str` or callable. The activation function to + use on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" + layer. When loading pretrained weights, `classifier_activation` can + only be `None` or `"softmax"`. + +Returns: + A model instance. diff --git a/.tether/man/application_xception.txt b/.tether/man/application_xception.txt new file mode 100644 index 0000000000..1237bfc716 --- /dev/null +++ b/.tether/man/application_xception.txt @@ -0,0 +1,69 @@ +__signature__ +keras.applications.Xception( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +__doc__ +Instantiates the Xception architecture. + +Reference: +- [Xception: Deep Learning with Depthwise Separable Convolutions]( + https://arxiv.org/abs/1610.02357) (CVPR 2017) + +For image classification use cases, see +[this page for detailed examples]( + https://keras.io/api/applications/#usage-examples-for-image-classification-models). + +For transfer learning use cases, make sure to read the +[guide to transfer learning & fine-tuning]( + https://keras.io/guides/transfer_learning/). + +The default input image size for this model is 299x299. + +Note: each Keras Application expects a specific kind of input preprocessing. +For Xception, call `keras.applications.xception.preprocess_input` +on your inputs before passing them to the model. +`xception.preprocess_input` will scale input pixels between -1 and 1. + +Args: + include_top: whether to include the fully-connected + layer at the top of the network. + weights: one of `None` (random initialization), + `"imagenet"` (pre-training on ImageNet), + or the path to the weights file to be loaded. + input_tensor: optional Keras tensor + (i.e. output of `layers.Input()`) + to use as image input for the model. + input_shape: optional shape tuple, only to be specified + if `include_top` is `False` (otherwise the input shape + has to be `(299, 299, 3)`). + It should have exactly 3 input channels, + and width and height should be no smaller than 71. + E.g. `(150, 150, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional block. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional block, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is `True`, and + if no `weights` argument is specified. + classifier_activation: A `str` or callable. The activation function to + use on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" + layer. When loading pretrained weights, `classifier_activation` can + only be `None` or `"softmax"`. + +Returns: + A model instance.
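The constructor arguments documented above (`include_top`, `weights`, `pooling`, ...) behave the same way across all of these application models. A minimal feature-extraction sketch, assuming only the `keras.applications` API documented above (the choice of Xception and the dummy input are illustrative):

```python
import numpy as np
import keras

# Drop the classifier head and request global average pooling, so the
# model returns a 2D tensor of pooled convolutional features.
model = keras.applications.Xception(
    include_top=False, weights="imagenet", pooling="avg"
)

# One dummy RGB image at Xception's default 299x299 input size.
images = np.random.uniform(0, 255, size=(1, 299, 299, 3))
x = keras.applications.xception.preprocess_input(images)  # scales to [-1, 1]

features = model.predict(x)
print(features.shape)  # expected: (1, 2048), one feature vector per image
```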
diff --git a/.tether/man/audio_dataset_from_directory.txt b/.tether/man/audio_dataset_from_directory.txt new file mode 100644 index 0000000000..98c07b8acc --- /dev/null +++ b/.tether/man/audio_dataset_from_directory.txt @@ -0,0 +1,108 @@ +__signature__ +keras.utils.audio_dataset_from_directory( + directory, + labels='inferred', + label_mode='int', + class_names=None, + batch_size=32, + sampling_rate=None, + output_sequence_length=None, + ragged=False, + shuffle=True, + seed=None, + validation_split=None, + subset=None, + follow_links=False, + verbose=True +) +__doc__ +Generates a `tf.data.Dataset` from audio files in a directory. + +If your directory structure is: + +``` +main_directory/ +...class_a/ +......a_audio_1.wav +......a_audio_2.wav +...class_b/ +......b_audio_1.wav +......b_audio_2.wav +``` + +Then calling `audio_dataset_from_directory(main_directory, +labels='inferred')` +will return a `tf.data.Dataset` that yields batches of audio files from +the subdirectories `class_a` and `class_b`, together with labels +0 and 1 (0 corresponding to `class_a` and 1 corresponding to `class_b`). + +Only `.wav` files are supported at this time. + +Args: + directory: Directory where the data is located. + If `labels` is `"inferred"`, it should contain subdirectories, + each containing audio files for a class. Otherwise, the directory + structure is ignored. + labels: Either "inferred" (labels are generated from the directory + structure), `None` (no labels), or a list/tuple of integer labels + of the same size as the number of audio files found in + the directory. Labels should be sorted according to the + alphanumeric order of the audio file paths + (obtained via `os.walk(directory)` in Python). + label_mode: String describing the encoding of `labels`. Options are: + - `"int"` means that the labels are encoded as integers (e.g. for + `sparse_categorical_crossentropy` loss). + - `"categorical"` means that the labels are encoded as a categorical + vector (e.g. for `categorical_crossentropy` loss). + - `"binary"` means that the labels (there can be only 2) + are encoded as `float32` scalars with values 0 + or 1 (e.g. for `binary_crossentropy`). + - `None` (no labels). + class_names: Only valid if "labels" is `"inferred"`. + This is the explicit list of class names + (must match names of subdirectories). Used to control the order + of the classes (otherwise alphanumerical order is used). + batch_size: Size of the batches of data. Default: 32. If `None`, + the data will not be batched + (the dataset will yield individual samples). + sampling_rate: Audio sampling rate (in samples per second). + output_sequence_length: Maximum length of an audio sequence. Audio files + longer than this will be truncated to `output_sequence_length`. + If set to `None`, then all sequences in the same batch will + be padded to the + length of the longest sequence in the batch. + ragged: Whether to return a Ragged dataset (where each sequence has its + own length). Defaults to `False`. + shuffle: Whether to shuffle the data. Defaults to `True`. + If set to `False`, sorts the data in alphanumeric order. + seed: Optional random seed for shuffling and transformations. + validation_split: Optional float between 0 and 1, fraction of data to + reserve for validation. + subset: Subset of the data to return. One of `"training"`, + `"validation"` or `"both"`. Only used if `validation_split` is set. + follow_links: Whether to visit subdirectories pointed to by symlinks. + Defaults to `False`.
+ verbose: Whether to display information on the number of classes and + the number of files found. Defaults to `True`. + +Returns: + +A `tf.data.Dataset` object. + +- If `label_mode` is `None`, it yields `string` tensors of shape + `(batch_size,)`, containing the contents of a batch of audio files. +- Otherwise, it yields a tuple `(audio, labels)`, where `audio` + has shape `(batch_size, sequence_length, num_channels)` and `labels` + follows the format described + below. + +Rules regarding labels format: + +- if `label_mode` is `int`, the labels are an `int32` tensor of shape + `(batch_size,)`. +- if `label_mode` is `binary`, the labels are a `float32` tensor of + 1s and 0s of shape `(batch_size, 1)`. +- if `label_mode` is `categorical`, the labels are a `float32` tensor + of shape `(batch_size, num_classes)`, representing a one-hot + encoding of the class index. + diff --git a/.tether/man/bidirectional.txt b/.tether/man/bidirectional.txt new file mode 100644 index 0000000000..583bf2a640 --- /dev/null +++ b/.tether/man/bidirectional.txt @@ -0,0 +1,152 @@ +Help on class Bidirectional in module keras.src.layers.rnn.bidirectional: + +class Bidirectional(keras.src.layers.core.wrapper.Wrapper) + | Bidirectional(layer, merge_mode='concat', weights=None, backward_layer=None, **kwargs) + | + | Bidirectional wrapper for RNNs. + | + | Args: + | layer: `keras.layers.RNN` instance, such as + | `keras.layers.LSTM` or `keras.layers.GRU`. + | It could also be a `keras.layers.Layer` instance + | that meets the following criteria: + | 1. Be a sequence-processing layer (accepts 3D+ inputs). + | 2. Have `go_backwards`, `return_sequences` and `return_state` + | attributes (with the same semantics as for the `RNN` class). + | 3. Have an `input_spec` attribute. + | 4. Implement serialization via `get_config()` and `from_config()`. + | Note that the recommended way to create new RNN layers is to write a + | custom RNN cell and use it with `keras.layers.RNN`, instead of + | subclassing `keras.layers.Layer` directly. + | When `return_sequences` is `True`, the output of the masked + | timestep will be zero regardless of the layer's original + | `zero_output_for_mask` value. + | merge_mode: Mode by which outputs of the forward and backward RNNs + | will be combined. One of `{"sum", "mul", "concat", "ave", None}`. + | If `None`, the outputs will not be combined; + | they will be returned as a list. Defaults to `"concat"`. + | backward_layer: Optional `keras.layers.RNN`, + | or `keras.layers.Layer` instance to be used to handle + | backwards input processing. + | If `backward_layer` is not provided, the layer instance passed + | as the `layer` argument will be used to generate the backward layer + | automatically. + | Note that the provided `backward_layer` should have properties + | matching those of the `layer` argument, in particular + | it should have the same values for `stateful`, `return_state`, + | `return_sequences`, etc. In addition, `backward_layer` + | and `layer` should have different `go_backwards` argument values. + | A `ValueError` will be raised if these requirements are not met. + | + | Call arguments: + | The call arguments for this layer are the same as those of the + | wrapped RNN layer. Beware that when passing the `initial_state` + | argument during the call of this layer, the first half in the + | list of elements in the `initial_state` list will be passed to + | the forward RNN call and the last half in the list of elements + | will be passed to the backward RNN call.
+ | + | Note: instantiating a `Bidirectional` layer from an existing RNN layer + | instance will not reuse the weights state of the RNN layer instance -- the + | `Bidirectional` layer will have freshly initialized weights. + | + | Examples: + | + | ```python + | model = Sequential([ + | Input(shape=(5, 10)), + | Bidirectional(LSTM(10, return_sequences=True)), + | Bidirectional(LSTM(10)), + | Dense(5, activation="softmax"), + | ]) + | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') + | + | # With custom backward layer + | forward_layer = LSTM(10, return_sequences=True) + | backward_layer = LSTM(10, activation='relu', return_sequences=True, + | go_backwards=True) + | model = Sequential([ + | Input(shape=(5, 10)), + | Bidirectional(forward_layer, backward_layer=backward_layer), + | Dense(5, activation="softmax"), + | ]) + | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') + | ``` + | + | Method resolution order: + | Bidirectional + | keras.src.layers.core.wrapper.Wrapper + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | layer, + | merge_mode='concat', + | weights=None, + | backward_layer=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build( + | self, + | sequences_shape, + | initial_state_shape=None + | ) + | + | call( + | self, + | sequences, + | initial_state=None, + | mask=None, + | training=None + | ) + | + | compute_mask( + | self, + | _, + | mask + | ) + | + | compute_output_shape( + | self, + | sequences_shape, + | initial_state_shape=None + | ) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + | reset_state(self) + | + | reset_states(self) + | + | ---------------------------------------------------------------------- + | Class methods defined here: + | + | from_config(config, custom_objects=None) from builtins.type + | Creates a layer from its config. + | + | This method is the reverse of `get_config`, + | capable of instantiating the same layer from the config + | dictionary. It does not handle layer connectivity + | (handled by Network), nor weights (handled by `set_weights`). + | + | Args: + | config: A Python dictionary, typically the + | output of get_config. + | + | Returns: + | A layer instance. + | diff --git a/.tether/man/callback_backup_and_restore.txt b/.tether/man/callback_backup_and_restore.txt new file mode 100644 index 0000000000..b5731ce4cd --- /dev/null +++ b/.tether/man/callback_backup_and_restore.txt @@ -0,0 +1,131 @@ +Help on class BackupAndRestore in module keras.src.callbacks.backup_and_restore: + +class BackupAndRestore(keras.src.callbacks.callback.Callback) + | BackupAndRestore(backup_dir, save_freq='epoch', delete_checkpoint=True) + | + | Callback to back up and restore the training state. + | + | `BackupAndRestore` callback is intended to recover training from an + | interruption that has happened in the middle of a `Model.fit` execution, by + | backing up the training states in a temporary checkpoint file, at the end of + | each epoch.
Each backup overwrites the previously written checkpoint file, + | so at any given time there is at most one such checkpoint file for + | backup/restoring purposes. + | + | If training restarts before completion, the training state (which includes + | the `Model` weights and epoch number) is restored to the most recently saved + | state at the beginning of a new `Model.fit` run. At the completion of a + | `Model.fit` run, the temporary checkpoint file is deleted. + | + | Note that the user is responsible for bringing jobs back after the interruption. + | This callback is important for the backup and restore mechanism for fault + | tolerance purposes, and the model to be restored from a previous checkpoint + | is expected to be the same as the one used to back up. If the user changes + | arguments passed to compile or fit, the checkpoint saved for fault tolerance + | can become invalid. + | + | Example: + | + | >>> class InterruptingCallback(keras.callbacks.Callback): + | ... def on_epoch_begin(self, epoch, logs=None): + | ... if epoch == 4: + | ... raise RuntimeError('Interrupting!') + | >>> callback = keras.callbacks.BackupAndRestore(backup_dir="/tmp/backup") + | >>> model = keras.models.Sequential([keras.layers.Dense(10)]) + | >>> model.compile(keras.optimizers.SGD(), loss='mse') + | >>> try: + | ... model.fit(np.arange(100).reshape(5, 20), np.zeros(5), epochs=10, + | ... batch_size=1, callbacks=[callback, InterruptingCallback()], + | ... verbose=0) + | ... except: + | ... pass + | >>> history = model.fit(np.arange(100).reshape(5, 20), np.zeros(5), + | ... epochs=10, batch_size=1, callbacks=[callback], + | ... verbose=0) + | >>> # Only 6 more epochs are run, since first training got interrupted at + | >>> # zero-indexed epoch 4, second training will continue from 4 to 9. + | >>> len(history.history['loss']) + | 6 + | + | Args: + | backup_dir: String, path of directory where to store the data + | needed to restore the model. The directory + | cannot be reused elsewhere to store other files, e.g. by the + | `BackupAndRestore` callback of another training run, + | or by another callback (e.g. `ModelCheckpoint`) + | of the same training run. + | save_freq: `"epoch"`, integer, or `False`. When set to `"epoch"` + | the callback saves the checkpoint at the end of each epoch. + | When set to an integer, the callback saves the checkpoint every + | `save_freq` batches. Set `save_freq=False` only if using + | preemption checkpointing (i.e. with `save_before_preemption=True`). + | delete_checkpoint: Boolean, defaults to `True`. This `BackupAndRestore` + | callback works by saving a checkpoint to back up the training state. + | If `delete_checkpoint=True`, the checkpoint will be deleted after + | training is finished. Use `False` if you'd like to keep the checkpoint + | for future usage. + | + | Method resolution order: + | BackupAndRestore + | keras.src.callbacks.callback.Callback + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | backup_dir, + | save_freq='epoch', + | delete_checkpoint=True + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | on_epoch_end( + | self, + | epoch, + | logs=None + | ) + | Called at the end of an epoch. + | + | Subclasses should override for any actions to run. This function should + | only be called during TRAIN mode. + | + | Args: + | epoch: Integer, index of epoch. + | logs: Dict, metric results for this training epoch, and for the + | validation epoch if validation is performed. Validation result
Validation result + | keys are prefixed with `val_`. For training epoch, the values of + | the `Model`'s metrics are returned. Example: + | `{'loss': 0.2, 'accuracy': 0.7}`. + | + | on_train_batch_end( + | self, + | batch, + | logs=None + | ) + | Called at the end of a training batch in `fit` methods. + | + | Subclasses should override for any actions to run. + | + | Note that if the `steps_per_execution` argument to `compile` in + | `Model` is set to `N`, this method will only be called every + | `N` batches. + | + | Args: + | batch: Integer, index of batch within the current epoch. + | logs: Dict. Aggregated metric results up until this batch. + | + | on_train_begin(self, logs=None) + | Get training state from temporary file and restore it. + | + | on_train_end(self, logs=None) + | Called at the end of training. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently the output of the last call to + | `on_epoch_end()` is passed to this argument for this method but + | that may change in the future. + | + diff --git a/.tether/man/callback_csv_logger.txt b/.tether/man/callback_csv_logger.txt new file mode 100644 index 0000000000..94bb2e6141 --- /dev/null +++ b/.tether/man/callback_csv_logger.txt @@ -0,0 +1,70 @@ +Help on class CSVLogger in module keras.src.callbacks.csv_logger: + +class CSVLogger(keras.src.callbacks.callback.Callback) + | CSVLogger(filename, separator=',', append=False) + | + | Callback that streams epoch results to a CSV file. + | + | Supports all values that can be represented as a string, + | including 1D iterables such as `np.ndarray`. + | + | Args: + | filename: Filename of the CSV file, e.g. `'run/log.csv'`. + | separator: String used to separate elements in the CSV file. + | append: Boolean. True: append if file exists (useful for continuing + | training). False: overwrite existing file. + | + | Example: + | + | ```python + | csv_logger = CSVLogger('training.log') + | model.fit(X_train, Y_train, callbacks=[csv_logger]) + | ``` + | + | Method resolution order: + | CSVLogger + | keras.src.callbacks.callback.Callback + | builtins.object + | + | Methods defined here: + | + | __init__(self, filename, separator=',', append=False) + | Initialize self. See help(type(self)) for accurate signature. + | + | on_epoch_end( + | self, + | epoch, + | logs=None + | ) + | Called at the end of an epoch. + | + | Subclasses should override for any actions to run. This function should + | only be called during TRAIN mode. + | + | Args: + | epoch: Integer, index of epoch. + | logs: Dict, metric results for this training epoch, and for the + | validation epoch if validation is performed. Validation result + | keys are prefixed with `val_`. For training epoch, the values of + | the `Model`'s metrics are returned. Example: + | `{'loss': 0.2, 'accuracy': 0.7}`. + | + | on_train_begin(self, logs=None) + | Called at the beginning of training. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_train_end(self, logs=None) + | Called at the end of training. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently the output of the last call to + | `on_epoch_end()` is passed to this argument for this method but + | that may change in the future. 
+ | diff --git a/.tether/man/callback_early_stopping.txt b/.tether/man/callback_early_stopping.txt new file mode 100644 index 0000000000..0ec33f81d0 --- /dev/null +++ b/.tether/man/callback_early_stopping.txt @@ -0,0 +1,120 @@ +Help on class EarlyStopping in module keras.src.callbacks.early_stopping: + +class EarlyStopping(keras.src.callbacks.callback.Callback) + | EarlyStopping(monitor='val_loss', min_delta=0, patience=0, verbose=0, mode='auto', baseline=None, restore_best_weights=False, start_from_epoch=0) + | + | Stop training when a monitored metric has stopped improving. + | + | Assuming the goal of training is to minimize the loss. With this, the + | metric to be monitored would be `'loss'`, and mode would be `'min'`. A + | `model.fit()` training loop will check at the end of every epoch whether + | the loss is no longer decreasing, considering the `min_delta` and + | `patience` if applicable. Once it is found to be no longer decreasing, + | `model.stop_training` is marked `True` and training terminates. + | + | The quantity to be monitored needs to be available in the `logs` dict. + | To make it so, pass the loss or metrics at `model.compile()`. + | + | Args: + | monitor: Quantity to be monitored. Defaults to `"val_loss"`. + | min_delta: Minimum change in the monitored quantity to qualify as an + | improvement, i.e. an absolute change of less than `min_delta` will + | count as no improvement. Defaults to `0`. + | patience: Number of epochs with no improvement after which training will + | be stopped. Defaults to `0`. + | verbose: Verbosity mode, 0 or 1. Mode 0 is silent, and mode 1 displays + | messages when the callback takes an action. Defaults to `0`. + | mode: One of `{"auto", "min", "max"}`. In `"min"` mode, training will stop + | when the quantity monitored has stopped decreasing; in `"max"` mode + | it will stop when the quantity monitored has stopped increasing; in + | `"auto"` mode, the direction is automatically inferred from the name + | of the monitored quantity. Defaults to `"auto"`. + | baseline: Baseline value for the monitored quantity. If not `None`, + | training will stop if the model doesn't show improvement over the + | baseline. Defaults to `None`. + | restore_best_weights: Whether to restore model weights from the epoch + | with the best value of the monitored quantity. If `False`, the model + | weights obtained at the last step of training are used. An epoch + | will be restored regardless of the performance relative to the + | `baseline`. If no epoch improves on `baseline`, training will run + | for `patience` epochs and restore weights from the best epoch in + | that set. Defaults to `False`. + | start_from_epoch: Number of epochs to wait before starting to monitor + | improvement. This allows for a warm-up period in which no + | improvement is expected and thus training will not be stopped. + | Defaults to `0`. + | + | + | Example: + | + | >>> callback = keras.callbacks.EarlyStopping(monitor='loss', + | ... patience=3) + | >>> # This callback will stop the training when there is no improvement in + | >>> # the loss for three consecutive epochs. + | >>> model = keras.models.Sequential([keras.layers.Dense(10)]) + | >>> model.compile(keras.optimizers.SGD(), loss='mse') + | >>> history = model.fit(np.arange(100).reshape(5, 20), np.zeros(5), + | ... epochs=10, batch_size=1, callbacks=[callback], + | ... verbose=0) + | >>> len(history.history['loss'])  # Only 4 epochs are run.
+ | 4 + | + | Method resolution order: + | EarlyStopping + | keras.src.callbacks.callback.Callback + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | monitor='val_loss', + | min_delta=0, + | patience=0, + | verbose=0, + | mode='auto', + | baseline=None, + | restore_best_weights=False, + | start_from_epoch=0 + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_monitor_value(self, logs) + | + | on_epoch_end( + | self, + | epoch, + | logs=None + | ) + | Called at the end of an epoch. + | + | Subclasses should override for any actions to run. This function should + | only be called during TRAIN mode. + | + | Args: + | epoch: Integer, index of epoch. + | logs: Dict, metric results for this training epoch, and for the + | validation epoch if validation is performed. Validation result + | keys are prefixed with `val_`. For training epoch, the values of + | the `Model`'s metrics are returned. Example: + | `{'loss': 0.2, 'accuracy': 0.7}`. + | + | on_train_begin(self, logs=None) + | Called at the beginning of training. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_train_end(self, logs=None) + | Called at the end of training. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently the output of the last call to + | `on_epoch_end()` is passed to this argument for this method but + | that may change in the future. + | diff --git a/.tether/man/callback_lambda.txt b/.tether/man/callback_lambda.txt new file mode 100644 index 0000000000..44cdc12f8c --- /dev/null +++ b/.tether/man/callback_lambda.txt @@ -0,0 +1,81 @@ +Help on class LambdaCallback in module keras.src.callbacks.lambda_callback: + +class LambdaCallback(keras.src.callbacks.callback.Callback) + | LambdaCallback(on_epoch_begin=None, on_epoch_end=None, on_train_begin=None, on_train_end=None, on_train_batch_begin=None, on_train_batch_end=None, **kwargs) + | + | Callback for creating simple, custom callbacks on-the-fly. + | + | This callback is constructed with anonymous functions that will be called + | at the appropriate time (during `Model.{fit | evaluate | predict}`). + | Note that the callback expects positional arguments, as: + | + | - `on_epoch_begin` and `on_epoch_end` expect two positional arguments: + | `epoch`, `logs` + | - `on_train_begin` and `on_train_end` expect one positional argument: + | `logs` + | - `on_train_batch_begin` and `on_train_batch_end` expect two positional + | arguments: `batch`, `logs` + | - See `Callback` class definition for the full list of functions and their + | expected arguments. + | + | Args: + | on_epoch_begin: called at the beginning of every epoch. + | on_epoch_end: called at the end of every epoch. + | on_train_begin: called at the beginning of model training. + | on_train_end: called at the end of model training. + | on_train_batch_begin: called at the beginning of every train batch. + | on_train_batch_end: called at the end of every train batch. + | kwargs: Any function in `Callback` that you want to override by + | passing `function_name=function`. For example, + | `LambdaCallback(.., on_train_end=train_end_fn)`. The custom function + | needs to have the same arguments as the ones defined in `Callback`. + | + | Example: + | + | ```python + | # Print the batch number at the beginning of every batch.
+ | batch_print_callback = LambdaCallback( + | on_train_batch_begin=lambda batch, logs: print(batch)) + | + | # Stream the epoch loss to a file in JSON format. The file content + | # is not well-formed JSON but rather has a JSON object per line. + | import json + | json_log = open('loss_log.json', mode='wt', buffering=1) + | json_logging_callback = LambdaCallback( + | on_epoch_end=lambda epoch, logs: json_log.write( + | json.dumps({'epoch': epoch, 'loss': logs['loss']}) + '\n'), + | on_train_end=lambda logs: json_log.close() + | ) + | + | # Terminate some processes after having finished model training. + | processes = ... + | cleanup_callback = LambdaCallback( + | on_train_end=lambda logs: [ + | p.terminate() for p in processes if p.is_alive()]) + | + | model.fit(..., + | callbacks=[batch_print_callback, + | json_logging_callback, + | cleanup_callback]) + | ``` + | + | Method resolution order: + | LambdaCallback + | keras.src.callbacks.callback.Callback + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | on_epoch_begin=None, + | on_epoch_end=None, + | on_train_begin=None, + | on_train_end=None, + | on_train_batch_begin=None, + | on_train_batch_end=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | diff --git a/.tether/man/callback_learning_rate_scheduler.txt b/.tether/man/callback_learning_rate_scheduler.txt new file mode 100644 index 0000000000..b25eaf649a --- /dev/null +++ b/.tether/man/callback_learning_rate_scheduler.txt @@ -0,0 +1,86 @@ +Help on class LearningRateScheduler in module keras.src.callbacks.learning_rate_scheduler: + +class LearningRateScheduler(keras.src.callbacks.callback.Callback) + | LearningRateScheduler(schedule, verbose=0) + | + | Learning rate scheduler. + | + | At the beginning of every epoch, this callback gets the updated learning + | rate value from the `schedule` function provided at `__init__`, with the current + | epoch and current learning rate, and applies the updated learning rate on + | the optimizer. + | + | Args: + | schedule: A function that takes an epoch index (integer, indexed from 0) + | and current learning rate (float) as inputs and returns a new + | learning rate as output (float). + | verbose: Integer. 0: quiet, 1: log update messages. + | + | Example: + | + | >>> # This function keeps the initial learning rate for the first ten epochs + | >>> # and decreases it exponentially after that. + | >>> def scheduler(epoch, lr): + | ...     if epoch < 10: + | ...         return lr + | ...     else: + | ...         return lr * ops.exp(-0.1) + | >>> + | >>> model = keras.models.Sequential([keras.layers.Dense(10)]) + | >>> model.compile(keras.optimizers.SGD(), loss='mse') + | >>> round(model.optimizer.learning_rate, 5) + | 0.01 + | + | >>> callback = keras.callbacks.LearningRateScheduler(scheduler) + | >>> history = model.fit(np.arange(100).reshape(5, 20), np.zeros(5), + | ...     epochs=15, callbacks=[callback], verbose=0) + | >>> round(model.optimizer.learning_rate, 5) + | 0.00607 + | + | Method resolution order: + | LearningRateScheduler + | keras.src.callbacks.callback.Callback + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | schedule, + | verbose=0 + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | on_epoch_begin( + | self, + | epoch, + | logs=None + | ) + | Called at the start of an epoch. + | + | Subclasses should override for any actions to run. This function should + | only be called during TRAIN mode.
+ | + | Args: + | epoch: Integer, index of epoch. + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_epoch_end( + | self, + | epoch, + | logs=None + | ) + | Called at the end of an epoch. + | + | Subclasses should override for any actions to run. This function should + | only be called during TRAIN mode. + | + | Args: + | epoch: Integer, index of epoch. + | logs: Dict, metric results for this training epoch, and for the + | validation epoch if validation is performed. Validation result + | keys are prefixed with `val_`. For training epoch, the values of + | the `Model`'s metrics are returned. Example: + | `{'loss': 0.2, 'accuracy': 0.7}`. + | diff --git a/.tether/man/callback_model_checkpoint.txt b/.tether/man/callback_model_checkpoint.txt new file mode 100644 index 0000000000..e5826a2c15 --- /dev/null +++ b/.tether/man/callback_model_checkpoint.txt @@ -0,0 +1,185 @@ +Help on class ModelCheckpoint in module keras.src.callbacks.model_checkpoint: + +class ModelCheckpoint(keras.src.callbacks.callback.Callback) + | ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', save_freq='epoch', initial_value_threshold=None) + | + | Callback to save the Keras model or model weights at some frequency. + | + | `ModelCheckpoint` callback is used in conjunction with training using + | `model.fit()` to save a model or weights (in a checkpoint file) at some + | interval, so the model or weights can be loaded later to continue the + | training from the state saved. + | + | A few options this callback provides include: + | + | - Whether to only keep the model that has achieved the "best performance" so + | far, or whether to save the model at the end of every epoch regardless of + | performance. + | - Definition of "best"; which quantity to monitor and whether it should be + | maximized or minimized. + | - The frequency it should save at. Currently, the callback supports saving + | at the end of every epoch, or after a fixed number of training batches. + | - Whether only weights are saved, or the whole model is saved. + | + | Example: + | + | ```python + | model.compile(loss=..., optimizer=..., + | metrics=['accuracy']) + | + | EPOCHS = 10 + | checkpoint_filepath = '/tmp/ckpt/checkpoint.model.keras' + | model_checkpoint_callback = keras.callbacks.ModelCheckpoint( + | filepath=checkpoint_filepath, + | monitor='val_accuracy', + | mode='max', + | save_best_only=True) + | + | # Model is saved at the end of every epoch, if it's the best seen so far. + | model.fit(epochs=EPOCHS, callbacks=[model_checkpoint_callback]) + | + | # The model (that is considered the best) can be loaded as - + | keras.models.load_model(checkpoint_filepath) + | + | # Alternatively, one could checkpoint just the model weights as - + | checkpoint_filepath = '/tmp/ckpt/checkpoint.weights.h5' + | model_checkpoint_callback = keras.callbacks.ModelCheckpoint( + | filepath=checkpoint_filepath, + | save_weights_only=True, + | monitor='val_accuracy', + | mode='max', + | save_best_only=True) + | + | # Model weights are saved at the end of every epoch, if it's the best seen + | # so far. + | model.fit(epochs=EPOCHS, callbacks=[model_checkpoint_callback]) + | + | # The model weights (that are considered the best) can be loaded as - + | model.load_weights(checkpoint_filepath) + | ``` + | + | Args: + | filepath: string or `PathLike`, path to save the model file.
+ | `filepath` can contain named formatting options, + | which will be filled with the value of `epoch` and keys in `logs` + | (passed in `on_epoch_end`). + | The `filepath` name needs to end with `".weights.h5"` when + | `save_weights_only=True` or should end with `".keras"` when + | checkpoint saving the whole model (default). + | For example: + | if `filepath` is `"{epoch:02d}-{val_loss:.2f}.keras"`, then the + | model checkpoints will be saved with the epoch number and the + | validation loss in the filename. The directory of the filepath + | should not be reused by any other callbacks to avoid conflicts. + | monitor: The metric name to monitor. Typically the metrics are set by + | the `Model.compile` method. Note: + | * Prefix the name with `"val_"` to monitor validation metrics. + | * Use `"loss"` or `"val_loss"` to monitor the model's total loss. + | * If you specify metrics as strings, like `"accuracy"`, pass the + | same string (with or without the `"val_"` prefix). + | * If you pass `metrics.Metric` objects, `monitor` should be set to + | `metric.name` + | * If you're not sure about the metric names you can check the + | contents of the `history.history` dictionary returned by + | `history = model.fit()` + | * Multi-output models set additional prefixes on the metric names. + | verbose: Verbosity mode, 0 or 1. Mode 0 is silent, and mode 1 + | displays messages when the callback takes an action. + | save_best_only: if `save_best_only=True`, it only saves when the model + | is considered the "best" and the latest best model according to the + | quantity monitored will not be overwritten. If `filepath` doesn't + | contain formatting options like `{epoch}` then `filepath` will be + | overwritten by each new better model. + | mode: one of {`"auto"`, `"min"`, `"max"`}. If `save_best_only=True`, the + | decision to overwrite the current save file is made based on either + | the maximization or the minimization of the monitored quantity. + | For `val_acc`, this should be `"max"`, for `val_loss` this should be + | `"min"`, etc. In `"auto"` mode, the mode is set to `"max"` if the + | quantities monitored are `"acc"` or start with `"fmeasure"` and are + | set to `"min"` for the rest of the quantities. + | save_weights_only: if `True`, then only the model's weights will be + | saved (`model.save_weights(filepath)`), else the full model is + | saved (`model.save(filepath)`). + | save_freq: `"epoch"` or integer. When using `"epoch"`, the callback + | saves the model after each epoch. When using integer, the callback + | saves the model at the end of this many batches. If the `Model` is + | compiled with `steps_per_execution=N`, then the saving criteria will + | be checked every Nth batch. Note that if the saving isn't aligned to + | epochs, the monitored metric may potentially be less reliable (it + | could reflect as little as 1 batch, since the metrics get reset + | every epoch). Defaults to `"epoch"`. + | initial_value_threshold: Floating point initial "best" value of the + | metric to be monitored. Only applies if `save_best_only=True`. Only + | overwrites the model weights already saved if the performance of + | the current model is better than this value.
+ | + | Method resolution order: + | ModelCheckpoint + | keras.src.callbacks.callback.Callback + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | filepath, + | monitor='val_loss', + | verbose=0, + | save_best_only=False, + | save_weights_only=False, + | mode='auto', + | save_freq='epoch', + | initial_value_threshold=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | on_epoch_begin( + | self, + | epoch, + | logs=None + | ) + | Called at the start of an epoch. + | + | Subclasses should override for any actions to run. This function should + | only be called during TRAIN mode. + | + | Args: + | epoch: Integer, index of epoch. + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_epoch_end( + | self, + | epoch, + | logs=None + | ) + | Called at the end of an epoch. + | + | Subclasses should override for any actions to run. This function should + | only be called during TRAIN mode. + | + | Args: + | epoch: Integer, index of epoch. + | logs: Dict, metric results for this training epoch, and for the + | validation epoch if validation is performed. Validation result + | keys are prefixed with `val_`. For training epoch, the values of + | the `Model`'s metrics are returned. Example: + | `{'loss': 0.2, 'accuracy': 0.7}`. + | + | on_train_batch_end( + | self, + | batch, + | logs=None + | ) + | Called at the end of a training batch in `fit` methods. + | + | Subclasses should override for any actions to run. + | + | Note that if the `steps_per_execution` argument to `compile` in + | `Model` is set to `N`, this method will only be called every + | `N` batches. + | + | Args: + | batch: Integer, index of batch within the current epoch. + | logs: Dict. Aggregated metric results up until this batch. + | diff --git a/.tether/man/callback_progbar_logger.txt b/.tether/man/callback_progbar_logger.txt new file mode 100644 index 0000000000..bd444c1c6c --- /dev/null +++ b/.tether/man/callback_progbar_logger.txt @@ -0,0 +1,158 @@ +Help on class ProgbarLogger in module keras.src.callbacks.progbar_logger: + +class ProgbarLogger(keras.src.callbacks.callback.Callback) + | Callback that prints metrics to stdout. + | + | Args: + | count_mode: One of `"steps"` or `"samples"`. + | Whether the progress bar should + | count samples seen or steps (batches) seen. + | + | Raises: + | ValueError: In case of invalid `count_mode`. + | + | Method resolution order: + | ProgbarLogger + | keras.src.callbacks.callback.Callback + | builtins.object + | + | Methods defined here: + | + | __init__(self) + | Initialize self. See help(type(self)) for accurate signature. + | + | on_epoch_begin( + | self, + | epoch, + | logs=None + | ) + | Called at the start of an epoch. + | + | Subclasses should override for any actions to run. This function should + | only be called during TRAIN mode. + | + | Args: + | epoch: Integer, index of epoch. + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_epoch_end( + | self, + | epoch, + | logs=None + | ) + | Called at the end of an epoch. + | + | Subclasses should override for any actions to run. This function should + | only be called during TRAIN mode. + | + | Args: + | epoch: Integer, index of epoch. + | logs: Dict, metric results for this training epoch, and for the + | validation epoch if validation is performed. Validation result + | keys are prefixed with `val_`. 
For training epoch, the values of + | the `Model`'s metrics are returned. Example: + | `{'loss': 0.2, 'accuracy': 0.7}`. + | + | on_predict_batch_end( + | self, + | batch, + | logs=None + | ) + | Called at the end of a batch in `predict` methods. + | + | Subclasses should override for any actions to run. + | + | Note that if the `steps_per_execution` argument to `compile` in + | `Model` is set to `N`, this method will only be called every + | `N` batches. + | + | Args: + | batch: Integer, index of batch within the current epoch. + | logs: Dict. Aggregated metric results up until this batch. + | + | on_predict_begin(self, logs=None) + | Called at the beginning of prediction. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_predict_end(self, logs=None) + | Called at the end of prediction. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_test_batch_end( + | self, + | batch, + | logs=None + | ) + | Called at the end of a batch in `evaluate` methods. + | + | Also called at the end of a validation batch in the `fit` + | methods, if validation data is provided. + | + | Subclasses should override for any actions to run. + | + | Note that if the `steps_per_execution` argument to `compile` in + | `Model` is set to `N`, this method will only be called every + | `N` batches. + | + | Args: + | batch: Integer, index of batch within the current epoch. + | logs: Dict. Aggregated metric results up until this batch. + | + | on_test_begin(self, logs=None) + | Called at the beginning of evaluation or validation. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_test_end(self, logs=None) + | Called at the end of evaluation or validation. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently the output of the last call to + | `on_test_batch_end()` is passed to this argument for this method + | but that may change in the future. + | + | on_train_batch_end( + | self, + | batch, + | logs=None + | ) + | Called at the end of a training batch in `fit` methods. + | + | Subclasses should override for any actions to run. + | + | Note that if the `steps_per_execution` argument to `compile` in + | `Model` is set to `N`, this method will only be called every + | `N` batches. + | + | Args: + | batch: Integer, index of batch within the current epoch. + | logs: Dict. Aggregated metric results up until this batch. + | + | on_train_begin(self, logs=None) + | Called at the beginning of training. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. 
+ | + | set_params(self, params) + | diff --git a/.tether/man/callback_reduce_lr_on_plateau.txt b/.tether/man/callback_reduce_lr_on_plateau.txt new file mode 100644 index 0000000000..33451653d2 --- /dev/null +++ b/.tether/man/callback_reduce_lr_on_plateau.txt @@ -0,0 +1,90 @@ +Help on class ReduceLROnPlateau in module keras.src.callbacks.reduce_lr_on_plateau: + +class ReduceLROnPlateau(keras.src.callbacks.callback.Callback) + | ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=10, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0.0, **kwargs) + | + | Reduce learning rate when a metric has stopped improving. + | + | Models often benefit from reducing the learning rate by a factor + | of 2-10 once learning stagnates. This callback monitors a + | quantity and if no improvement is seen for a 'patience' number + | of epochs, the learning rate is reduced. + | + | Example: + | + | ```python + | reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, + | patience=5, min_lr=0.001) + | model.fit(x_train, y_train, callbacks=[reduce_lr]) + | ``` + | + | Args: + | monitor: String. Quantity to be monitored. + | factor: Float. Factor by which the learning rate will be reduced. + | `new_lr = lr * factor`. + | patience: Integer. Number of epochs with no improvement after which + | learning rate will be reduced. + | verbose: Integer. 0: quiet, 1: update messages. + | mode: String. One of `{'auto', 'min', 'max'}`. In `'min'` mode, + | the learning rate will be reduced when the + | quantity monitored has stopped decreasing; in `'max'` mode it will + | be reduced when the quantity monitored has stopped increasing; in + | `'auto'` mode, the direction is automatically inferred from the name + | of the monitored quantity. + | min_delta: Float. Threshold for measuring the new optimum, to only focus + | on significant changes. + | cooldown: Integer. Number of epochs to wait before resuming normal + | operation after the learning rate has been reduced. + | min_lr: Float. Lower bound on the learning rate. + | + | Method resolution order: + | ReduceLROnPlateau + | keras.src.callbacks.callback.Callback + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | monitor='val_loss', + | factor=0.1, + | patience=10, + | verbose=0, + | mode='auto', + | min_delta=0.0001, + | cooldown=0, + | min_lr=0.0, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | in_cooldown(self) + | + | on_epoch_end( + | self, + | epoch, + | logs=None + | ) + | Called at the end of an epoch. + | + | Subclasses should override for any actions to run. This function should + | only be called during TRAIN mode. + | + | Args: + | epoch: Integer, index of epoch. + | logs: Dict, metric results for this training epoch, and for the + | validation epoch if validation is performed. Validation result + | keys are prefixed with `val_`. For training epoch, the values of + | the `Model`'s metrics are returned. Example: + | `{'loss': 0.2, 'accuracy': 0.7}`. + | + | on_train_begin(self, logs=None) + | Called at the beginning of training. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. 
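The reduction rule described above combines `factor` and `min_lr` as `new_lr = max(lr * factor, min_lr)`, applied once each time the monitored quantity has failed to improve for `patience` epochs (and any `cooldown` has elapsed). A minimal sketch of just that arithmetic, not the callback's actual implementation:

```python
def reduced_lr(lr, factor=0.1, min_lr=0.0):
    # One reduction step: scale by `factor`, but never go below `min_lr`.
    return max(lr * factor, min_lr)

lr = 0.01
for _ in range(3):  # three successive plateaus
    lr = reduced_lr(lr, factor=0.2, min_lr=0.001)
    print(round(lr, 6))  # 0.002, then 0.001, then clamped at 0.001
```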
+ | + diff --git a/.tether/man/callback_remote_monitor.txt b/.tether/man/callback_remote_monitor.txt new file mode 100644 index 0000000000..871bd7ccf2 --- /dev/null +++ b/.tether/man/callback_remote_monitor.txt @@ -0,0 +1,61 @@ +Help on class RemoteMonitor in module keras.src.callbacks.remote_monitor: + +class RemoteMonitor(keras.src.callbacks.callback.Callback) + | RemoteMonitor(root='http://localhost:9000', path='/publish/epoch/end/', field='data', headers=None, send_as_json=False) + | + | Callback used to stream events to a server. + | + | Requires the `requests` library. + | Events are sent to `root + '/publish/epoch/end/'` by default. Calls are + | HTTP POST, with a `data` argument which is a + | JSON-encoded dictionary of event data. + | If `send_as_json=True`, the content type of the request will be + | `"application/json"`. + | Otherwise the serialized JSON will be sent within a form. + | + | Args: + | root: String; root URL of the target server. + | path: String; path relative to `root` to which the events will be sent. + | field: String; JSON field under which the data will be stored. + | The field is used only if the payload is sent within a form + | (i.e. when `send_as_json=False`). + | headers: Dictionary; optional custom HTTP headers. + | send_as_json: Boolean; whether the request should be + | sent as `"application/json"`. + | + | Method resolution order: + | RemoteMonitor + | keras.src.callbacks.callback.Callback + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | root='http://localhost:9000', + | path='/publish/epoch/end/', + | field='data', + | headers=None, + | send_as_json=False + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | on_epoch_end( + | self, + | epoch, + | logs=None + | ) + | Called at the end of an epoch. + | + | Subclasses should override for any actions to run. This function should + | only be called during TRAIN mode. + | + | Args: + | epoch: Integer, index of epoch. + | logs: Dict, metric results for this training epoch, and for the + | validation epoch if validation is performed. Validation result + | keys are prefixed with `val_`. For training epoch, the values of + | the `Model`'s metrics are returned. Example: + | `{'loss': 0.2, 'accuracy': 0.7}`. + | + diff --git a/.tether/man/callback_swap_ema_weights.txt b/.tether/man/callback_swap_ema_weights.txt new file mode 100644 index 0000000000..b50bd92a7b --- /dev/null +++ b/.tether/man/callback_swap_ema_weights.txt @@ -0,0 +1,125 @@ +Help on class SwapEMAWeights in module keras.src.callbacks.swap_ema_weights: + +class SwapEMAWeights(keras.src.callbacks.callback.Callback) + | SwapEMAWeights(swap_on_epoch=False) + | + | Swaps model weights and EMA weights before and after evaluation. + | + | This callback replaces the model's weight values with the values of + | the optimizer's EMA weights (the exponential moving average of the past + | model weight values, implementing "Polyak averaging") before model + | evaluation, and restores the previous weights after evaluation. + | + | The `SwapEMAWeights` callback is to be used in conjunction with + | an optimizer that sets `use_ema=True`. + | + | Note that the weights are swapped in-place in order to save memory. + | The behavior is undefined if you modify the EMA weights + | or model weights in other callbacks. + | + | Example: + | + | ```python + | # Remember to set `use_ema=True` in the optimizer + | optimizer = SGD(use_ema=True) + | model.compile(optimizer=optimizer, loss=..., metrics=...)
+ | + | # Metrics will be computed with EMA weights + | model.fit(X_train, Y_train, callbacks=[SwapEMAWeights()]) + | + | # If you want to save model checkpoint with EMA weights, you can set + | # `swap_on_epoch=True` and place ModelCheckpoint after SwapEMAWeights. + | model.fit( + | X_train, + | Y_train, + | callbacks=[SwapEMAWeights(swap_on_epoch=True), ModelCheckpoint(...)] + | ) + | ``` + | + | Args: + | swap_on_epoch: whether to perform swapping at `on_epoch_begin()` + | and `on_epoch_end()`. This is useful if you want to use + | EMA weights for other callbacks such as `ModelCheckpoint`. + | Defaults to `False`. + | + | Method resolution order: + | SwapEMAWeights + | keras.src.callbacks.callback.Callback + | builtins.object + | + | Methods defined here: + | + | __init__(self, swap_on_epoch=False) + | Initialize self. See help(type(self)) for accurate signature. + | + | on_epoch_begin( + | self, + | epoch, + | logs=None + | ) + | Called at the start of an epoch. + | + | Subclasses should override for any actions to run. This function should + | only be called during TRAIN mode. + | + | Args: + | epoch: Integer, index of epoch. + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_epoch_end( + | self, + | epoch, + | logs=None + | ) + | Called at the end of an epoch. + | + | Subclasses should override for any actions to run. This function should + | only be called during TRAIN mode. + | + | Args: + | epoch: Integer, index of epoch. + | logs: Dict, metric results for this training epoch, and for the + | validation epoch if validation is performed. Validation result + | keys are prefixed with `val_`. For training epoch, the values of + | the `Model`'s metrics are returned. Example: + | `{'loss': 0.2, 'accuracy': 0.7}`. + | + | on_predict_begin(self, logs=None) + | Called at the beginning of prediction. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_predict_end(self, logs=None) + | Called at the end of prediction. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_test_begin(self, logs=None) + | Called at the beginning of evaluation or validation. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_test_end(self, logs=None) + | Called at the end of evaluation or validation. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently the output of the last call to + | `on_test_batch_end()` is passed to this argument for this method + | but that may change in the future. 
+ | + diff --git a/.tether/man/callback_tensorboard.txt b/.tether/man/callback_tensorboard.txt new file mode 100644 index 0000000000..0173bde66f --- /dev/null +++ b/.tether/man/callback_tensorboard.txt @@ -0,0 +1,267 @@ +Help on class TensorBoard in module keras.src.callbacks.tensorboard: + +class TensorBoard(keras.src.callbacks.callback.Callback) + | TensorBoard(log_dir='logs', histogram_freq=0, write_graph=True, write_images=False, write_steps_per_second=False, update_freq='epoch', profile_batch=0, embeddings_freq=0, embeddings_metadata=None) + | + | Enable visualizations for TensorBoard. + | + | TensorBoard is a visualization tool provided with TensorFlow. A TensorFlow + | installation is required to use this callback. + | + | This callback logs events for TensorBoard, including: + | + | * Metrics summary plots + | * Training graph visualization + | * Weight histograms + | * Sampled profiling + | + | When used in `model.evaluate()` or regular validation, + | in addition to epoch summaries a summary is written that records + | evaluation metrics vs `model.optimizer.iterations`. The metric names + | will be prepended with `evaluation`, with `model.optimizer.iterations` being + | the step in the visualized TensorBoard. + | + | If you have installed TensorFlow with pip, you should be able + | to launch TensorBoard from the command line: + | + | ``` + | tensorboard --logdir=path_to_your_logs + | ``` + | + | You can find more information about TensorBoard + | [here](https://www.tensorflow.org/get_started/summaries_and_tensorboard). + | + | Args: + | log_dir: the path of the directory in which to save the log files to be + | parsed by TensorBoard. e.g., + | `log_dir = os.path.join(working_dir, 'logs')`. + | This directory should not be reused by any other callbacks. + | histogram_freq: frequency (in epochs) at which to compute + | weight histograms for the layers of the model. If set to 0, + | histograms won't be computed. Validation data (or split) must be + | specified for histogram visualizations. + | write_graph: (Not supported at this time) + | Whether to visualize the graph in TensorBoard. + | Note that the log file can become quite large + | when `write_graph` is set to `True`. + | write_images: whether to write model weights to visualize as images in + | TensorBoard. + | write_steps_per_second: whether to log the training steps per second + | into TensorBoard. This supports both epoch and batch frequency + | logging. + | update_freq: `"batch"` or `"epoch"` or integer. When using `"epoch"`, + | writes the losses and metrics to TensorBoard after every epoch. + | If using an integer, let's say `1000`, all metrics and losses + | (including custom ones added by `Model.compile`) will be logged to + | TensorBoard every 1000 batches. `"batch"` is a synonym for 1, + | meaning that they will be written every batch. + | Note however that writing too frequently to TensorBoard can slow + | down your training, especially when used with distribution + | strategies as it will incur additional synchronization overhead. + | Batch-level summary writing is also available via `train_step` + | override. Please see + | [TensorBoard Scalars tutorial]( + | https://www.tensorflow.org/tensorboard/scalars_and_keras#batch-level_logging) # noqa: E501 + | for more details. + | profile_batch: (Not supported at this time) + | Profile the batch(es) to sample compute characteristics. + | profile_batch must be a non-negative integer or a tuple of integers.
+ | A pair of positive integers signify a range of batches to profile. + | By default, profiling is disabled. + | embeddings_freq: frequency (in epochs) at which embedding layers will be + | visualized. If set to 0, embeddings won't be visualized. + | embeddings_metadata: Dictionary which maps embedding layer names to the + | filename of a file in which to save metadata for the embedding layer. + | In case the same metadata file is to be + | used for all embedding layers, a single filename can be passed. + | + | Examples: + | + | ```python + | tensorboard_callback = keras.callbacks.TensorBoard(log_dir="./logs") + | model.fit(x_train, y_train, epochs=2, callbacks=[tensorboard_callback]) + | # Then run the tensorboard command to view the visualizations. + | ``` + | + | Custom batch-level summaries in a subclassed Model: + | + | ```python + | class MyModel(keras.Model): + | + | def build(self, _): + | self.dense = keras.layers.Dense(10) + | + | def call(self, x): + | outputs = self.dense(x) + | tf.summary.histogram('outputs', outputs) + | return outputs + | + | model = MyModel() + | model.compile('sgd', 'mse') + | + | # Make sure to set `update_freq=N` to log a batch-level summary every N + | # batches. In addition to any `tf.summary` contained in `model.call()`, + | # metrics added in `Model.compile` will be logged every N batches. + | tb_callback = keras.callbacks.TensorBoard('./logs', update_freq=1) + | model.fit(x_train, y_train, callbacks=[tb_callback]) + | ``` + | + | Custom batch-level summaries in a Functional API Model: + | + | ```python + | def my_summary(x): + | tf.summary.histogram('x', x) + | return x + | + | inputs = keras.Input(10) + | x = keras.layers.Dense(10)(inputs) + | outputs = keras.layers.Lambda(my_summary)(x) + | model = keras.Model(inputs, outputs) + | model.compile('sgd', 'mse') + | + | # Make sure to set `update_freq=N` to log a batch-level summary every N + | # batches. In addition to any `tf.summary` contained in `Model.call`, + | # metrics added in `Model.compile` will be logged every N batches. + | tb_callback = keras.callbacks.TensorBoard('./logs', update_freq=1) + | model.fit(x_train, y_train, callbacks=[tb_callback]) + | ``` + | + | Profiling: + | + | ```python + | # Profile a single batch, e.g. the 5th batch. + | tensorboard_callback = keras.callbacks.TensorBoard( + | log_dir='./logs', profile_batch=5) + | model.fit(x_train, y_train, epochs=2, callbacks=[tensorboard_callback]) + | + | # Profile a range of batches, e.g. from 10 to 20. + | tensorboard_callback = keras.callbacks.TensorBoard( + | log_dir='./logs', profile_batch=(10,20)) + | model.fit(x_train, y_train, epochs=2, callbacks=[tensorboard_callback]) + | ``` + | + | Method resolution order: + | TensorBoard + | keras.src.callbacks.callback.Callback + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | log_dir='logs', + | histogram_freq=0, + | write_graph=True, + | write_images=False, + | write_steps_per_second=False, + | update_freq='epoch', + | profile_batch=0, + | embeddings_freq=0, + | embeddings_metadata=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | on_epoch_begin( + | self, + | epoch, + | logs=None + | ) + | Called at the start of an epoch. + | + | Subclasses should override for any actions to run. This function should + | only be called during TRAIN mode. + | + | Args: + | epoch: Integer, index of epoch. + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. 
+ | + | on_epoch_end( + | self, + | epoch, + | logs=None + | ) + | Runs metrics and histogram summaries at epoch end. + | + | on_test_begin(self, logs=None) + | Called at the beginning of evaluation or validation. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_test_end(self, logs=None) + | Called at the end of evaluation or validation. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently the output of the last call to + | `on_test_batch_end()` is passed to this argument for this method + | but that may change in the future. + | + | on_train_batch_begin( + | self, + | batch, + | logs=None + | ) + | Called at the beginning of a training batch in `fit` methods. + | + | Subclasses should override for any actions to run. + | + | Note that if the `steps_per_execution` argument to `compile` in + | `Model` is set to `N`, this method will only be called every + | `N` batches. + | + | Args: + | batch: Integer, index of batch within the current epoch. + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_train_batch_end( + | self, + | batch, + | logs=None + | ) + | Called at the end of a training batch in `fit` methods. + | + | Subclasses should override for any actions to run. + | + | Note that if the `steps_per_execution` argument to `compile` in + | `Model` is set to `N`, this method will only be called every + | `N` batches. + | + | Args: + | batch: Integer, index of batch within the current epoch. + | logs: Dict. Aggregated metric results up until this batch. + | + | on_train_begin(self, logs=None) + | Called at the beginning of training. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently no data is passed to this argument for this + | method but that may change in the future. + | + | on_train_end(self, logs=None) + | Called at the end of training. + | + | Subclasses should override for any actions to run. + | + | Args: + | logs: Dict. Currently the output of the last call to + | `on_epoch_end()` is passed to this argument for this method but + | that may change in the future. + | + | set_model(self, model) + | Sets Keras model and writes graph if specified. + | + | ---------------------------------------------------------------------- + | Readonly properties defined here: + | + | summary + | + diff --git a/.tether/man/callback_terminate_on_nan.txt b/.tether/man/callback_terminate_on_nan.txt new file mode 100644 index 0000000000..340bfedc6a --- /dev/null +++ b/.tether/man/callback_terminate_on_nan.txt @@ -0,0 +1,19 @@ +Help on class TerminateOnNaN in module keras.src.callbacks.terminate_on_nan: + +class TerminateOnNaN(keras.src.callbacks.callback.Callback) + | Callback that terminates training when a NaN loss is encountered. + | + | Method resolution order: + | TerminateOnNaN + | keras.src.callbacks.callback.Callback + | builtins.object + | + | Methods defined here: + | + | on_batch_end( + | self, + | batch, + | logs=None + | ) + | A backwards compatibility alias for `on_train_batch_end`. 
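The terse help above hides a very small implementation; a hedged, hand-rolled equivalent of `TerminateOnNaN` (assuming the batch `logs` carry a `loss` entry) could look like:

```python
import math
import keras

class StopOnNaN(keras.callbacks.Callback):
    """Illustrative stand-in for TerminateOnNaN: halt on NaN/inf loss."""

    def on_batch_end(self, batch, logs=None):
        loss = (logs or {}).get("loss")
        if loss is not None:
            loss = float(loss)
            if math.isnan(loss) or math.isinf(loss):
                print(f"Batch {batch}: invalid loss, terminating training.")
                self.model.stop_training = True
```

Setting `self.model.stop_training = True` is the documented way for a callback to stop `fit()` cleanly.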
+ | diff --git a/.tether/man/clear_session.txt b/.tether/man/clear_session.txt new file mode 100644 index 0000000000..a76be2fe91 --- /dev/null +++ b/.tether/man/clear_session.txt @@ -0,0 +1,50 @@ +__signature__ +keras.utils.clear_session(free_memory=True) +__doc__ +Resets all state generated by Keras. + +Keras manages a global state, which it uses to implement the Functional +model-building API and to uniquify autogenerated layer names. + +If you are creating many models in a loop, this global state will consume +an increasing amount of memory over time, and you may want to clear it. +Calling `clear_session()` releases the global state: this helps avoid +clutter from old models and layers, especially when memory is limited. + +Args: + free_memory: Whether to call Python garbage collection. + It's usually a good practice to call it to make sure + memory used by deleted objects is immediately freed. + However, it may take a few seconds to execute, so + when using `clear_session()` in a short loop, + you may want to skip it. + +Example 1: calling `clear_session()` when creating models in a loop + +```python +for _ in range(100): + # Without `clear_session()`, each iteration of this loop will + # slightly increase the size of the global state managed by Keras + model = keras.Sequential([ + keras.layers.Dense(10) for _ in range(10)]) + +for _ in range(100): + # With `clear_session()` called at the beginning, + # Keras starts with a blank state at each iteration + # and memory consumption is constant over time. + keras.backend.clear_session() + model = keras.Sequential([ + keras.layers.Dense(10) for _ in range(10)]) +``` + +Example 2: resetting the layer name generation counter + +>>> layers = [keras.layers.Dense(10) for _ in range(10)] +>>> new_layer = keras.layers.Dense(10) +>>> print(new_layer.name) +dense_10 +>>> keras.backend.clear_session() +>>> new_layer = keras.layers.Dense(10) +>>> print(new_layer.name) +dense + diff --git a/.tether/man/compile.keras.src.models.model.Model.txt b/.tether/man/compile.keras.src.models.model.Model.txt new file mode 100644 index 0000000000..f653386969 --- /dev/null +++ b/.tether/man/compile.keras.src.models.model.Model.txt @@ -0,0 +1,104 @@ +__signature__ +keras.Model.compile( + self, + optimizer='rmsprop', + loss=None, + loss_weights=None, + metrics=None, + weighted_metrics=None, + run_eagerly=False, + steps_per_execution=1, + jit_compile='auto', + auto_scale_loss=True +) +__doc__ +Configures the model for training. + +Example: + +```python +model.compile( + optimizer=keras.optimizers.Adam(learning_rate=1e-3), + loss=keras.losses.BinaryCrossentropy(), + metrics=[ + keras.metrics.BinaryAccuracy(), + keras.metrics.FalseNegatives(), + ], +) +``` + +Args: + optimizer: String (name of optimizer) or optimizer instance. See + `keras.optimizers`. + loss: Loss function. May be a string (name of loss function), or + a `keras.losses.Loss` instance. See `keras.losses`. A + loss function is any callable with the signature + `loss = fn(y_true, y_pred)`, where `y_true` are the ground truth + values, and `y_pred` are the model's predictions. + `y_true` should have shape `(batch_size, d0, .. dN)` + (except in the case of sparse loss functions such as + sparse categorical crossentropy which expects integer arrays of + shape `(batch_size, d0, .. dN-1)`). + `y_pred` should have shape `(batch_size, d0, .. dN)`. + The loss function should return a float tensor. 
+ loss_weights: Optional list or dictionary specifying scalar + coefficients (Python floats) to weight the loss contributions of + different model outputs. The loss value that will be minimized + by the model will then be the *weighted sum* of all individual + losses, weighted by the `loss_weights` coefficients. If a list, + it is expected to have a 1:1 mapping to the model's outputs. If + a dict, it is expected to map output names (strings) to scalar + coefficients. + metrics: List of metrics to be evaluated by the model during + training and testing. Each of these can be a string (name of a + built-in function), a function, or a `keras.metrics.Metric` + instance. See `keras.metrics`. Typically you will use + `metrics=['accuracy']`. A function is any callable with the + signature `result = fn(y_true, y_pred)`. To specify different + metrics for different outputs of a multi-output model, you could + also pass a dictionary, such as + `metrics={'a':'accuracy', 'b':['accuracy', 'mse']}`. + You can also pass a list to specify a metric or a list of + metrics for each output, such as + `metrics=[['accuracy'], ['accuracy', 'mse']]` + or `metrics=['accuracy', ['accuracy', 'mse']]`. When you pass + the strings 'accuracy' or 'acc', we convert this to one of + `keras.metrics.BinaryAccuracy`, + `keras.metrics.CategoricalAccuracy`, + `keras.metrics.SparseCategoricalAccuracy` based on the + shapes of the targets and of the model output. A similar + conversion is done for the strings `"crossentropy"` + and `"ce"` as well. + The metrics passed here are evaluated without sample weighting; + if you would like sample weighting to apply, you can specify + your metrics via the `weighted_metrics` argument instead. + weighted_metrics: List of metrics to be evaluated and weighted by + `sample_weight` or `class_weight` during training and testing. + run_eagerly: Bool. If `True`, this model's forward pass + will never be compiled. It is recommended to leave this + as `False` when training (for best performance), + and to set it to `True` when debugging. + steps_per_execution: Int. The number of batches to run + during a single compiled function call. Running multiple + batches inside a single compiled function call can + greatly improve performance on TPUs or small models with a large + Python overhead. At most, one full epoch will be run each + execution. If a number larger than the size of the epoch is + passed, the execution will be truncated to the size of the + epoch. Note that if `steps_per_execution` is set to `N`, + `Callback.on_batch_begin` and `Callback.on_batch_end` methods + will only be called every `N` batches (i.e. before/after + each compiled function execution). + Not supported with the PyTorch backend. + jit_compile: Bool or `"auto"`. Whether to use XLA compilation when + compiling a model. For `jax` and `tensorflow` backends, + `jit_compile="auto"` enables XLA compilation if the model + supports it, and disables it otherwise. + For `torch` backend, `"auto"` will default to eager + execution and `jit_compile=True` will run with `torch.compile` + with the `"inductor"` backend. + auto_scale_loss: Bool. If `True` and the model dtype policy is + `"mixed_float16"`, the passed optimizer will be automatically + wrapped in a `LossScaleOptimizer`, which will dynamically + scale the loss to prevent underflow.
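To make the multi-output `loss_weights`/`metrics` conventions and the `steps_per_execution`/`jit_compile` flags described above concrete, here is a hedged sketch; the two-output model and the output names "a" and "b" are assumptions for illustration:

```python
import keras

# Tiny two-output functional model; names "a" and "b" are illustrative.
inputs = keras.Input(shape=(8,))
a = keras.layers.Dense(1, name="a")(inputs)
b = keras.layers.Dense(1, activation="sigmoid", name="b")(inputs)
model = keras.Model(inputs, outputs={"a": a, "b": b})

model.compile(
    optimizer="adam",
    loss={"a": "mse", "b": "binary_crossentropy"},
    loss_weights={"a": 1.0, "b": 0.5},  # total loss = 1.0*loss_a + 0.5*loss_b
    metrics={"a": ["mse"], "b": ["accuracy"]},
    steps_per_execution=4,  # 4 batches per compiled call; batch hooks fire every 4
    jit_compile="auto",     # XLA where the backend/model supports it
)
```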
+ diff --git a/.tether/man/config_backend.txt b/.tether/man/config_backend.txt new file mode 100644 index 0000000000..8ad3387984 --- /dev/null +++ b/.tether/man/config_backend.txt @@ -0,0 +1,13 @@ +__signature__ +keras.config.backend() +__doc__ +Publicly accessible method for determining the current backend. + +Returns: + String, the name of the backend Keras is currently using. One of + `"tensorflow"`, `"torch"`, or `"jax"`. + +Example: + +>>> keras.config.backend() +'tensorflow' diff --git a/.tether/man/config_disable_interactive_logging.txt b/.tether/man/config_disable_interactive_logging.txt new file mode 100644 index 0000000000..5b43eaa09b --- /dev/null +++ b/.tether/man/config_disable_interactive_logging.txt @@ -0,0 +1,8 @@ +__signature__ +keras.config.disable_interactive_logging() +__doc__ +Turn off interactive logging. + +When interactive logging is disabled, Keras sends logs to `absl.logging`. +This is the best option when using Keras in a non-interactive +way, such as running a training or inference job on a server. diff --git a/.tether/man/config_disable_traceback_filtering.txt b/.tether/man/config_disable_traceback_filtering.txt new file mode 100644 index 0000000000..3932d4eb5a --- /dev/null +++ b/.tether/man/config_disable_traceback_filtering.txt @@ -0,0 +1,18 @@ +__signature__ +keras.config.disable_traceback_filtering() +__doc__ +Turn off traceback filtering. + +Raw Keras tracebacks (also known as stack traces) +involve many internal frames, which can be +challenging to read through, while not being actionable for end users. +By default, Keras filters internal frames in most exceptions that it +raises, to keep traceback short, readable, and focused on what's +actionable for you (your own code). + +See also `keras.config.enable_traceback_filtering()` and +`keras.config.is_traceback_filtering_enabled()`. + +If you have previously disabled traceback filtering via +`keras.config.disable_traceback_filtering()`, you can re-enable it via +`keras.config.enable_traceback_filtering()`. diff --git a/.tether/man/config_dtype_policy.txt b/.tether/man/config_dtype_policy.txt new file mode 100644 index 0000000000..ebe439d4b2 --- /dev/null +++ b/.tether/man/config_dtype_policy.txt @@ -0,0 +1,6 @@ +__signature__ +keras.config.dtype_policy +() +__doc__ +Returns the current default dtype policy object. + diff --git a/.tether/man/config_enable_interactive_logging.txt b/.tether/man/config_enable_interactive_logging.txt new file mode 100644 index 0000000000..4875ab4769 --- /dev/null +++ b/.tether/man/config_enable_interactive_logging.txt @@ -0,0 +1,8 @@ +__signature__ +keras.config.enable_interactive_logging() +__doc__ +Turn on interactive logging. + +When interactive logging is enabled, Keras displays logs via stdout. +This provides the best experience when using Keras in an interactive +environment such as a shell or a notebook. diff --git a/.tether/man/config_enable_traceback_filtering.txt b/.tether/man/config_enable_traceback_filtering.txt new file mode 100644 index 0000000000..0c99767067 --- /dev/null +++ b/.tether/man/config_enable_traceback_filtering.txt @@ -0,0 +1,18 @@ +__signature__ +keras.config.enable_traceback_filtering() +__doc__ +Turn on traceback filtering. + +Raw Keras tracebacks (also known as stack traces) +involve many internal frames, which can be +challenging to read through, while not being actionable for end users. 
+By default, Keras filters internal frames in most exceptions that it +raises, to keep traceback short, readable, and focused on what's +actionable for you (your own code). + +See also `keras.config.disable_traceback_filtering()` and +`keras.config.is_traceback_filtering_enabled()`. + +If you have previously disabled traceback filtering via +`keras.config.disable_traceback_filtering()`, you can re-enable it via +`keras.config.enable_traceback_filtering()`. diff --git a/.tether/man/config_enable_unsafe_deserialization.txt b/.tether/man/config_enable_unsafe_deserialization.txt new file mode 100644 index 0000000000..b7176fc2c6 --- /dev/null +++ b/.tether/man/config_enable_unsafe_deserialization.txt @@ -0,0 +1,4 @@ +__signature__ +keras.config.enable_unsafe_deserialization() +__doc__ +Disables safe mode globally, allowing deserialization of lambdas. diff --git a/.tether/man/config_epsilon.txt b/.tether/man/config_epsilon.txt new file mode 100644 index 0000000000..850b5cc203 --- /dev/null +++ b/.tether/man/config_epsilon.txt @@ -0,0 +1,12 @@ +__signature__ +keras.config.epsilon() +__doc__ +Return the value of the fuzz factor used in numeric expressions. + +Returns: + A float. + +Example: + +>>> keras.config.epsilon() +1e-07 diff --git a/.tether/man/config_floatx.txt b/.tether/man/config_floatx.txt new file mode 100644 index 0000000000..8531aa8981 --- /dev/null +++ b/.tether/man/config_floatx.txt @@ -0,0 +1,15 @@ +__signature__ +keras.config.floatx() +__doc__ +Return the default float type, as a string. + +E.g. `'bfloat16'`, `'float16'`, `'float32'`, `'float64'`. + +Returns: + String, the current default float type. + +Example: + +>>> keras.config.floatx() +'float32' + diff --git a/.tether/man/config_image_data_format.txt b/.tether/man/config_image_data_format.txt new file mode 100644 index 0000000000..dc661563c4 --- /dev/null +++ b/.tether/man/config_image_data_format.txt @@ -0,0 +1,12 @@ +__signature__ +keras.config.image_data_format() +__doc__ +Return the default image data format convention. + +Returns: + A string, either `'channels_first'` or `'channels_last'`. + +Example: + +>>> keras.config.image_data_format() +'channels_last' diff --git a/.tether/man/config_is_interactive_logging_enabled.txt b/.tether/man/config_is_interactive_logging_enabled.txt new file mode 100644 index 0000000000..8c653d8338 --- /dev/null +++ b/.tether/man/config_is_interactive_logging_enabled.txt @@ -0,0 +1,13 @@ +__signature__ +keras.config.is_interactive_logging_enabled() +__doc__ +Check if interactive logging is enabled. + +To switch between writing logs to stdout and `absl.logging`, you may use +`keras.config.enable_interactive_logging()` and +`keras.config.disable_interactive_logging()`. + +Returns: + Boolean, `True` if interactive logging is enabled, + and `False` otherwise. + diff --git a/.tether/man/config_is_traceback_filtering_enabled.txt b/.tether/man/config_is_traceback_filtering_enabled.txt new file mode 100644 index 0000000000..0d71844ed5 --- /dev/null +++ b/.tether/man/config_is_traceback_filtering_enabled.txt @@ -0,0 +1,22 @@ +__signature__ +keras.config.is_traceback_filtering_enabled() +__doc__ +Check if traceback filtering is enabled. + +Raw Keras tracebacks (also known as stack traces) +involve many internal frames, which can be +challenging to read through, while not being actionable for end users. +By default, Keras filters internal frames in most exceptions that it +raises, to keep traceback short, readable, and focused on what's +actionable for you (your own code). 
+ +See also `keras.config.enable_traceback_filtering()` and +`keras.config.disable_traceback_filtering()`. + +If you have previously disabled traceback filtering via +`keras.config.disable_traceback_filtering()`, you can re-enable it via +`keras.config.enable_traceback_filtering()`. + +Returns: + Boolean, `True` if traceback filtering is enabled, + and `False` otherwise. diff --git a/.tether/man/config_set_backend.txt b/.tether/man/config_set_backend.txt new file mode 100644 index 0000000000..a5f310e85f --- /dev/null +++ b/.tether/man/config_set_backend.txt @@ -0,0 +1,23 @@ +__signature__ +keras.config.set_backend(backend) +__doc__ +Reload the backend (and the Keras package). + +Example: + +```python +keras.config.set_backend("jax") +``` + +⚠️ WARNING ⚠️: Using this function is dangerous and should be done +carefully. Changing the backend will **NOT** convert +the type of any already-instantiated objects. +Thus, any layers / tensors / etc. already created will no +longer be usable without errors. It is strongly recommended **not** +to keep around **any** Keras-originated object instances created +before calling `set_backend()`. + +This includes any function or class instance that uses any Keras +functionality. All such code needs to be re-executed after calling +`set_backend()`. + diff --git a/.tether/man/config_set_dtype_policy.txt b/.tether/man/config_set_dtype_policy.txt new file mode 100644 index 0000000000..16233453d0 --- /dev/null +++ b/.tether/man/config_set_dtype_policy.txt @@ -0,0 +1,9 @@ +__signature__ +keras.config.set_dtype_policy(policy) +__doc__ +Sets the default dtype policy globally. + +Example: + +>>> keras.config.set_dtype_policy("mixed_float16") + diff --git a/.tether/man/config_set_epsilon.txt b/.tether/man/config_set_epsilon.txt new file mode 100644 index 0000000000..d39ae4f41f --- /dev/null +++ b/.tether/man/config_set_epsilon.txt @@ -0,0 +1,18 @@ +__signature__ +keras.config.set_epsilon(value) +__doc__ +Set the value of the fuzz factor used in numeric expressions. + +Args: + value: float. New value of epsilon. + +Examples: +>>> keras.config.epsilon() +1e-07 + +>>> keras.config.set_epsilon(1e-5) +>>> keras.config.epsilon() +1e-05 + +>>> # Set it back to the default value. +>>> keras.config.set_epsilon(1e-7) diff --git a/.tether/man/config_set_floatx.txt b/.tether/man/config_set_floatx.txt new file mode 100644 index 0000000000..0acdec41b7 --- /dev/null +++ b/.tether/man/config_set_floatx.txt @@ -0,0 +1,28 @@ +__signature__ +keras.config.set_floatx(value) +__doc__ +Set the default float dtype. + +Note: It is not recommended to set this to `"float16"` for training, +as this will likely cause numeric stability issues. +Instead, use mixed precision, which leverages +a mix of `float16` and `float32`; it can be configured by calling +`keras.mixed_precision.set_dtype_policy('mixed_float16')`. + +Args: + value: String; `'bfloat16'`, `'float16'`, `'float32'`, or `'float64'`. + +Examples: +>>> keras.config.floatx() +'float32' + +>>> keras.config.set_floatx('float64') +>>> keras.config.floatx() +'float64' + +>>> # Set it back to float32 +>>> keras.config.set_floatx('float32') + +Raises: + ValueError: In case of invalid value. + diff --git a/.tether/man/config_set_image_data_format.txt b/.tether/man/config_set_image_data_format.txt new file mode 100644 index 0000000000..b1c568e254 --- /dev/null +++ b/.tether/man/config_set_image_data_format.txt @@ -0,0 +1,19 @@ +__signature__ +keras.config.set_image_data_format(data_format) +__doc__ +Set the value of the image data format convention.
+ +Args: + data_format: string. `'channels_first'` or `'channels_last'`. + +Examples: + +>>> keras.config.image_data_format() +'channels_last' + +>>> keras.config.set_image_data_format('channels_first') +>>> keras.config.image_data_format() +'channels_first' + +>>> # Set it back to `'channels_last'` +>>> keras.config.set_image_data_format('channels_last') diff --git a/.tether/man/constraint_maxnorm.txt b/.tether/man/constraint_maxnorm.txt new file mode 100644 index 0000000000..0c7f787ecd --- /dev/null +++ b/.tether/man/constraint_maxnorm.txt @@ -0,0 +1,62 @@ +Help on class MaxNorm in module keras.src.constraints.constraints: + +class MaxNorm(Constraint) + | MaxNorm(max_value=2, axis=0) + | + | MaxNorm weight constraint. + | + | Constrains the weights incident to each hidden unit + | to have a norm less than or equal to a desired value. + | + | Also available via the shortcut function `keras.constraints.max_norm`. + | + | Args: + | max_value: the maximum norm value for the incoming weights. + | axis: integer, axis along which to calculate weight norms. + | For instance, in a `Dense` layer the weight matrix + | has shape `(input_dim, output_dim)`, + | set `axis` to `0` to constrain each weight vector + | of length `(input_dim,)`. + | In a `Conv2D` layer with `data_format="channels_last"`, + | the weight tensor has shape + | `(rows, cols, input_depth, output_depth)`, + | set `axis` to `[0, 1, 2]` + | to constrain the weights of each filter tensor of size + | `(rows, cols, input_depth)`. + | + | Method resolution order: + | MaxNorm + | Constraint + | builtins.object + | + | Methods defined here: + | + | __call__(self, w) + | Applies the constraint to the input weight variable. + | + | By default, the input weight variable is not modified. + | Users should override this method to implement their own projection + | function. + | + | Args: + | w: Input weight variable. + | + | Returns: + | Projected variable (by default, returns unmodified inputs). + | + | __init__( + | self, + | max_value=2, + | axis=0 + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Returns a Python dict of the object config. + | + | A constraint config is a Python dictionary (JSON-serializable) that can + | be used to reinstantiate the same object. + | + | Returns: + | Python dict containing the configuration of the constraint object. + | diff --git a/.tether/man/constraint_minmaxnorm.txt b/.tether/man/constraint_minmaxnorm.txt new file mode 100644 index 0000000000..1b244352d3 --- /dev/null +++ b/.tether/man/constraint_minmaxnorm.txt @@ -0,0 +1,70 @@ +Help on class MinMaxNorm in module keras.src.constraints.constraints: + +class MinMaxNorm(Constraint) + | MinMaxNorm(min_value=0.0, max_value=1.0, rate=1.0, axis=0) + | + | MinMaxNorm weight constraint. + | + | Constrains the weights incident to each hidden unit + | to have the norm between a lower bound and an upper bound. + | + | Args: + | min_value: the minimum norm for the incoming weights. + | max_value: the maximum norm for the incoming weights. + | rate: rate for enforcing the constraint: weights will be + | rescaled to yield + | `(1 - rate) * norm + rate * norm.clip(min_value, max_value)`. + | Effectively, this means that rate=1.0 stands for strict + | enforcement of the constraint, while rate<1.0 means that + | weights will be rescaled at each step to slowly move + | towards a value inside the desired interval. + | axis: integer, axis along which to calculate weight norms.
+ | For instance, in a `Dense` layer the weight matrix + | has shape `(input_dim, output_dim)`, + | set `axis` to `0` to constrain each weight vector + | of length `(input_dim,)`. + | In a `Conv2D` layer with `data_format="channels_last"`, + | the weight tensor has shape + | `(rows, cols, input_depth, output_depth)`, + | set `axis` to `[0, 1, 2]` + | to constrain the weights of each filter tensor of size + | `(rows, cols, input_depth)`. + | + | Method resolution order: + | MinMaxNorm + | Constraint + | builtins.object + | + | Methods defined here: + | + | __call__(self, w) + | Applies the constraint to the input weight variable. + | + | By default, the input weight variable is not modified. + | Users should override this method to implement their own projection + | function. + | + | Args: + | w: Input weight variable. + | + | Returns: + | Projected variable (by default, returns unmodified inputs). + | + | __init__( + | self, + | min_value=0.0, + | max_value=1.0, + | rate=1.0, + | axis=0 + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Returns a Python dict of the object config. + | + | A constraint config is a Python dictionary (JSON-serializable) that can + | be used to reinstantiate the same object. + | + | Returns: + | Python dict containing the configuration of the constraint object. + | diff --git a/.tether/man/constraint_nonneg.txt b/.tether/man/constraint_nonneg.txt new file mode 100644 index 0000000000..3affdff1b0 --- /dev/null +++ b/.tether/man/constraint_nonneg.txt @@ -0,0 +1,25 @@ +Help on class NonNeg in module keras.src.constraints.constraints: + +class NonNeg(Constraint) + | Constrains the weights to be non-negative. + | + | Method resolution order: + | NonNeg + | Constraint + | builtins.object + | + | Methods defined here: + | + | __call__(self, w) + | Applies the constraint to the input weight variable. + | + | By default, the input weight variable is not modified. + | Users should override this method to implement their own projection + | function. + | + | Args: + | w: Input weight variable. + | + | Returns: + | Projected variable (by default, returns unmodified inputs). + | diff --git a/.tether/man/constraint_unitnorm.txt b/.tether/man/constraint_unitnorm.txt new file mode 100644 index 0000000000..d0806d7671 --- /dev/null +++ b/.tether/man/constraint_unitnorm.txt @@ -0,0 +1,52 @@ +Help on class UnitNorm in module keras.src.constraints.constraints: + +class UnitNorm(Constraint) + | UnitNorm(axis=0) + | + | Constrains the weights incident to each hidden unit to have unit norm. + | + | Args: + | axis: integer, axis along which to calculate weight norms. + | For instance, in a `Dense` layer the weight matrix + | has shape `(input_dim, output_dim)`, + | set `axis` to `0` to constrain each weight vector + | of length `(input_dim,)`. + | In a `Conv2D` layer with `data_format="channels_last"`, + | the weight tensor has shape + | `(rows, cols, input_depth, output_depth)`, + | set `axis` to `[0, 1, 2]` + | to constrain the weights of each filter tensor of size + | `(rows, cols, input_depth)`. + | + | Method resolution order: + | UnitNorm + | Constraint + | builtins.object + | + | Methods defined here: + | + | __call__(self, w) + | Applies the constraint to the input weight variable. + | + | By default, the input weight variable is not modified. + | Users should override this method to implement their own projection + | function. + | + | Args: + | w: Input weight variable.
+ | + | Returns: + | Projected variable (by default, returns unmodified inputs). + | + | __init__(self, axis=0) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Returns a Python dict of the object config. + | + | A constraint config is a Python dictionary (JSON-serializable) that can + | be used to reinstantiate the same object. + | + | Returns: + | Python dict containing the configuration of the constraint object. + | diff --git a/.tether/man/evaluate.keras.src.models.model.Model.txt b/.tether/man/evaluate.keras.src.models.model.Model.txt new file mode 100644 index 0000000000..896bd3b0bd --- /dev/null +++ b/.tether/man/evaluate.keras.src.models.model.Model.txt @@ -0,0 +1,74 @@ +__signature__ +keras.Model.evaluate( + self, + x=None, + y=None, + batch_size=None, + verbose='auto', + sample_weight=None, + steps=None, + callbacks=None, + return_dict=False, + **kwargs +) +__doc__ +Returns the loss value & metrics values for the model in test mode. + +Computation is done in batches (see the `batch_size` arg.) + +Args: + x: Input data. It could be: + - A NumPy array (or array-like), or a list of arrays + (in case the model has multiple inputs). + - A tensor, or a list of tensors + (in case the model has multiple inputs). + - A dict mapping input names to the corresponding array/tensors, + if the model has named inputs. + - A `tf.data.Dataset`. Should return a tuple + of either `(inputs, targets)` or + `(inputs, targets, sample_weights)`. + - A generator or `keras.utils.PyDataset` returning + `(inputs, targets)` or `(inputs, targets, sample_weights)`. + y: Target data. Like the input data `x`, it could be either NumPy + array(s) or backend-native tensor(s). + If `x` is a `tf.data.Dataset` or `keras.utils.PyDataset` + instance, `y` should not be specified + (since targets will be obtained from the iterator/dataset). + batch_size: Integer or `None`. Number of samples per batch of + computation. If unspecified, `batch_size` will default to 32. Do + not specify the `batch_size` if your data is in the form of a + dataset, generators, or `keras.utils.PyDataset` instances + (since they generate batches). + verbose: `"auto"`, 0, 1, or 2. Verbosity mode. + 0 = silent, 1 = progress bar, 2 = single line. + `"auto"` becomes 1 for most cases. + Note that the progress bar is not + particularly useful when logged to a file, so `verbose=2` is + recommended when not running interactively + (e.g. in a production environment). Defaults to `"auto"`. + sample_weight: Optional NumPy array of weights for the test samples, + used for weighting the loss function. You can either pass a flat + (1D) NumPy array with the same length as the input samples + (1:1 mapping between weights and samples), or in the case of + temporal data, you can pass a 2D array with shape `(samples, + sequence_length)`, to apply a different weight to every + timestep of every sample. This argument is not supported when + `x` is a dataset, instead pass sample weights as the third + element of `x`. + steps: Integer or `None`. Total number of steps (batches of samples) + before declaring the evaluation round finished. Ignored with the + default value of `None`. If `x` is a `tf.data.Dataset` and + `steps` is `None`, evaluation will run until the dataset + is exhausted. + callbacks: List of `keras.callbacks.Callback` instances. + List of callbacks to apply during evaluation. + return_dict: If `True`, loss and metric results are returned as a + dict, with each key being the name of the metric. 
+ If `False`, they are returned as a list. + +Returns: + Scalar test loss (if the model has a single output and no metrics) + or list of scalars (if the model has multiple outputs + and/or metrics). The attribute `model.metrics_names` will give you + the display labels for the scalar outputs. + diff --git a/.tether/man/export_savedmodel.keras.src.models.model.Model.txt b/.tether/man/export_savedmodel.keras.src.models.model.Model.txt new file mode 100644 index 0000000000..afba37b4b8 --- /dev/null +++ b/.tether/man/export_savedmodel.keras.src.models.model.Model.txt @@ -0,0 +1,40 @@ +__signature__ +keras.Model.export( + self, + filepath, + format='tf_saved_model' +) +__doc__ +Create a TF SavedModel artifact for inference. + +**Note:** This can currently only be used with +the TensorFlow or JAX backends. + +This method lets you export a model to a lightweight SavedModel artifact +that contains the model's forward pass only (its `call()` method) +and can be served via e.g. TF-Serving. The forward pass is registered +under the name `serve()` (see example below). + +The original code of the model (including any custom layers you may +have used) is *no longer* necessary to reload the artifact -- it is +entirely standalone. + +Args: + filepath: `str` or `pathlib.Path` object. Path where to save + the artifact. + +Example: + +```python +# Create the artifact +model.export("path/to/location") + +# Later, in a different process / environment... +reloaded_artifact = tf.saved_model.load("path/to/location") +predictions = reloaded_artifact.serve(input_data) +``` + +If you would like to customize your serving endpoints, you can +use the lower-level `keras.export.ExportArchive` class. The +`export()` method relies on `ExportArchive` internally. + diff --git a/.tether/man/feature_cross.txt b/.tether/man/feature_cross.txt new file mode 100644 index 0000000000..c4867bba22 --- /dev/null +++ b/.tether/man/feature_cross.txt @@ -0,0 +1,7 @@ +__signature__ +keras.utils.FeatureSpace.cross( + feature_names, + crossing_dim, + output_mode='one_hot' +) +__doc__ diff --git a/.tether/man/feature_custom.txt b/.tether/man/feature_custom.txt new file mode 100644 index 0000000000..308bb32f94 --- /dev/null +++ b/.tether/man/feature_custom.txt @@ -0,0 +1,7 @@ +__signature__ +keras.utils.FeatureSpace.feature( + dtype, + preprocessor, + output_mode +) +__doc__ diff --git a/.tether/man/feature_float.txt b/.tether/man/feature_float.txt new file mode 100644 index 0000000000..61842bed8e --- /dev/null +++ b/.tether/man/feature_float.txt @@ -0,0 +1,3 @@ +__signature__ +keras.utils.FeatureSpace.float(name=None) +__doc__ diff --git a/.tether/man/feature_float_discretized.txt b/.tether/man/feature_float_discretized.txt new file mode 100644 index 0000000000..512b198830 --- /dev/null +++ b/.tether/man/feature_float_discretized.txt @@ -0,0 +1,8 @@ +__signature__ +keras.utils.FeatureSpace.float_discretized( + num_bins, + bin_boundaries=None, + output_mode='one_hot', + name=None +) +__doc__ diff --git a/.tether/man/feature_float_normalized.txt b/.tether/man/feature_float_normalized.txt new file mode 100644 index 0000000000..64e211db93 --- /dev/null +++ b/.tether/man/feature_float_normalized.txt @@ -0,0 +1,3 @@ +__signature__ +keras.utils.FeatureSpace.float_normalized(name=None) +__doc__ diff --git a/.tether/man/feature_float_rescaled.txt b/.tether/man/feature_float_rescaled.txt new file mode 100644 index 0000000000..392149092f --- /dev/null +++ b/.tether/man/feature_float_rescaled.txt @@ -0,0 +1,7 @@ +__signature__ 
+keras.utils.FeatureSpace.float_rescaled( + scale=1.0, + offset=0.0, + name=None +) +__doc__ diff --git a/.tether/man/feature_integer_categorical.txt b/.tether/man/feature_integer_categorical.txt new file mode 100644 index 0000000000..1dd0abee3c --- /dev/null +++ b/.tether/man/feature_integer_categorical.txt @@ -0,0 +1,8 @@ +__signature__ +keras.utils.FeatureSpace.integer_categorical( + max_tokens=None, + num_oov_indices=1, + output_mode='one_hot', + name=None +) +__doc__ diff --git a/.tether/man/feature_integer_hashed.txt b/.tether/man/feature_integer_hashed.txt new file mode 100644 index 0000000000..767c7bec72 --- /dev/null +++ b/.tether/man/feature_integer_hashed.txt @@ -0,0 +1,7 @@ +__signature__ +keras.utils.FeatureSpace.integer_hashed( + num_bins, + output_mode='one_hot', + name=None +) +__doc__ diff --git a/.tether/man/feature_string_categorical.txt b/.tether/man/feature_string_categorical.txt new file mode 100644 index 0000000000..8371733a97 --- /dev/null +++ b/.tether/man/feature_string_categorical.txt @@ -0,0 +1,8 @@ +__signature__ +keras.utils.FeatureSpace.string_categorical( + max_tokens=None, + num_oov_indices=1, + output_mode='one_hot', + name=None +) +__doc__ diff --git a/.tether/man/feature_string_hashed.txt b/.tether/man/feature_string_hashed.txt new file mode 100644 index 0000000000..9f518e5f76 --- /dev/null +++ b/.tether/man/feature_string_hashed.txt @@ -0,0 +1,7 @@ +__signature__ +keras.utils.FeatureSpace.string_hashed( + num_bins, + output_mode='one_hot', + name=None +) +__doc__ diff --git a/.tether/man/fit.keras.src.models.model.Model.txt b/.tether/man/fit.keras.src.models.model.Model.txt new file mode 100644 index 0000000000..5f1bfa1498 --- /dev/null +++ b/.tether/man/fit.keras.src.models.model.Model.txt @@ -0,0 +1,196 @@ +__signature__ +keras.Model.fit( + self, + x=None, + y=None, + batch_size=None, + epochs=1, + verbose='auto', + callbacks=None, + validation_split=0.0, + validation_data=None, + shuffle=True, + class_weight=None, + sample_weight=None, + initial_epoch=0, + steps_per_epoch=None, + validation_steps=None, + validation_batch_size=None, + validation_freq=1 +) +__doc__ +Trains the model for a fixed number of epochs (dataset iterations). + +Args: + x: Input data. It could be: + - A NumPy array (or array-like), or a list of arrays + (in case the model has multiple inputs). + - A tensor, or a list of tensors + (in case the model has multiple inputs). + - A dict mapping input names to the corresponding array/tensors, + if the model has named inputs. + - A `tf.data.Dataset`. Should return a tuple + of either `(inputs, targets)` or + `(inputs, targets, sample_weights)`. + - A `keras.utils.PyDataset` returning `(inputs, + targets)` or `(inputs, targets, sample_weights)`. + y: Target data. Like the input data `x`, + it could be either NumPy array(s) or backend-native tensor(s). + If `x` is a dataset, generator, + or `keras.utils.PyDataset` instance, `y` should + not be specified (since targets will be obtained from `x`). + batch_size: Integer or `None`. + Number of samples per gradient update. + If unspecified, `batch_size` will default to 32. + Do not specify the `batch_size` if your data is in the + form of datasets, generators, or `keras.utils.PyDataset` + instances (since they generate batches). + epochs: Integer. Number of epochs to train the model. + An epoch is an iteration over the entire `x` and `y` + data provided + (unless the `steps_per_epoch` flag is set to + something other than None). 
+ Note that in conjunction with `initial_epoch`, + `epochs` is to be understood as "final epoch". + The model is not trained for a number of iterations + given by `epochs`, but merely until the epoch + of index `epochs` is reached. + verbose: `"auto"`, 0, 1, or 2. Verbosity mode. + 0 = silent, 1 = progress bar, 2 = one line per epoch. + "auto" becomes 1 for most cases. + Note that the progress bar is not + particularly useful when logged to a file, + so `verbose=2` is recommended when not running interactively + (e.g., in a production environment). Defaults to `"auto"`. + callbacks: List of `keras.callbacks.Callback` instances. + List of callbacks to apply during training. + See `keras.callbacks`. Note + `keras.callbacks.ProgbarLogger` and + `keras.callbacks.History` callbacks are created + automatically and need not be passed to `model.fit()`. + `keras.callbacks.ProgbarLogger` is created + or not based on the `verbose` argument in `model.fit()`. + validation_split: Float between 0 and 1. + Fraction of the training data to be used as validation data. + The model will set apart this fraction of the training data, + will not train on it, and will evaluate + the loss and any model metrics + on this data at the end of each epoch. + The validation data is selected from the last samples + in the `x` and `y` data provided, before shuffling. This + argument is not supported when `x` is a dataset, generator or + `keras.utils.PyDataset` instance. + If both `validation_data` and `validation_split` are provided, + `validation_data` will override `validation_split`. + validation_data: Data on which to evaluate + the loss and any model metrics at the end of each epoch. + The model will not be trained on this data. Thus, note the fact + that the validation loss of data provided using + `validation_split` or `validation_data` is not affected by + regularization layers like noise and dropout. + `validation_data` will override `validation_split`. + It could be: + - A tuple `(x_val, y_val)` of NumPy arrays or tensors. + - A tuple `(x_val, y_val, val_sample_weights)` of NumPy + arrays. + - A `tf.data.Dataset`. + - A Python generator or `keras.utils.PyDataset` returning + `(inputs, targets)` or `(inputs, targets, sample_weights)`. + shuffle: Boolean, whether to shuffle the training data + before each epoch. This argument is + ignored when `x` is a generator or a `tf.data.Dataset`. + class_weight: Optional dictionary mapping class indices (integers) + to a weight (float) value, used for weighting the loss function + (during training only). + This can be useful to tell the model to + "pay more attention" to samples from + an under-represented class. When `class_weight` is specified + and targets have a rank of 2 or greater, either `y` must be + one-hot encoded, or an explicit final dimension of `1` must + be included for sparse class labels. + sample_weight: Optional NumPy array of weights for + the training samples, used for weighting the loss function + (during training only). You can either pass a flat (1D) + NumPy array with the same length as the input samples + (1:1 mapping between weights and samples), + or in the case of temporal data, + you can pass a 2D array with shape + `(samples, sequence_length)`, + to apply a different weight to every timestep of every sample. + This argument is not supported when `x` is a dataset, generator, + or `keras.utils.PyDataset` instance, instead provide the + sample_weights as the third element of `x`. 
+ Note that sample weighting does not apply to metrics specified + via the `metrics` argument in `compile()`. To apply sample + weighting to your metrics, you can specify them via the + `weighted_metrics` argument in `compile()` instead. + initial_epoch: Integer. + Epoch at which to start training + (useful for resuming a previous training run). + steps_per_epoch: Integer or `None`. + Total number of steps (batches of samples) + before declaring one epoch finished and starting the + next epoch. When training with input tensors such as + backend-native tensors, the default `None` is equal to + the number of samples in your dataset divided by + the batch size, or 1 if that cannot be determined. If `x` is a + `tf.data.Dataset`, and `steps_per_epoch` + is `None`, the epoch will run until the input dataset is + exhausted. When passing an infinitely repeating dataset, you + must specify the `steps_per_epoch` argument. If + `steps_per_epoch=-1` the training will run indefinitely with an + infinitely repeating dataset. + validation_steps: Only relevant if `validation_data` is provided. + Total number of steps (batches of + samples) to draw before stopping when performing validation + at the end of every epoch. If `validation_steps` is `None`, + validation will run until the `validation_data` dataset is + exhausted. In the case of an infinitely repeated dataset, it + will run into an infinite loop. If `validation_steps` is + specified and only part of the dataset will be consumed, the + evaluation will start from the beginning of the dataset at each + epoch. This ensures that the same validation samples are used + every time. + validation_batch_size: Integer or `None`. + Number of samples per validation batch. + If unspecified, will default to `batch_size`. + Do not specify the `validation_batch_size` if your data is in + the form of datasets or `keras.utils.PyDataset` + instances (since they generate batches). + validation_freq: Only relevant if validation data is provided. + Specifies how many training epochs to run + before a new validation run is performed, + e.g. `validation_freq=2` runs validation every 2 epochs. + +Unpacking behavior for iterator-like inputs: + A common pattern is to pass an iterator-like object such as a + `tf.data.Dataset` or a `keras.utils.PyDataset` to `fit()`, + which will in fact yield not only features (`x`) + but optionally targets (`y`) and sample weights (`sample_weight`). + Keras requires that the output of such iterator-likes be + unambiguous. The iterator should return a tuple + of length 1, 2, or 3, where the optional second and third elements + will be used for `y` and `sample_weight` respectively. + Any other type provided will be wrapped in + a length-one tuple, effectively treating everything as `x`. When + yielding dicts, they should still adhere to the top-level tuple + structure, + e.g. `({"x0": x0, "x1": x1}, y)`. Keras will not attempt to separate + features, targets, and weights from the keys of a single dict. + A notable unsupported data type is the `namedtuple`. The reason is + that it behaves like both an ordered datatype (tuple) and a mapping + datatype (dict). So given a namedtuple of the form: + `namedtuple("example_tuple", ["y", "x"])` + it is ambiguous whether to reverse the order of the elements when + interpreting the value. Even worse is a tuple of the form: + `namedtuple("other_tuple", ["x", "y", "z"])` + where it is unclear if the tuple was intended to be unpacked + into `x`, `y`, and `sample_weight` or passed through + as a single element to `x`.
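Putting the arguments above together, a minimal `fit()` sketch; the synthetic data, model, weights, and patience values are all illustrative assumptions:

```python
import numpy as np
import keras

# Synthetic data; a tiny binary classifier stands in for a real model.
x_train = np.random.rand(1000, 8)
y_train = np.random.randint(0, 2, size=(1000,))

model = keras.Sequential([keras.layers.Dense(16, activation="relu"),
                          keras.layers.Dense(1, activation="sigmoid")])
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

history = model.fit(
    x_train, y_train,
    batch_size=32,
    epochs=5,
    validation_split=0.2,            # last 20% of the arrays, pre-shuffle
    class_weight={0: 1.0, 1: 3.0},   # upweight a (hypothetically) rarer class
    callbacks=[keras.callbacks.EarlyStopping(monitor="val_loss", patience=2)],
)
print(history.history.keys())  # losses/metrics plus `val_`-prefixed entries
```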
+
+Returns:
+    A `History` object. Its `History.history` attribute is
+    a record of training loss values and metrics values
+    at successive epochs, as well as validation loss values
+    and validation metrics values (if applicable).
+
diff --git a/.tether/man/get_custom_objects.txt b/.tether/man/get_custom_objects.txt
new file mode 100644
index 0000000000..49e3a0f0c9
--- /dev/null
+++ b/.tether/man/get_custom_objects.txt
@@ -0,0 +1,19 @@
+__signature__
+keras.saving.get_custom_objects()
+__doc__
+Retrieves a live reference to the global dictionary of custom objects.
+
+Custom objects set using `custom_object_scope()` are not added to the
+global dictionary of custom objects, and will not appear in the returned
+dictionary.
+
+Example:
+
+```python
+get_custom_objects().clear()
+get_custom_objects()['MyObject'] = MyObject
+```
+
+Returns:
+    Global dictionary mapping registered class names to classes.
+
diff --git a/.tether/man/get_file.txt b/.tether/man/get_file.txt
new file mode 100644
index 0000000000..ddb3a10ad3
--- /dev/null
+++ b/.tether/man/get_file.txt
@@ -0,0 +1,76 @@
+__signature__
+keras.utils.get_file(
+    fname=None,
+    origin=None,
+    untar=False,
+    md5_hash=None,
+    file_hash=None,
+    cache_subdir='datasets',
+    hash_algorithm='auto',
+    extract=False,
+    archive_format='auto',
+    cache_dir=None,
+    force_download=False
+)
+__doc__
+Downloads a file from a URL if it is not already in the cache.
+
+By default, the file at the URL `origin` is downloaded to the
+cache_dir `~/.keras`, placed in the cache_subdir `datasets`,
+and given the filename `fname`. The final location of a file
+`example.txt` would therefore be `~/.keras/datasets/example.txt`.
+Files in `.tar`, `.tar.gz`, `.tar.bz`, and `.zip` formats can
+also be extracted.
+
+Passing a hash will verify the file after download. The command line
+programs `shasum` and `sha256sum` can compute the hash.
+
+Example:
+
+```python
+path_to_downloaded_file = get_file(
+    origin="https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz",
+    extract=True,
+)
+```
+
+Args:
+    fname: Name of the file. If an absolute path, e.g. `"/path/to/file.txt"`
+        is specified, the file will be saved at that location.
+        If `None`, the name of the file at `origin` will be used.
+    origin: Original URL of the file.
+    untar: Deprecated in favor of the `extract` argument.
+        Boolean, whether the file should be decompressed.
+    md5_hash: Deprecated in favor of the `file_hash` argument.
+        MD5 hash of the file for verification.
+    file_hash: The expected hash string of the file after download.
+        The sha256 and md5 hash algorithms are both supported.
+    cache_subdir: Subdirectory under the Keras cache dir where the file is
+        saved. If an absolute path, e.g. `"/path/to/folder"` is
+        specified, the file will be saved at that location.
+    hash_algorithm: Select the hash algorithm to verify the file.
+        Options are `"md5"`, `"sha256"`, and `"auto"`.
+        The default `"auto"` detects the hash algorithm in use.
+    extract: `True` tries extracting the file as an archive, like tar or zip.
+    archive_format: Archive format to try for extracting the file.
+        Options are `"auto"`, `"tar"`, `"zip"`, and `None`.
+        `"tar"` includes tar, tar.gz, and tar.bz files.
+        The default `"auto"` corresponds to `["tar", "zip"]`.
+        `None` or an empty list will return no matches found.
+    cache_dir: Location to store cached files. When `None`, it
+        defaults to `$KERAS_HOME` if the `KERAS_HOME` environment
+        variable is set, and to `~/.keras/` otherwise.
+    force_download: If `True`, the file will always be re-downloaded
+        regardless of the cache state.
+
+Returns:
+    Path to the downloaded file.
+
+**⚠️ Warning on malicious downloads ⚠️**
+
+Downloading something from the Internet carries a risk.
+NEVER download a file/archive if you do not trust the source.
+We recommend that you specify the `file_hash` argument
+(if the hash of the source file is known) to make sure that the file you
+are getting is the one you expect.
+
diff --git a/.tether/man/get_registered_name.txt b/.tether/man/get_registered_name.txt
new file mode 100644
index 0000000000..f1b2a63d67
--- /dev/null
+++ b/.tether/man/get_registered_name.txt
@@ -0,0 +1,16 @@
+__signature__
+keras.saving.get_registered_name(obj)
+__doc__
+Returns the name registered to an object within the Keras framework.
+
+This function is part of the Keras serialization and deserialization
+framework. It maps objects to the string names associated with those objects
+for serialization/deserialization.
+
+Args:
+    obj: The object to look up.
+
+Returns:
+    The name associated with the object, or the default Python name if the
+    object is not registered.
+
diff --git a/.tether/man/get_registered_object.txt b/.tether/man/get_registered_object.txt
new file mode 100644
index 0000000000..3c7cc558cd
--- /dev/null
+++ b/.tether/man/get_registered_object.txt
@@ -0,0 +1,34 @@
+__signature__
+keras.saving.get_registered_object(
+    name,
+    custom_objects=None,
+    module_objects=None
+)
+__doc__
+Returns the class associated with `name` if it is registered with Keras.
+
+This function is part of the Keras serialization and deserialization
+framework. It maps strings to the objects associated with them for
+serialization/deserialization.
+
+Example:
+
+```python
+def from_config(cls, config, custom_objects=None):
+    if 'my_custom_object_name' in config:
+        config['hidden_cls'] = keras.saving.get_registered_object(
+            config['my_custom_object_name'], custom_objects=custom_objects)
+    return cls(**config)
+```
+
+Args:
+    name: The name to look up.
+    custom_objects: A dictionary of custom objects to look the name up in.
+        Generally, `custom_objects` is provided by the user.
+    module_objects: A dictionary of custom objects to look the name up in.
+        Generally, `module_objects` is provided by mid-level library
+        implementers.
+
+Returns:
+    An instantiable class associated with `name`, or `None` if no such class
+    exists.
+
diff --git a/.tether/man/get_source_inputs.txt b/.tether/man/get_source_inputs.txt
new file mode 100644
index 0000000000..0397c25b60
--- /dev/null
+++ b/.tether/man/get_source_inputs.txt
@@ -0,0 +1,13 @@
+__signature__
+keras.utils.get_source_inputs(tensor)
+__doc__
+Returns the list of input tensors necessary to compute `tensor`.
+
+Output will always be a list of tensors
+(potentially with 1 element).
+
+Args:
+    tensor: The tensor to start from.
+
+Returns:
+    List of input tensors.
diff --git a/.tether/man/image_array_save.txt b/.tether/man/image_array_save.txt
new file mode 100644
index 0000000000..655cf471bd
--- /dev/null
+++ b/.tether/man/image_array_save.txt
@@ -0,0 +1,22 @@
+__signature__
+keras.utils.save_img(
+    path,
+    x,
+    data_format=None,
+    file_format=None,
+    scale=True,
+    **kwargs
+)
+__doc__
+Saves an image stored as a NumPy array to a path or file object.
+
+Args:
+    path: Path or file object.
+    x: NumPy array.
+    data_format: Image data format, either `"channels_first"` or
+        `"channels_last"`.
+    file_format: Optional file format override. If omitted, the format to
+        use is determined from the filename extension.
If a file object was + used instead of a filename, this parameter should always be used. + scale: Whether to rescale image values to be within `[0, 255]`. + **kwargs: Additional keyword arguments passed to `PIL.Image.save()`. diff --git a/.tether/man/image_dataset_from_directory.txt b/.tether/man/image_dataset_from_directory.txt new file mode 100644 index 0000000000..25a014477a --- /dev/null +++ b/.tether/man/image_dataset_from_directory.txt @@ -0,0 +1,144 @@ +__signature__ +keras.utils.image_dataset_from_directory( + directory, + labels='inferred', + label_mode='int', + class_names=None, + color_mode='rgb', + batch_size=32, + image_size=(256, 256), + shuffle=True, + seed=None, + validation_split=None, + subset=None, + interpolation='bilinear', + follow_links=False, + crop_to_aspect_ratio=False, + pad_to_aspect_ratio=False, + data_format=None, + verbose=True +) +__doc__ +Generates a `tf.data.Dataset` from image files in a directory. + +If your directory structure is: + +``` +main_directory/ +...class_a/ +......a_image_1.jpg +......a_image_2.jpg +...class_b/ +......b_image_1.jpg +......b_image_2.jpg +``` + +Then calling `image_dataset_from_directory(main_directory, +labels='inferred')` will return a `tf.data.Dataset` that yields batches of +images from the subdirectories `class_a` and `class_b`, together with labels +0 and 1 (0 corresponding to `class_a` and 1 corresponding to `class_b`). + +Supported image formats: `.jpeg`, `.jpg`, `.png`, `.bmp`, `.gif`. +Animated gifs are truncated to the first frame. + +Args: + directory: Directory where the data is located. + If `labels` is `"inferred"`, it should contain + subdirectories, each containing images for a class. + Otherwise, the directory structure is ignored. + labels: Either `"inferred"` + (labels are generated from the directory structure), + `None` (no labels), + or a list/tuple of integer labels of the same size as the number of + image files found in the directory. Labels should be sorted + according to the alphanumeric order of the image file paths + (obtained via `os.walk(directory)` in Python). + label_mode: String describing the encoding of `labels`. Options are: + - `"int"`: means that the labels are encoded as integers + (e.g. for `sparse_categorical_crossentropy` loss). + - `"categorical"` means that the labels are + encoded as a categorical vector + (e.g. for `categorical_crossentropy` loss). + - `"binary"` means that the labels (there can be only 2) + are encoded as `float32` scalars with values 0 or 1 + (e.g. for `binary_crossentropy`). + - `None` (no labels). + class_names: Only valid if `labels` is `"inferred"`. + This is the explicit list of class names + (must match names of subdirectories). Used to control the order + of the classes (otherwise alphanumerical order is used). + color_mode: One of `"grayscale"`, `"rgb"`, `"rgba"`. + Defaults to `"rgb"`. Whether the images will be converted to + have 1, 3, or 4 channels. + batch_size: Size of the batches of data. Defaults to 32. + If `None`, the data will not be batched + (the dataset will yield individual samples). + image_size: Size to resize images to after they are read from disk, + specified as `(height, width)`. Defaults to `(256, 256)`. + Since the pipeline processes batches of images that must all have + the same size, this must be provided. + shuffle: Whether to shuffle the data. Defaults to `True`. + If set to `False`, sorts the data in alphanumeric order. + seed: Optional random seed for shuffling and transformations. 
+    validation_split: Optional float between 0 and 1,
+        fraction of data to reserve for validation.
+    subset: Subset of the data to return.
+        One of `"training"`, `"validation"`, or `"both"`.
+        Only used if `validation_split` is set.
+        When `subset="both"`, the utility returns a tuple of two datasets
+        (the training and validation datasets respectively).
+    interpolation: String, the interpolation method used when
+        resizing images. Defaults to `"bilinear"`.
+        Supports `"bilinear"`, `"nearest"`, `"bicubic"`, `"area"`,
+        `"lanczos3"`, `"lanczos5"`, `"gaussian"`, `"mitchellcubic"`.
+    follow_links: Whether to visit subdirectories pointed to by symlinks.
+        Defaults to `False`.
+    crop_to_aspect_ratio: If `True`, resize the images without aspect
+        ratio distortion. When the original aspect ratio differs from the
+        target aspect ratio, the output image will be cropped so as to
+        return the largest possible window in the image
+        (of size `image_size`) that matches the target aspect ratio. By
+        default (`crop_to_aspect_ratio=False`), aspect ratio may not be
+        preserved.
+    pad_to_aspect_ratio: If `True`, resize the images without aspect
+        ratio distortion. When the original aspect ratio differs from the
+        target aspect ratio, the output image will be padded so as to
+        return the largest possible window in the image
+        (of size `image_size`) that matches the target aspect ratio. By
+        default (`pad_to_aspect_ratio=False`), aspect ratio may not be
+        preserved.
+    data_format: If `None`, uses `keras.config.image_data_format()`;
+        otherwise either `"channels_last"` or `"channels_first"`.
+    verbose: Whether to display information on the number of classes and
+        the number of files found. Defaults to `True`.
+
+Returns:
+
+A `tf.data.Dataset` object.
+
+- If `label_mode` is `None`, it yields `float32` tensors of shape
+  `(batch_size, image_size[0], image_size[1], num_channels)`,
+  encoding images (see below for rules regarding `num_channels`).
+- Otherwise, it yields a tuple `(images, labels)`, where `images` has
+  shape `(batch_size, image_size[0], image_size[1], num_channels)`,
+  and `labels` follows the format described below.
+
+Rules regarding labels format:
+
+- if `label_mode` is `"int"`, the labels are an `int32` tensor of shape
+  `(batch_size,)`.
+- if `label_mode` is `"binary"`, the labels are a `float32` tensor of
+  1s and 0s of shape `(batch_size, 1)`.
+- if `label_mode` is `"categorical"`, the labels are a `float32` tensor
+  of shape `(batch_size, num_classes)`, representing a one-hot
+  encoding of the class index.
+
+Rules regarding number of channels in the yielded images:
+
+- if `color_mode` is `"grayscale"`,
+  there's 1 channel in the image tensors.
+- if `color_mode` is `"rgb"`,
+  there are 3 channels in the image tensors.
+- if `color_mode` is `"rgba"`,
+  there are 4 channels in the image tensors.
+
diff --git a/.tether/man/image_from_array.txt b/.tether/man/image_from_array.txt
new file mode 100644
index 0000000000..78cb974e62
--- /dev/null
+++ b/.tether/man/image_from_array.txt
@@ -0,0 +1,33 @@
+__signature__
+keras.utils.array_to_img(
+    x,
+    data_format=None,
+    scale=True,
+    dtype=None
+)
+__doc__
+Converts a 3D NumPy array to a PIL Image instance.
+
+Example:
+
+```python
+from PIL import Image
+img = np.random.random(size=(100, 100, 3))
+pil_img = keras.utils.array_to_img(img)
+```
+
+Args:
+    x: Input data, in any form that can be converted to a NumPy array.
+    data_format: Image data format, can be either `"channels_first"` or
+        `"channels_last"`.
Defaults to `None`, in which case the global + setting `keras.backend.image_data_format()` is used (unless you + changed it, it defaults to `"channels_last"`). + scale: Whether to rescale the image such that minimum and maximum values + are 0 and 255 respectively. Defaults to `True`. + dtype: Dtype to use. `None` means the global setting + `keras.backend.floatx()` is used (unless you changed it, it + defaults to `"float32"`). Defaults to `None`. + +Returns: + A PIL Image instance. + diff --git a/.tether/man/image_load.txt b/.tether/man/image_load.txt new file mode 100644 index 0000000000..78d891a7e2 --- /dev/null +++ b/.tether/man/image_load.txt @@ -0,0 +1,40 @@ +__signature__ +keras.utils.load_img( + path, + color_mode='rgb', + target_size=None, + interpolation='nearest', + keep_aspect_ratio=False +) +__doc__ +Loads an image into PIL format. + +Example: + +```python +image = keras.utils.load_img(image_path) +input_arr = keras.utils.img_to_array(image) +input_arr = np.array([input_arr]) # Convert single image to a batch. +predictions = model.predict(input_arr) +``` + +Args: + path: Path to image file. + color_mode: One of `"grayscale"`, `"rgb"`, `"rgba"`. Default: `"rgb"`. + The desired image format. + target_size: Either `None` (default to original size) or tuple of ints + `(img_height, img_width)`. + interpolation: Interpolation method used to resample the image if the + target size is different from that of the loaded image. Supported + methods are `"nearest"`, `"bilinear"`, and `"bicubic"`. + If PIL version 1.1.3 or newer is installed, `"lanczos"` + is also supported. If PIL version 3.4.0 or newer is installed, + `"box"` and `"hamming"` are also + supported. By default, `"nearest"` is used. + keep_aspect_ratio: Boolean, whether to resize images to a target + size without aspect ratio distortion. The image is cropped in + the center with target aspect ratio before resizing. + +Returns: + A PIL Image instance. + diff --git a/.tether/man/image_smart_resize.txt b/.tether/man/image_smart_resize.txt new file mode 100644 index 0000000000..ac7abf28d5 --- /dev/null +++ b/.tether/man/image_smart_resize.txt @@ -0,0 +1,70 @@ +__signature__ +keras.preprocessing.image.smart_resize( + x, + size, + interpolation='bilinear', + data_format='channels_last', + backend_module=None +) +__doc__ +Resize images to a target size without aspect ratio distortion. + +Image datasets typically yield images that have each a different +size. However, these images need to be batched before they can be +processed by Keras layers. To be batched, images need to share the same +height and width. + +You could simply do, in TF (or JAX equivalent): + +```python +size = (200, 200) +ds = ds.map(lambda img: resize(img, size)) +``` + +However, if you do this, you distort the aspect ratio of your images, since +in general they do not all have the same aspect ratio as `size`. This is +fine in many cases, but not always (e.g. for image generation models +this can be a problem). + +Note that passing the argument `preserve_aspect_ratio=True` to `resize` +will preserve the aspect ratio, but at the cost of no longer respecting the +provided target size. + +This calls for: + +```python +size = (200, 200) +ds = ds.map(lambda img: smart_resize(img, size)) +``` + +Your output images will actually be `(200, 200)`, and will not be distorted. +Instead, the parts of the image that do not fit within the target size +get cropped out. + +The resizing process is: + +1. 
Take the largest centered crop of the image that has the same aspect
+ratio as the target size. For instance, if `size=(200, 200)` and the input
+image has size `(340, 500)`, we take a crop of `(340, 340)` centered along
+the width.
+2. Resize the cropped image to the target size. In the example above,
+we resize the `(340, 340)` crop to `(200, 200)`.
+
+Args:
+    x: Input image or batch of images (as a tensor or NumPy array).
+        Must be in format `(height, width, channels)`
+        or `(batch_size, height, width, channels)`.
+    size: Tuple of two integers, `(height, width)`. Target size.
+    interpolation: String, interpolation to use for resizing.
+        Defaults to `'bilinear'`.
+        Supports `bilinear`, `nearest`, `bicubic`,
+        `lanczos3`, `lanczos5`.
+    data_format: `"channels_last"` or `"channels_first"`.
+    backend_module: Backend module to use (if different from the default
+        backend).
+
+Returns:
+    Array with shape `(size[0], size[1], channels)`.
+    If the input image was a NumPy array, the output is a NumPy array,
+    and if it was a backend-native tensor,
+    the output is a backend-native tensor.
diff --git a/.tether/man/image_to_array.txt b/.tether/man/image_to_array.txt
new file mode 100644
index 0000000000..ef947754cf
--- /dev/null
+++ b/.tether/man/image_to_array.txt
@@ -0,0 +1,31 @@
+__signature__
+keras.utils.img_to_array(
+    img,
+    data_format=None,
+    dtype=None
+)
+__doc__
+Converts a PIL Image instance to a NumPy array.
+
+Example:
+
+```python
+from PIL import Image
+img_data = np.random.random(size=(100, 100, 3))
+img = keras.utils.array_to_img(img_data)
+array = keras.utils.img_to_array(img)
+```
+
+Args:
+    img: Input PIL Image instance.
+    data_format: Image data format, can be either `"channels_first"` or
+        `"channels_last"`. Defaults to `None`, in which case the global
+        setting `keras.backend.image_data_format()` is used (unless you
+        changed it, it defaults to `"channels_last"`).
+    dtype: Dtype to use. `None` means the global setting
+        `keras.backend.floatx()` is used (unless you changed it, it
+        defaults to `"float32"`).
+
+Returns:
+    A 3D NumPy array.
+
diff --git a/.tether/man/initializer_constant.txt b/.tether/man/initializer_constant.txt
new file mode 100644
index 0000000000..beee9d9c51
--- /dev/null
+++ b/.tether/man/initializer_constant.txt
@@ -0,0 +1,71 @@
+Help on class Constant in module keras.src.initializers.constant_initializers:
+
+class Constant(keras.src.initializers.initializer.Initializer)
+ |  Constant(value=0.0)
+ |
+ |  Initializer that generates tensors with constant values.
+ |
+ |  Only scalar values are allowed.
+ |  The constant value provided must be convertible to the dtype requested
+ |  when calling the initializer.
+ |
+ |  Examples:
+ |
+ |  >>> # Standalone usage:
+ |  >>> initializer = Constant(10.)
+ |  >>> values = initializer(shape=(2, 2))
+ |
+ |  >>> # Usage in a Keras layer:
+ |  >>> initializer = Constant(10.)
+ |  >>> layer = Dense(3, kernel_initializer=initializer)
+ |
+ |  Args:
+ |      value: A Python scalar.
+ |
+ |  Method resolution order:
+ |      Constant
+ |      keras.src.initializers.initializer.Initializer
+ |      builtins.object
+ |
+ |  Methods defined here:
+ |
+ |  __call__(
+ |      self,
+ |      shape,
+ |      dtype=None
+ |  )
+ |      Returns a tensor object initialized as specified by the initializer.
+ |
+ |      Args:
+ |          shape: Shape of the tensor.
+ |          dtype: Optional dtype of the tensor.
+ |
+ |  __init__(self, value=0.0)
+ |      Initialize self. See help(type(self)) for accurate signature.
+ |
+ |  get_config(self)
+ |      Returns the initializer's configuration as a JSON-serializable dict.
+ | + | Returns: + | A JSON-serializable Python dict. + | + | ---------------------------------------------------------------------- + | Class methods defined here: + | + | from_config(config) from builtins.type + | Instantiates an initializer from a configuration dictionary. + | + | Example: + | + | ```python + | initializer = RandomUniform(-1, 1) + | config = initializer.get_config() + | initializer = RandomUniform.from_config(config) + | ``` + | + | Args: + | config: A Python dictionary, the output of `get_config()`. + | + | Returns: + | An `Initializer` instance. + | diff --git a/.tether/man/initializer_glorot_normal.txt b/.tether/man/initializer_glorot_normal.txt new file mode 100644 index 0000000000..782cdffbef --- /dev/null +++ b/.tether/man/initializer_glorot_normal.txt @@ -0,0 +1,53 @@ +Help on class GlorotNormal in module keras.src.initializers.random_initializers: + +class GlorotNormal(VarianceScaling) + | GlorotNormal(seed=None) + | + | The Glorot normal initializer, also called Xavier normal initializer. + | + | Draws samples from a truncated normal distribution centered on 0 with + | `stddev = sqrt(2 / (fan_in + fan_out))` where `fan_in` is the number of + | input units in the weight tensor and `fan_out` is the number of output units + | in the weight tensor. + | + | Examples: + | + | >>> # Standalone usage: + | >>> initializer = GlorotNormal() + | >>> values = initializer(shape=(2, 2)) + | + | >>> # Usage in a Keras layer: + | >>> initializer = GlorotNormal() + | >>> layer = Dense(3, kernel_initializer=initializer) + | + | Args: + | seed: A Python integer or instance of + | `keras.backend.SeedGenerator`. + | Used to make the behavior of the initializer + | deterministic. Note that an initializer seeded with an integer + | or `None` (unseeded) will produce the same random values + | across multiple calls. To get different random values + | across multiple calls, use as seed an instance + | of `keras.backend.SeedGenerator`. + | + | Reference: + | + | - [Glorot et al., 2010](http://proceedings.mlr.press/v9/glorot10a.html) + | + | Method resolution order: + | GlorotNormal + | VarianceScaling + | keras.src.initializers.initializer.Initializer + | builtins.object + | + | Methods defined here: + | + | __init__(self, seed=None) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Returns the initializer's configuration as a JSON-serializable dict. + | + | Returns: + | A JSON-serializable Python dict. + | diff --git a/.tether/man/initializer_glorot_uniform.txt b/.tether/man/initializer_glorot_uniform.txt new file mode 100644 index 0000000000..ced7f484f8 --- /dev/null +++ b/.tether/man/initializer_glorot_uniform.txt @@ -0,0 +1,52 @@ +Help on class GlorotUniform in module keras.src.initializers.random_initializers: + +class GlorotUniform(VarianceScaling) + | GlorotUniform(seed=None) + | + | The Glorot uniform initializer, also called Xavier uniform initializer. + | + | Draws samples from a uniform distribution within `[-limit, limit]`, where + | `limit = sqrt(6 / (fan_in + fan_out))` (`fan_in` is the number of input + | units in the weight tensor and `fan_out` is the number of output units). + | + | Examples: + | + | >>> # Standalone usage: + | >>> initializer = GlorotUniform() + | >>> values = initializer(shape=(2, 2)) + | + | >>> # Usage in a Keras layer: + | >>> initializer = GlorotUniform() + | >>> layer = Dense(3, kernel_initializer=initializer) + | + | Args: + | seed: A Python integer or instance of + | `keras.backend.SeedGenerator`. 
+ | Used to make the behavior of the initializer + | deterministic. Note that an initializer seeded with an integer + | or `None` (unseeded) will produce the same random values + | across multiple calls. To get different random values + | across multiple calls, use as seed an instance + | of `keras.backend.SeedGenerator`. + | + | Reference: + | + | - [Glorot et al., 2010](http://proceedings.mlr.press/v9/glorot10a.html) + | + | Method resolution order: + | GlorotUniform + | VarianceScaling + | keras.src.initializers.initializer.Initializer + | builtins.object + | + | Methods defined here: + | + | __init__(self, seed=None) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Returns the initializer's configuration as a JSON-serializable dict. + | + | Returns: + | A JSON-serializable Python dict. + | diff --git a/.tether/man/initializer_he_normal.txt b/.tether/man/initializer_he_normal.txt new file mode 100644 index 0000000000..e6f0a42088 --- /dev/null +++ b/.tether/man/initializer_he_normal.txt @@ -0,0 +1,52 @@ +Help on class HeNormal in module keras.src.initializers.random_initializers: + +class HeNormal(VarianceScaling) + | HeNormal(seed=None) + | + | He normal initializer. + | + | It draws samples from a truncated normal distribution centered on 0 with + | `stddev = sqrt(2 / fan_in)` where `fan_in` is the number of input units in + | the weight tensor. + | + | Examples: + | + | >>> # Standalone usage: + | >>> initializer = HeNormal() + | >>> values = initializer(shape=(2, 2)) + | + | >>> # Usage in a Keras layer: + | >>> initializer = HeNormal() + | >>> layer = Dense(3, kernel_initializer=initializer) + | + | Args: + | seed: A Python integer or instance of + | `keras.backend.SeedGenerator`. + | Used to make the behavior of the initializer + | deterministic. Note that an initializer seeded with an integer + | or `None` (unseeded) will produce the same random values + | across multiple calls. To get different random values + | across multiple calls, use as seed an instance + | of `keras.backend.SeedGenerator`. + | + | Reference: + | + | - [He et al., 2015](https://arxiv.org/abs/1502.01852) + | + | Method resolution order: + | HeNormal + | VarianceScaling + | keras.src.initializers.initializer.Initializer + | builtins.object + | + | Methods defined here: + | + | __init__(self, seed=None) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Returns the initializer's configuration as a JSON-serializable dict. + | + | Returns: + | A JSON-serializable Python dict. + | diff --git a/.tether/man/initializer_he_uniform.txt b/.tether/man/initializer_he_uniform.txt new file mode 100644 index 0000000000..a6cda5c395 --- /dev/null +++ b/.tether/man/initializer_he_uniform.txt @@ -0,0 +1,52 @@ +Help on class HeUniform in module keras.src.initializers.random_initializers: + +class HeUniform(VarianceScaling) + | HeUniform(seed=None) + | + | He uniform variance scaling initializer. + | + | Draws samples from a uniform distribution within `[-limit, limit]`, where + | `limit = sqrt(6 / fan_in)` (`fan_in` is the number of input units in the + | weight tensor). + | + | Examples: + | + | >>> # Standalone usage: + | >>> initializer = HeUniform() + | >>> values = initializer(shape=(2, 2)) + | + | >>> # Usage in a Keras layer: + | >>> initializer = HeUniform() + | >>> layer = Dense(3, kernel_initializer=initializer) + | + | Args: + | seed: A Python integer or instance of + | `keras.backend.SeedGenerator`. 
+ | Used to make the behavior of the initializer + | deterministic. Note that an initializer seeded with an integer + | or `None` (unseeded) will produce the same random values + | across multiple calls. To get different random values + | across multiple calls, use as seed an instance + | of `keras.backend.SeedGenerator`. + | + | Reference: + | + | - [He et al., 2015](https://arxiv.org/abs/1502.01852) + | + | Method resolution order: + | HeUniform + | VarianceScaling + | keras.src.initializers.initializer.Initializer + | builtins.object + | + | Methods defined here: + | + | __init__(self, seed=None) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Returns the initializer's configuration as a JSON-serializable dict. + | + | Returns: + | A JSON-serializable Python dict. + | diff --git a/.tether/man/initializer_identity.txt b/.tether/man/initializer_identity.txt new file mode 100644 index 0000000000..a9b9e4dedd --- /dev/null +++ b/.tether/man/initializer_identity.txt @@ -0,0 +1,46 @@ +Help on class Identity in module keras.src.initializers.constant_initializers: + +class Identity(keras.src.initializers.initializer.Initializer) + | Identity(gain=1.0) + | + | Initializer that generates the identity matrix. + | + | Only usable for generating 2D matrices. + | + | Examples: + | + | >>> # Standalone usage: + | >>> initializer = Identity() + | >>> values = initializer(shape=(2, 2)) + | + | >>> # Usage in a Keras layer: + | >>> initializer = Identity() + | >>> layer = Dense(3, kernel_initializer=initializer) + | + | Args: + | gain: Multiplicative factor to apply to the identity matrix. + | + | Method resolution order: + | Identity + | keras.src.initializers.initializer.Initializer + | builtins.object + | + | Methods defined here: + | + | __call__( + | self, + | shape, + | dtype=None + | ) + | Returns a tensor object initialized as specified by the initializer. + | + | Args: + | shape: Shape of the tensor. + | dtype: Optional dtype of the tensor. Only numeric or boolean dtypes + | are supported. If not specified, `keras.backend.floatx()` + | is used, which default to `float32` unless you configured it + | otherwise (via `keras.backend.set_floatx(float_dtype)`). + | + | __init__(self, gain=1.0) + | Initialize self. See help(type(self)) for accurate signature. + | diff --git a/.tether/man/initializer_lecun_normal.txt b/.tether/man/initializer_lecun_normal.txt new file mode 100644 index 0000000000..6fb993899a --- /dev/null +++ b/.tether/man/initializer_lecun_normal.txt @@ -0,0 +1,56 @@ +Help on class LecunNormal in module keras.src.initializers.random_initializers: + +class LecunNormal(VarianceScaling) + | LecunNormal(seed=None) + | + | Lecun normal initializer. + | + | Initializers allow you to pre-specify an initialization strategy, encoded in + | the Initializer object, without knowing the shape and dtype of the variable + | being initialized. + | + | Draws samples from a truncated normal distribution centered on 0 with + | `stddev = sqrt(1 / fan_in)` where `fan_in` is the number of input units in + | the weight tensor. + | + | Examples: + | + | >>> # Standalone usage: + | >>> initializer = LecunNormal() + | >>> values = initializer(shape=(2, 2)) + | + | >>> # Usage in a Keras layer: + | >>> initializer = LecunNormal() + | >>> layer = Dense(3, kernel_initializer=initializer) + | + | Args: + | seed: A Python integer or instance of + | `keras.backend.SeedGenerator`. + | Used to make the behavior of the initializer + | deterministic. 
Note that an initializer seeded with an integer + | or `None` (unseeded) will produce the same random values + | across multiple calls. To get different random values + | across multiple calls, use as seed an instance + | of `keras.backend.SeedGenerator`. + | + | Reference: + | + | - [Klambauer et al., 2017](https://arxiv.org/abs/1706.02515) + | + | Method resolution order: + | LecunNormal + | VarianceScaling + | keras.src.initializers.initializer.Initializer + | builtins.object + | + | Methods defined here: + | + | __init__(self, seed=None) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Returns the initializer's configuration as a JSON-serializable dict. + | + | Returns: + | A JSON-serializable Python dict. + | diff --git a/.tether/man/initializer_lecun_uniform.txt b/.tether/man/initializer_lecun_uniform.txt new file mode 100644 index 0000000000..0ac814b3d5 --- /dev/null +++ b/.tether/man/initializer_lecun_uniform.txt @@ -0,0 +1,52 @@ +Help on class LecunUniform in module keras.src.initializers.random_initializers: + +class LecunUniform(VarianceScaling) + | LecunUniform(seed=None) + | + | Lecun uniform initializer. + | + | Draws samples from a uniform distribution within `[-limit, limit]`, where + | `limit = sqrt(3 / fan_in)` (`fan_in` is the number of input units in the + | weight tensor). + | + | Examples: + | + | >>> # Standalone usage: + | >>> initializer = LecunUniform() + | >>> values = initializer(shape=(2, 2)) + | + | >>> # Usage in a Keras layer: + | >>> initializer = LecunUniform() + | >>> layer = Dense(3, kernel_initializer=initializer) + | + | Args: + | seed: A Python integer or instance of + | `keras.backend.SeedGenerator`. + | Used to make the behavior of the initializer + | deterministic. Note that an initializer seeded with an integer + | or `None` (unseeded) will produce the same random values + | across multiple calls. To get different random values + | across multiple calls, use as seed an instance + | of `keras.backend.SeedGenerator`. + | + | Reference: + | + | - [Klambauer et al., 2017](https://arxiv.org/abs/1706.02515) + | + | Method resolution order: + | LecunUniform + | VarianceScaling + | keras.src.initializers.initializer.Initializer + | builtins.object + | + | Methods defined here: + | + | __init__(self, seed=None) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Returns the initializer's configuration as a JSON-serializable dict. + | + | Returns: + | A JSON-serializable Python dict. + | diff --git a/.tether/man/initializer_ones.txt b/.tether/man/initializer_ones.txt new file mode 100644 index 0000000000..eb9625504f --- /dev/null +++ b/.tether/man/initializer_ones.txt @@ -0,0 +1,38 @@ +Help on class Ones in module keras.src.initializers.constant_initializers: + +class Ones(keras.src.initializers.initializer.Initializer) + | Initializer that generates tensors initialized to 1. + | + | Also available via the shortcut function `ones`. + | + | Examples: + | + | >>> # Standalone usage: + | >>> initializer = Ones() + | >>> values = initializer(shape=(2, 2)) + | + | >>> # Usage in a Keras layer: + | >>> initializer = Ones() + | >>> layer = Dense(3, kernel_initializer=initializer) + | + | Method resolution order: + | Ones + | keras.src.initializers.initializer.Initializer + | builtins.object + | + | Methods defined here: + | + | __call__( + | self, + | shape, + | dtype=None + | ) + | Returns a tensor object initialized as specified by the initializer. 
+ | + | Args: + | shape: Shape of the tensor. + | dtype: Optional dtype of the tensor. Only numeric or boolean dtypes + | are supported. If not specified, `keras.backend.floatx()` + | is used, which default to `float32` unless you configured it + | otherwise (via `keras.backend.set_floatx(float_dtype)`). + | diff --git a/.tether/man/initializer_orthogonal.txt b/.tether/man/initializer_orthogonal.txt new file mode 100644 index 0000000000..bd8974ab62 --- /dev/null +++ b/.tether/man/initializer_orthogonal.txt @@ -0,0 +1,68 @@ +Help on class OrthogonalInitializer in module keras.src.initializers.random_initializers: + +class OrthogonalInitializer(keras.src.initializers.initializer.Initializer) + | OrthogonalInitializer(gain=1.0, seed=None) + | + | Initializer that generates an orthogonal matrix. + | + | If the shape of the tensor to initialize is two-dimensional, it is + | initialized with an orthogonal matrix obtained from the QR decomposition of + | a matrix of random numbers drawn from a normal distribution. If the matrix + | has fewer rows than columns then the output will have orthogonal rows. + | Otherwise, the output will have orthogonal columns. + | + | If the shape of the tensor to initialize is more than two-dimensional, + | a matrix of shape `(shape[0] * ... * shape[n - 2], shape[n - 1])` + | is initialized, where `n` is the length of the shape vector. + | The matrix is subsequently reshaped to give a tensor of the desired shape. + | + | Examples: + | + | >>> # Standalone usage: + | >>> initializer = keras.initializers.Orthogonal() + | >>> values = initializer(shape=(2, 2)) + | + | >>> # Usage in a Keras layer: + | >>> initializer = keras.initializers.Orthogonal() + | >>> layer = keras.layers.Dense(3, kernel_initializer=initializer) + | + | Args: + | gain: Multiplicative factor to apply to the orthogonal matrix. + | seed: A Python integer. Used to make the behavior of the initializer + | deterministic. + | + | Reference: + | + | - [Saxe et al., 2014](https://openreview.net/forum?id=_wzZwKpTDF_9C) + | + | Method resolution order: + | OrthogonalInitializer + | keras.src.initializers.initializer.Initializer + | builtins.object + | + | Methods defined here: + | + | __call__( + | self, + | shape, + | dtype=None + | ) + | Returns a tensor object initialized as specified by the initializer. + | + | Args: + | shape: Shape of the tensor. + | dtype: Optional dtype of the tensor. + | + | __init__( + | self, + | gain=1.0, + | seed=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Returns the initializer's configuration as a JSON-serializable dict. + | + | Returns: + | A JSON-serializable Python dict. + | diff --git a/.tether/man/initializer_random_normal.txt b/.tether/man/initializer_random_normal.txt new file mode 100644 index 0000000000..96f82f9951 --- /dev/null +++ b/.tether/man/initializer_random_normal.txt @@ -0,0 +1,65 @@ +Help on class RandomNormal in module keras.src.initializers.random_initializers: + +class RandomNormal(keras.src.initializers.initializer.Initializer) + | RandomNormal(mean=0.0, stddev=0.05, seed=None) + | + | Random normal initializer. + | + | Draws samples from a normal distribution for given parameters. 
+ | + | Examples: + | + | >>> # Standalone usage: + | >>> initializer = RandomNormal(mean=0.0, stddev=1.0) + | >>> values = initializer(shape=(2, 2)) + | + | >>> # Usage in a Keras layer: + | >>> initializer = RandomNormal(mean=0.0, stddev=1.0) + | >>> layer = Dense(3, kernel_initializer=initializer) + | + | Args: + | mean: A python scalar or a scalar keras tensor. Mean of the random + | values to generate. + | stddev: A python scalar or a scalar keras tensor. Standard deviation of + | the random values to generate. + | seed: A Python integer or instance of + | `keras.backend.SeedGenerator`. + | Used to make the behavior of the initializer + | deterministic. Note that an initializer seeded with an integer + | or `None` (unseeded) will produce the same random values + | across multiple calls. To get different random values + | across multiple calls, use as seed an instance + | of `keras.backend.SeedGenerator`. + | + | Method resolution order: + | RandomNormal + | keras.src.initializers.initializer.Initializer + | builtins.object + | + | Methods defined here: + | + | __call__( + | self, + | shape, + | dtype=None + | ) + | Returns a tensor object initialized as specified by the initializer. + | + | Args: + | shape: Shape of the tensor. + | dtype: Optional dtype of the tensor. + | + | __init__( + | self, + | mean=0.0, + | stddev=0.05, + | seed=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Returns the initializer's configuration as a JSON-serializable dict. + | + | Returns: + | A JSON-serializable Python dict. + | diff --git a/.tether/man/initializer_random_uniform.txt b/.tether/man/initializer_random_uniform.txt new file mode 100644 index 0000000000..876e658c5d --- /dev/null +++ b/.tether/man/initializer_random_uniform.txt @@ -0,0 +1,65 @@ +Help on class RandomUniform in module keras.src.initializers.random_initializers: + +class RandomUniform(keras.src.initializers.initializer.Initializer) + | RandomUniform(minval=-0.05, maxval=0.05, seed=None) + | + | Random uniform initializer. + | + | Draws samples from a uniform distribution for given parameters. + | + | Examples: + | + | >>> # Standalone usage: + | >>> initializer = RandomUniform(minval=0.0, maxval=1.0) + | >>> values = initializer(shape=(2, 2)) + | + | >>> # Usage in a Keras layer: + | >>> initializer = RandomUniform(minval=0.0, maxval=1.0) + | >>> layer = Dense(3, kernel_initializer=initializer) + | + | Args: + | minval: A python scalar or a scalar keras tensor. Lower bound of the + | range of random values to generate (inclusive). + | maxval: A python scalar or a scalar keras tensor. Upper bound of the + | range of random values to generate (exclusive). + | seed: A Python integer or instance of + | `keras.backend.SeedGenerator`. + | Used to make the behavior of the initializer + | deterministic. Note that an initializer seeded with an integer + | or `None` (unseeded) will produce the same random values + | across multiple calls. To get different random values + | across multiple calls, use as seed an instance + | of `keras.backend.SeedGenerator`. + | + | Method resolution order: + | RandomUniform + | keras.src.initializers.initializer.Initializer + | builtins.object + | + | Methods defined here: + | + | __call__( + | self, + | shape, + | dtype=None + | ) + | Returns a tensor object initialized as specified by the initializer. + | + | Args: + | shape: Shape of the tensor. + | dtype: Optional dtype of the tensor. 
+ | + | __init__( + | self, + | minval=-0.05, + | maxval=0.05, + | seed=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Returns the initializer's configuration as a JSON-serializable dict. + | + | Returns: + | A JSON-serializable Python dict. + | diff --git a/.tether/man/initializer_truncated_normal.txt b/.tether/man/initializer_truncated_normal.txt new file mode 100644 index 0000000000..b1322b9985 --- /dev/null +++ b/.tether/man/initializer_truncated_normal.txt @@ -0,0 +1,68 @@ +Help on class TruncatedNormal in module keras.src.initializers.random_initializers: + +class TruncatedNormal(keras.src.initializers.initializer.Initializer) + | TruncatedNormal(mean=0.0, stddev=0.05, seed=None) + | + | Initializer that generates a truncated normal distribution. + | + | The values generated are similar to values from a + | `RandomNormal` initializer, except that values more + | than two standard deviations from the mean are + | discarded and re-drawn. + | + | Examples: + | + | >>> # Standalone usage: + | >>> initializer = TruncatedNormal(mean=0., stddev=1.) + | >>> values = initializer(shape=(2, 2)) + | + | >>> # Usage in a Keras layer: + | >>> initializer = TruncatedNormal(mean=0., stddev=1.) + | >>> layer = Dense(3, kernel_initializer=initializer) + | + | Args: + | mean: A python scalar or a scalar keras tensor. Mean of the random + | values to generate. + | stddev: A python scalar or a scalar keras tensor. Standard deviation of + | the random values to generate. + | seed: A Python integer or instance of + | `keras.backend.SeedGenerator`. + | Used to make the behavior of the initializer + | deterministic. Note that an initializer seeded with an integer + | or `None` (unseeded) will produce the same random values + | across multiple calls. To get different random values + | across multiple calls, use as seed an instance + | of `keras.backend.SeedGenerator`. + | + | Method resolution order: + | TruncatedNormal + | keras.src.initializers.initializer.Initializer + | builtins.object + | + | Methods defined here: + | + | __call__( + | self, + | shape, + | dtype=None + | ) + | Returns a tensor object initialized as specified by the initializer. + | + | Args: + | shape: Shape of the tensor. + | dtype: Optional dtype of the tensor. + | + | __init__( + | self, + | mean=0.0, + | stddev=0.05, + | seed=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Returns the initializer's configuration as a JSON-serializable dict. + | + | Returns: + | A JSON-serializable Python dict. + | diff --git a/.tether/man/initializer_variance_scaling.txt b/.tether/man/initializer_variance_scaling.txt new file mode 100644 index 0000000000..a8c436fa8f --- /dev/null +++ b/.tether/man/initializer_variance_scaling.txt @@ -0,0 +1,78 @@ +Help on class VarianceScaling in module keras.src.initializers.random_initializers: + +class VarianceScaling(keras.src.initializers.initializer.Initializer) + | VarianceScaling(scale=1.0, mode='fan_in', distribution='truncated_normal', seed=None) + | + | Initializer that adapts its scale to the shape of its input tensors. 
+ | + | With `distribution="truncated_normal" or "untruncated_normal"`, samples are + | drawn from a truncated/untruncated normal distribution with a mean of zero + | and a standard deviation (after truncation, if used) `stddev = sqrt(scale / + | n)`, where `n` is: + | + | - number of input units in the weight tensor, if `mode="fan_in"` + | - number of output units, if `mode="fan_out"` + | - average of the numbers of input and output units, if `mode="fan_avg"` + | + | With `distribution="uniform"`, samples are drawn from a uniform distribution + | within `[-limit, limit]`, where `limit = sqrt(3 * scale / n)`. + | + | Examples: + | + | >>> # Standalone usage: + | >>> initializer = VarianceScaling( + | scale=0.1, mode='fan_in', distribution='uniform') + | >>> values = initializer(shape=(2, 2)) + | + | >>> # Usage in a Keras layer: + | >>> initializer = VarianceScaling( + | scale=0.1, mode='fan_in', distribution='uniform') + | >>> layer = Dense(3, kernel_initializer=initializer) + | + | Args: + | scale: Scaling factor (positive float). + | mode: One of `"fan_in"`, `"fan_out"`, `"fan_avg"`. + | distribution: Random distribution to use. + | One of `"truncated_normal"`, `"untruncated_normal"`, or `"uniform"`. + | seed: A Python integer or instance of + | `keras.backend.SeedGenerator`. + | Used to make the behavior of the initializer + | deterministic. Note that an initializer seeded with an integer + | or `None` (unseeded) will produce the same random values + | across multiple calls. To get different random values + | across multiple calls, use as seed an instance + | of `keras.backend.SeedGenerator`. + | + | Method resolution order: + | VarianceScaling + | keras.src.initializers.initializer.Initializer + | builtins.object + | + | Methods defined here: + | + | __call__( + | self, + | shape, + | dtype=None + | ) + | Returns a tensor object initialized as specified by the initializer. + | + | Args: + | shape: Shape of the tensor. + | dtype: Optional dtype of the tensor. + | + | __init__( + | self, + | scale=1.0, + | mode='fan_in', + | distribution='truncated_normal', + | seed=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Returns the initializer's configuration as a JSON-serializable dict. + | + | Returns: + | A JSON-serializable Python dict. + | diff --git a/.tether/man/initializer_zeros.txt b/.tether/man/initializer_zeros.txt new file mode 100644 index 0000000000..16bfe5bf86 --- /dev/null +++ b/.tether/man/initializer_zeros.txt @@ -0,0 +1,36 @@ +Help on class Zeros in module keras.src.initializers.constant_initializers: + +class Zeros(keras.src.initializers.initializer.Initializer) + | Initializer that generates tensors initialized to 0. + | + | Examples: + | + | >>> # Standalone usage: + | >>> initializer = Zeros() + | >>> values = initializer(shape=(2, 2)) + | + | >>> # Usage in a Keras layer: + | >>> initializer = Zeros() + | >>> layer = Dense(units=3, kernel_initializer=initializer) + | + | Method resolution order: + | Zeros + | keras.src.initializers.initializer.Initializer + | builtins.object + | + | Methods defined here: + | + | __call__( + | self, + | shape, + | dtype=None + | ) + | Returns a tensor object initialized as specified by the initializer. + | + | Args: + | shape: Shape of the tensor. + | dtype: Optional dtype of the tensor. Only numeric or boolean dtypes + | are supported. 
If not specified, `keras.backend.floatx()` + | is used, which default to `float32` unless you configured it + | otherwise (via `keras.backend.set_floatx(float_dtype)`). + | diff --git a/.tether/man/keras.activations.txt b/.tether/man/keras.activations.txt new file mode 100644 index 0000000000..34f9eef2e9 --- /dev/null +++ b/.tether/man/keras.activations.txt @@ -0,0 +1,29 @@ +deserialize(config, custom_objects=None) +elu(x, alpha=1.0) +exponential(x) +gelu(x, approximate=False) +get(identifier) +hard_sigmoid(x) +hard_silu(x) +hard_swish(x) +leaky_relu(x, negative_slope=0.2) +linear(x) +log_softmax(x, axis=-1) +mish(x) +relu( + x, + negative_slope=0.0, + max_value=None, + threshold=0.0 +) +relu6(x) +selu(x) +serialize(activation) +sigmoid(x) +silu(x) +softmax(x, axis=-1) +softplus(x) +softsign(x) +swish(x) +tanh(x) + diff --git a/.tether/man/keras.applications.txt b/.tether/man/keras.applications.txt new file mode 100644 index 0000000000..8d9db78cf3 --- /dev/null +++ b/.tether/man/keras.applications.txt @@ -0,0 +1,415 @@ +convnext: Module(keras.api.applications.convnext) +ConvNeXtBase( + model_name='convnext_base', + include_top=True, + include_preprocessing=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +ConvNeXtLarge( + model_name='convnext_large', + include_top=True, + include_preprocessing=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +ConvNeXtSmall( + model_name='convnext_small', + include_top=True, + include_preprocessing=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +ConvNeXtTiny( + model_name='convnext_tiny', + include_top=True, + include_preprocessing=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +ConvNeXtXLarge( + model_name='convnext_xlarge', + include_top=True, + include_preprocessing=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +densenet: Module(keras.api.applications.densenet) +DenseNet121( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +DenseNet169( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +DenseNet201( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +efficientnet: Module(keras.api.applications.efficientnet) +efficientnet_v2: Module(keras.api.applications.efficientnet_v2) +EfficientNetB0( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + **kwargs +) +EfficientNetB1( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + **kwargs +) +EfficientNetB2( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + **kwargs +) +EfficientNetB3( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + 
pooling=None, + classes=1000, + classifier_activation='softmax', + **kwargs +) +EfficientNetB4( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + **kwargs +) +EfficientNetB5( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + **kwargs +) +EfficientNetB6( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + **kwargs +) +EfficientNetB7( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + **kwargs +) +EfficientNetV2B0( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + include_preprocessing=True +) +EfficientNetV2B1( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + include_preprocessing=True +) +EfficientNetV2B2( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + include_preprocessing=True +) +EfficientNetV2B3( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + include_preprocessing=True +) +EfficientNetV2L( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + include_preprocessing=True +) +EfficientNetV2M( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + include_preprocessing=True +) +EfficientNetV2S( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax', + include_preprocessing=True +) +imagenet_utils: Module(keras.api.applications.imagenet_utils) +inception_resnet_v2: Module(keras.api.applications.inception_resnet_v2) +inception_v3: Module(keras.api.applications.inception_v3) +InceptionResNetV2( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +InceptionV3( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +mobilenet: Module(keras.api.applications.mobilenet) +MobileNet( + input_shape=None, + alpha=1.0, + depth_multiplier=1, + dropout=0.001, + include_top=True, + weights='imagenet', + input_tensor=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +mobilenet_v2: Module(keras.api.applications.mobilenet_v2) +mobilenet_v3: Module(keras.api.applications.mobilenet_v3) +MobileNetV2( + input_shape=None, + alpha=1.0, + include_top=True, + weights='imagenet', + input_tensor=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +MobileNetV3Large( + input_shape=None, + alpha=1.0, + minimalistic=False, + include_top=True, + weights='imagenet', + input_tensor=None, + classes=1000, + pooling=None, + dropout_rate=0.2, + classifier_activation='softmax', + 
include_preprocessing=True +) +MobileNetV3Small( + input_shape=None, + alpha=1.0, + minimalistic=False, + include_top=True, + weights='imagenet', + input_tensor=None, + classes=1000, + pooling=None, + dropout_rate=0.2, + classifier_activation='softmax', + include_preprocessing=True +) +nasnet: Module(keras.api.applications.nasnet) +NASNetLarge( + input_shape=None, + include_top=True, + weights='imagenet', + input_tensor=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +NASNetMobile( + input_shape=None, + include_top=True, + weights='imagenet', + input_tensor=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +resnet: Module(keras.api.applications.resnet) +resnet_v2: Module(keras.api.applications.resnet_v2) +ResNet101( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +ResNet101V2( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +ResNet152( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +ResNet152V2( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +resnet50: Module(keras.api.applications.resnet50) +ResNet50( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +ResNet50V2( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +vgg16: Module(keras.api.applications.vgg16) +VGG16( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +vgg19: Module(keras.api.applications.vgg19) +VGG19( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) +xception: Module(keras.api.applications.xception) +Xception( + include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation='softmax' +) + diff --git a/.tether/man/keras.backend.txt b/.tether/man/keras.backend.txt new file mode 100644 index 0000000000..fa41652776 --- /dev/null +++ b/.tether/man/keras.backend.txt @@ -0,0 +1,15 @@ +backend() +clear_session(free_memory=True) +epsilon() +floatx() +get_uid(prefix='') +image_data_format() +is_float_dtype(dtype) +is_int_dtype(dtype) +is_keras_tensor(x) +result_type(*dtypes) +set_epsilon(value) +set_floatx(value) +set_image_data_format(data_format) +standardize_dtype(dtype) + diff --git a/.tether/man/keras.callbacks.txt b/.tether/man/keras.callbacks.txt new file mode 100644 index 0000000000..70580697fc --- /dev/null +++ b/.tether/man/keras.callbacks.txt @@ -0,0 +1,82 @@ +BackupAndRestore( + backup_dir, + save_freq='epoch', + delete_checkpoint=True +) +Callback() +CallbackList( + callbacks=None, + add_history=False, + add_progbar=False, + model=None, + **params +) +CSVLogger( + filename, + separator=',', + append=False +) +EarlyStopping( + monitor='val_loss', + min_delta=0, + patience=0, + verbose=0, + mode='auto', + baseline=None, + restore_best_weights=False, + start_from_epoch=0 +) +History() 
+LambdaCallback( + on_epoch_begin=None, + on_epoch_end=None, + on_train_begin=None, + on_train_end=None, + on_train_batch_begin=None, + on_train_batch_end=None, + **kwargs +) +LearningRateScheduler(schedule, verbose=0) +ModelCheckpoint( + filepath, + monitor='val_loss', + verbose=0, + save_best_only=False, + save_weights_only=False, + mode='auto', + save_freq='epoch', + initial_value_threshold=None +) +ProgbarLogger() +ReduceLROnPlateau( + monitor='val_loss', + factor=0.1, + patience=10, + verbose=0, + mode='auto', + min_delta=0.0001, + cooldown=0, + min_lr=0.0, + **kwargs +) +RemoteMonitor( + root='http://localhost:9000', + path='/publish/epoch/end/', + field='data', + headers=None, + send_as_json=False +) +SwapEMAWeights(swap_on_epoch=False) +TensorBoard( + log_dir='logs', + histogram_freq=0, + write_graph=True, + write_images=False, + write_steps_per_second=False, + update_freq='epoch', + profile_batch=0, + embeddings_freq=0, + embeddings_metadata=None +) +TerminateOnNaN() + diff --git a/.tether/man/keras.config.txt b/.tether/man/keras.config.txt new file mode 100644 index 0000000000..849a70d14e --- /dev/null +++ b/.tether/man/keras.config.txt @@ -0,0 +1,18 @@ +backend() +disable_interactive_logging() +disable_traceback_filtering() +dtype_policy() +enable_interactive_logging() +enable_traceback_filtering() +enable_unsafe_deserialization() +epsilon() +floatx() +image_data_format() +is_interactive_logging_enabled() +is_traceback_filtering_enabled() +set_backend(backend) +set_dtype_policy(policy) +set_epsilon(value) +set_floatx(value) +set_image_data_format(data_format) + diff --git a/.tether/man/keras.constraints.txt b/.tether/man/keras.constraints.txt new file mode 100644 index 0000000000..3dd9da0a0f --- /dev/null +++ b/.tether/man/keras.constraints.txt @@ -0,0 +1,23 @@ +Constraint() +deserialize(config, custom_objects=None) +get(identifier) +max_norm(max_value=2, axis=0) +MaxNorm(max_value=2, axis=0) +min_max_norm( + min_value=0.0, + max_value=1.0, + rate=1.0, + axis=0 +) +MinMaxNorm( + min_value=0.0, + max_value=1.0, + rate=1.0, + axis=0 +) +non_neg() +NonNeg() +serialize(constraint) +unit_norm(axis=0) +UnitNorm(axis=0) + diff --git a/.tether/man/keras.datasets.txt b/.tether/man/keras.datasets.txt new file mode 100644 index 0000000000..f771e1b502 --- /dev/null +++ b/.tether/man/keras.datasets.txt @@ -0,0 +1,9 @@ +boston_housing: Module(keras.api.datasets.boston_housing) +california_housing: Module(keras.api.datasets.california_housing) +cifar10: Module(keras.api.datasets.cifar10) +cifar100: Module(keras.api.datasets.cifar100) +fashion_mnist: Module(keras.api.datasets.fashion_mnist) +imdb: Module(keras.api.datasets.imdb) +mnist: Module(keras.api.datasets.mnist) +reuters: Module(keras.api.datasets.reuters) + diff --git a/.tether/man/keras.distribution.txt b/.tether/man/keras.distribution.txt new file mode 100644 index 0000000000..2abe20bada --- /dev/null +++ b/.tether/man/keras.distribution.txt @@ -0,0 +1,23 @@ +DataParallel(device_mesh=None, devices=None) +DeviceMesh( + shape, + axis_names, + devices=None +) +distribute_tensor(tensor, layout) +distribution() +initialize( + job_addresses=None, + num_processes=None, + process_id=None +) +LayoutMap(device_mesh=None) +list_devices(device_type=None) +ModelParallel( + device_mesh, + layout_map, + batch_dim_name=None +) +set_distribution(value) +TensorLayout(axes, device_mesh=None) + diff --git a/.tether/man/keras.dtype_policies.txt b/.tether/man/keras.dtype_policies.txt new file mode 100644 index 0000000000..1835f6151c --- /dev/null +++ 
b/.tether/man/keras.dtype_policies.txt @@ -0,0 +1,12 @@ +deserialize(config, custom_objects=None) +DTypePolicy( + name, + *args, + **kwargs +) +FloatDTypePolicy(name) +get(identifier) +QuantizedDTypePolicy(name) +QuantizedFloat8DTypePolicy(name, amax_history_length=1024) +serialize(dtype_policy) + diff --git a/.tether/man/keras.export.txt b/.tether/man/keras.export.txt new file mode 100644 index 0000000000..369e89b397 --- /dev/null +++ b/.tether/man/keras.export.txt @@ -0,0 +1,2 @@ +ExportArchive() + diff --git a/.tether/man/keras.initializers.txt b/.tether/man/keras.initializers.txt new file mode 100644 index 0000000000..e9b2f68300 --- /dev/null +++ b/.tether/man/keras.initializers.txt @@ -0,0 +1,71 @@ +constant(value=0.0) +Constant(value=0.0) +deserialize(config, custom_objects=None) +get(identifier) +glorot_normal(seed=None) +glorot_uniform(seed=None) +GlorotNormal(seed=None) +GlorotUniform(seed=None) +he_normal(seed=None) +he_uniform(seed=None) +HeNormal(seed=None) +HeUniform(seed=None) +identity(gain=1.0) +Identity(gain=1.0) +IdentityInitializer(gain=1.0) +Initializer() +lecun_normal(seed=None) +lecun_uniform(seed=None) +LecunNormal(seed=None) +LecunUniform(seed=None) +ones() +Ones() +orthogonal(gain=1.0, seed=None) +Orthogonal(gain=1.0, seed=None) +OrthogonalInitializer(gain=1.0, seed=None) +random_normal( + mean=0.0, + stddev=0.05, + seed=None +) +random_uniform( + minval=-0.05, + maxval=0.05, + seed=None +) +RandomNormal( + mean=0.0, + stddev=0.05, + seed=None +) +RandomUniform( + minval=-0.05, + maxval=0.05, + seed=None +) +serialize(initializer) +truncated_normal( + mean=0.0, + stddev=0.05, + seed=None +) +TruncatedNormal( + mean=0.0, + stddev=0.05, + seed=None +) +variance_scaling( + scale=1.0, + mode='fan_in', + distribution='truncated_normal', + seed=None +) +VarianceScaling( + scale=1.0, + mode='fan_in', + distribution='truncated_normal', + seed=None +) +zeros() +Zeros() + diff --git a/.tether/man/keras.layers.txt b/.tether/man/keras.layers.txt new file mode 100644 index 0000000000..38bec143e4 --- /dev/null +++ b/.tether/man/keras.layers.txt @@ -0,0 +1,1257 @@ +Activation(activation, **kwargs) +ActivityRegularization( + l1=0.0, + l2=0.0, + **kwargs +) +add(inputs, **kwargs) +Add(**kwargs) +AdditiveAttention( + use_scale=True, + dropout=0.0, + **kwargs +) +AlphaDropout( + rate, + noise_shape=None, + seed=None, + **kwargs +) +Attention( + use_scale=False, + score_mode='dot', + dropout=0.0, + seed=None, + **kwargs +) +average(inputs, **kwargs) +Average(**kwargs) +AveragePooling1D( + pool_size, + strides=None, + padding='valid', + data_format=None, + name=None, + **kwargs +) +AveragePooling2D( + pool_size, + strides=None, + padding='valid', + data_format=None, + name=None, + **kwargs +) +AveragePooling3D( + pool_size, + strides=None, + padding='valid', + data_format=None, + name=None, + **kwargs +) +AvgPool1D( + pool_size, + strides=None, + padding='valid', + data_format=None, + name=None, + **kwargs +) +AvgPool2D( + pool_size, + strides=None, + padding='valid', + data_format=None, + name=None, + **kwargs +) +AvgPool3D( + pool_size, + strides=None, + padding='valid', + data_format=None, + name=None, + **kwargs +) +BatchNormalization( + axis=-1, + momentum=0.99, + epsilon=0.001, + center=True, + scale=True, + beta_initializer='zeros', + gamma_initializer='ones', + moving_mean_initializer='zeros', + moving_variance_initializer='ones', + beta_regularizer=None, + gamma_regularizer=None, + beta_constraint=None, + gamma_constraint=None, + synchronized=False, + **kwargs +) 
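A minimal sketch combining the initializer signatures above with the BatchNormalization entry just listed; layer sizes are arbitrary placeholders:

import keras
from keras import initializers, layers

# he_normal()/HeNormal() and friends are function/class aliases for the
# same initializers; any of them can be passed to a layer directly.
model = keras.Sequential([
    keras.Input(shape=(16,)),
    layers.Dense(32, kernel_initializer=initializers.HeNormal(seed=1)),
    layers.BatchNormalization(momentum=0.99, epsilon=1e-3),
    layers.Activation("relu"),
])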
+Bidirectional( + layer, + merge_mode='concat', + weights=None, + backward_layer=None, + **kwargs +) +CategoryEncoding( + num_tokens=None, + output_mode='multi_hot', + sparse=False, + **kwargs +) +CenterCrop( + height, + width, + data_format=None, + **kwargs +) +concatenate( + inputs, + axis=-1, + **kwargs +) +Concatenate(axis=-1, **kwargs) +Conv1D( + filters, + kernel_size, + strides=1, + padding='valid', + data_format=None, + dilation_rate=1, + groups=1, + activation=None, + use_bias=True, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs +) +Conv1DTranspose( + filters, + kernel_size, + strides=1, + padding='valid', + data_format=None, + dilation_rate=1, + activation=None, + use_bias=True, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs +) +Conv2D( + filters, + kernel_size, + strides=(1, 1), + padding='valid', + data_format=None, + dilation_rate=(1, 1), + groups=1, + activation=None, + use_bias=True, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs +) +Conv2DTranspose( + filters, + kernel_size, + strides=(1, 1), + padding='valid', + data_format=None, + dilation_rate=(1, 1), + activation=None, + use_bias=True, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs +) +Conv3D( + filters, + kernel_size, + strides=(1, 1, 1), + padding='valid', + data_format=None, + dilation_rate=(1, 1, 1), + groups=1, + activation=None, + use_bias=True, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs +) +Conv3DTranspose( + filters, + kernel_size, + strides=(1, 1, 1), + padding='valid', + data_format=None, + dilation_rate=(1, 1, 1), + activation=None, + use_bias=True, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs +) +ConvLSTM1D( + filters, + kernel_size, + strides=1, + padding='valid', + data_format=None, + dilation_rate=1, + activation='tanh', + recurrent_activation='sigmoid', + use_bias=True, + kernel_initializer='glorot_uniform', + recurrent_initializer='orthogonal', + bias_initializer='zeros', + unit_forget_bias=True, + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0.0, + recurrent_dropout=0.0, + seed=None, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + **kwargs +) +ConvLSTM2D( + filters, + kernel_size, + strides=1, + padding='valid', + data_format=None, + dilation_rate=1, + activation='tanh', + recurrent_activation='sigmoid', + use_bias=True, + kernel_initializer='glorot_uniform', + recurrent_initializer='orthogonal', + bias_initializer='zeros', + unit_forget_bias=True, + 
kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0.0, + recurrent_dropout=0.0, + seed=None, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + **kwargs +) +ConvLSTM3D( + filters, + kernel_size, + strides=1, + padding='valid', + data_format=None, + dilation_rate=1, + activation='tanh', + recurrent_activation='sigmoid', + use_bias=True, + kernel_initializer='glorot_uniform', + recurrent_initializer='orthogonal', + bias_initializer='zeros', + unit_forget_bias=True, + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0.0, + recurrent_dropout=0.0, + seed=None, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + **kwargs +) +Convolution1D( + filters, + kernel_size, + strides=1, + padding='valid', + data_format=None, + dilation_rate=1, + groups=1, + activation=None, + use_bias=True, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs +) +Convolution1DTranspose( + filters, + kernel_size, + strides=1, + padding='valid', + data_format=None, + dilation_rate=1, + activation=None, + use_bias=True, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs +) +Convolution2D( + filters, + kernel_size, + strides=(1, 1), + padding='valid', + data_format=None, + dilation_rate=(1, 1), + groups=1, + activation=None, + use_bias=True, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs +) +Convolution2DTranspose( + filters, + kernel_size, + strides=(1, 1), + padding='valid', + data_format=None, + dilation_rate=(1, 1), + activation=None, + use_bias=True, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs +) +Convolution3D( + filters, + kernel_size, + strides=(1, 1, 1), + padding='valid', + data_format=None, + dilation_rate=(1, 1, 1), + groups=1, + activation=None, + use_bias=True, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs +) +Convolution3DTranspose( + filters, + kernel_size, + strides=(1, 1, 1), + padding='valid', + data_format=None, + dilation_rate=(1, 1, 1), + activation=None, + use_bias=True, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs +) +Cropping1D(cropping=(1, 1), **kwargs) +Cropping2D( + cropping=((0, 0), (0, 0)), + data_format=None, + **kwargs +) +Cropping3D( + cropping=((1, 1), (1, 1), (1, 1)), + data_format=None, + **kwargs +) +Dense( + units, + activation=None, + use_bias=True, + 
kernel_initializer='glorot_uniform', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + lora_rank=None, + **kwargs +) +DepthwiseConv1D( + kernel_size, + strides=1, + padding='valid', + depth_multiplier=1, + data_format=None, + dilation_rate=1, + activation=None, + use_bias=True, + depthwise_initializer='glorot_uniform', + bias_initializer='zeros', + depthwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + bias_constraint=None, + **kwargs +) +DepthwiseConv2D( + kernel_size, + strides=(1, 1), + padding='valid', + depth_multiplier=1, + data_format=None, + dilation_rate=(1, 1), + activation=None, + use_bias=True, + depthwise_initializer='glorot_uniform', + bias_initializer='zeros', + depthwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + bias_constraint=None, + **kwargs +) +deserialize(config, custom_objects=None) +Discretization( + bin_boundaries=None, + num_bins=None, + epsilon=0.01, + output_mode='int', + sparse=False, + dtype=None, + name=None +) +dot( + inputs, + axes=-1, + **kwargs +) +Dot( + axes, + normalize=False, + **kwargs +) +Dropout( + rate, + noise_shape=None, + seed=None, + **kwargs +) +EinsumDense( + equation, + output_shape, + activation=None, + bias_axes=None, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + lora_rank=None, + **kwargs +) +ELU(alpha=1.0, **kwargs) +Embedding( + input_dim, + output_dim, + embeddings_initializer='uniform', + embeddings_regularizer=None, + embeddings_constraint=None, + mask_zero=False, + weights=None, + lora_rank=None, + **kwargs +) +Flatten(data_format=None, **kwargs) +FlaxLayer( + module, + method=None, + variables=None, + **kwargs +) +GaussianDropout( + rate, + seed=None, + **kwargs +) +GaussianNoise( + stddev, + seed=None, + **kwargs +) +GlobalAveragePooling1D( + data_format=None, + keepdims=False, + **kwargs +) +GlobalAveragePooling2D( + data_format=None, + keepdims=False, + **kwargs +) +GlobalAveragePooling3D( + data_format=None, + keepdims=False, + **kwargs +) +GlobalAvgPool1D( + data_format=None, + keepdims=False, + **kwargs +) +GlobalAvgPool2D( + data_format=None, + keepdims=False, + **kwargs +) +GlobalAvgPool3D( + data_format=None, + keepdims=False, + **kwargs +) +GlobalMaxPool1D( + data_format=None, + keepdims=False, + **kwargs +) +GlobalMaxPool2D( + data_format=None, + keepdims=False, + **kwargs +) +GlobalMaxPool3D( + data_format=None, + keepdims=False, + **kwargs +) +GlobalMaxPooling1D( + data_format=None, + keepdims=False, + **kwargs +) +GlobalMaxPooling2D( + data_format=None, + keepdims=False, + **kwargs +) +GlobalMaxPooling3D( + data_format=None, + keepdims=False, + **kwargs +) +GroupNormalization( + groups=32, + axis=-1, + epsilon=0.001, + center=True, + scale=True, + beta_initializer='zeros', + gamma_initializer='ones', + beta_regularizer=None, + gamma_regularizer=None, + beta_constraint=None, + gamma_constraint=None, + **kwargs +) +GroupQueryAttention( + head_dim, + num_query_heads, + num_key_value_heads, + dropout=0.0, + use_bias=True, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs +) +GRU( + units, + activation='tanh', + 
recurrent_activation='sigmoid', + use_bias=True, + kernel_initializer='glorot_uniform', + recurrent_initializer='orthogonal', + bias_initializer='zeros', + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0.0, + recurrent_dropout=0.0, + seed=None, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + unroll=False, + reset_after=True, + use_cudnn='auto', + **kwargs +) +GRUCell( + units, + activation='tanh', + recurrent_activation='sigmoid', + use_bias=True, + kernel_initializer='glorot_uniform', + recurrent_initializer='orthogonal', + bias_initializer='zeros', + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0.0, + recurrent_dropout=0.0, + reset_after=True, + seed=None, + **kwargs +) +HashedCrossing( + num_bins, + output_mode='int', + sparse=False, + name=None, + dtype=None, + **kwargs +) +Hashing( + num_bins, + mask_value=None, + salt=None, + output_mode='int', + sparse=False, + **kwargs +) +Identity(**kwargs) +Input( + shape=None, + batch_size=None, + dtype=None, + sparse=None, + batch_shape=None, + name=None, + tensor=None +) +InputLayer( + shape=None, + batch_size=None, + dtype=None, + sparse=None, + batch_shape=None, + input_tensor=None, + name=None, + **kwargs +) +InputSpec( + dtype=None, + shape=None, + ndim=None, + max_ndim=None, + min_ndim=None, + axes=None, + allow_last_axis_squeeze=False, + name=None +) +IntegerLookup( + max_tokens=None, + num_oov_indices=1, + mask_token=None, + oov_token=-1, + vocabulary=None, + vocabulary_dtype='int64', + idf_weights=None, + invert=False, + output_mode='int', + sparse=False, + pad_to_max_tokens=False, + name=None, + **kwargs +) +JaxLayer( + call_fn, + init_fn=None, + params=None, + state=None, + seed=None, + **kwargs +) +Lambda( + function, + output_shape=None, + mask=None, + arguments=None, + **kwargs +) +Layer(*args, **kwargs) +LayerNormalization( + axis=-1, + epsilon=0.001, + center=True, + scale=True, + rms_scaling=False, + beta_initializer='zeros', + gamma_initializer='ones', + beta_regularizer=None, + gamma_regularizer=None, + beta_constraint=None, + gamma_constraint=None, + **kwargs +) +LeakyReLU(negative_slope=0.3, **kwargs) +LSTM( + units, + activation='tanh', + recurrent_activation='sigmoid', + use_bias=True, + kernel_initializer='glorot_uniform', + recurrent_initializer='orthogonal', + bias_initializer='zeros', + unit_forget_bias=True, + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0.0, + recurrent_dropout=0.0, + seed=None, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + unroll=False, + use_cudnn='auto', + **kwargs +) +LSTMCell( + units, + activation='tanh', + recurrent_activation='sigmoid', + use_bias=True, + kernel_initializer='glorot_uniform', + recurrent_initializer='orthogonal', + bias_initializer='zeros', + unit_forget_bias=True, + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0.0, + recurrent_dropout=0.0, + seed=None, + **kwargs +) +Masking(mask_value=0.0, **kwargs) +maximum(inputs, **kwargs) +Maximum(**kwargs) 
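A runnable sketch of the recurrent-layer signatures above; shapes and sizes are placeholders:

import numpy as np
import keras
from keras import layers

x = np.random.random((8, 10, 4)).astype("float32")  # (batch, steps, features)
model = keras.Sequential([
    keras.Input(shape=(10, 4)),
    layers.LSTM(16),                       # return_sequences=False by default
    layers.Dense(1, activation="sigmoid"),
])
print(model(x).shape)  # (8, 1)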
+MaxPool1D( + pool_size=2, + strides=None, + padding='valid', + data_format=None, + name=None, + **kwargs +) +MaxPool2D( + pool_size=(2, 2), + strides=None, + padding='valid', + data_format=None, + name=None, + **kwargs +) +MaxPool3D( + pool_size=(2, 2, 2), + strides=None, + padding='valid', + data_format=None, + name=None, + **kwargs +) +MaxPooling1D( + pool_size=2, + strides=None, + padding='valid', + data_format=None, + name=None, + **kwargs +) +MaxPooling2D( + pool_size=(2, 2), + strides=None, + padding='valid', + data_format=None, + name=None, + **kwargs +) +MaxPooling3D( + pool_size=(2, 2, 2), + strides=None, + padding='valid', + data_format=None, + name=None, + **kwargs +) +MelSpectrogram( + fft_length=2048, + sequence_stride=512, + sequence_length=None, + window='hann', + sampling_rate=16000, + num_mel_bins=128, + min_freq=20.0, + max_freq=None, + power_to_db=True, + top_db=80.0, + mag_exp=2.0, + min_power=1e-10, + ref_power=1.0, + **kwargs +) +minimum(inputs, **kwargs) +Minimum(**kwargs) +MultiHeadAttention( + num_heads, + key_dim, + value_dim=None, + dropout=0.0, + use_bias=True, + output_shape=None, + attention_axes=None, + kernel_initializer='glorot_uniform', + bias_initializer='zeros', + kernel_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + bias_constraint=None, + **kwargs +) +multiply(inputs, **kwargs) +Multiply(**kwargs) +Normalization( + axis=-1, + mean=None, + variance=None, + invert=False, + **kwargs +) +Permute(dims, **kwargs) +PReLU( + alpha_initializer='Zeros', + alpha_regularizer=None, + alpha_constraint=None, + shared_axes=None, + **kwargs +) +RandomBrightness( + factor, + value_range=(0, 255), + seed=None, + **kwargs +) +RandomContrast( + factor, + seed=None, + **kwargs +) +RandomCrop( + height, + width, + seed=None, + data_format=None, + name=None, + **kwargs +) +RandomFlip( + mode='horizontal_and_vertical', + seed=None, + **kwargs +) +RandomRotation( + factor, + fill_mode='reflect', + interpolation='bilinear', + seed=None, + fill_value=0.0, + value_range=(0, 255), + data_format=None, + **kwargs +) +RandomTranslation( + height_factor, + width_factor, + fill_mode='reflect', + interpolation='bilinear', + seed=None, + fill_value=0.0, + data_format=None, + **kwargs +) +RandomZoom( + height_factor, + width_factor=None, + fill_mode='reflect', + interpolation='bilinear', + seed=None, + fill_value=0.0, + data_format=None, + **kwargs +) +ReLU( + max_value=None, + negative_slope=0.0, + threshold=0.0, + **kwargs +) +RepeatVector(n, **kwargs) +Rescaling( + scale, + offset=0.0, + **kwargs +) +Reshape(target_shape, **kwargs) +Resizing( + height, + width, + interpolation='bilinear', + crop_to_aspect_ratio=False, + pad_to_aspect_ratio=False, + fill_mode='constant', + fill_value=0.0, + data_format=None, + **kwargs +) +RNN( + cell, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + unroll=False, + zero_output_for_mask=False, + **kwargs +) +SeparableConv1D( + filters, + kernel_size, + strides=1, + padding='valid', + data_format=None, + dilation_rate=1, + depth_multiplier=1, + activation=None, + use_bias=True, + depthwise_initializer='glorot_uniform', + pointwise_initializer='glorot_uniform', + bias_initializer='zeros', + depthwise_regularizer=None, + pointwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + pointwise_constraint=None, + bias_constraint=None, + **kwargs +) +SeparableConv2D( + filters, + kernel_size, + strides=(1, 1), + 
padding='valid', + data_format=None, + dilation_rate=(1, 1), + depth_multiplier=1, + activation=None, + use_bias=True, + depthwise_initializer='glorot_uniform', + pointwise_initializer='glorot_uniform', + bias_initializer='zeros', + depthwise_regularizer=None, + pointwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + pointwise_constraint=None, + bias_constraint=None, + **kwargs +) +SeparableConvolution1D( + filters, + kernel_size, + strides=1, + padding='valid', + data_format=None, + dilation_rate=1, + depth_multiplier=1, + activation=None, + use_bias=True, + depthwise_initializer='glorot_uniform', + pointwise_initializer='glorot_uniform', + bias_initializer='zeros', + depthwise_regularizer=None, + pointwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + pointwise_constraint=None, + bias_constraint=None, + **kwargs +) +SeparableConvolution2D( + filters, + kernel_size, + strides=(1, 1), + padding='valid', + data_format=None, + dilation_rate=(1, 1), + depth_multiplier=1, + activation=None, + use_bias=True, + depthwise_initializer='glorot_uniform', + pointwise_initializer='glorot_uniform', + bias_initializer='zeros', + depthwise_regularizer=None, + pointwise_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + depthwise_constraint=None, + pointwise_constraint=None, + bias_constraint=None, + **kwargs +) +serialize(layer) +SimpleRNN( + units, + activation='tanh', + use_bias=True, + kernel_initializer='glorot_uniform', + recurrent_initializer='orthogonal', + bias_initializer='zeros', + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + activity_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0.0, + recurrent_dropout=0.0, + return_sequences=False, + return_state=False, + go_backwards=False, + stateful=False, + unroll=False, + seed=None, + **kwargs +) +SimpleRNNCell( + units, + activation='tanh', + use_bias=True, + kernel_initializer='glorot_uniform', + recurrent_initializer='orthogonal', + bias_initializer='zeros', + kernel_regularizer=None, + recurrent_regularizer=None, + bias_regularizer=None, + kernel_constraint=None, + recurrent_constraint=None, + bias_constraint=None, + dropout=0.0, + recurrent_dropout=0.0, + seed=None, + **kwargs +) +Softmax(axis=-1, **kwargs) +SpatialDropout1D( + rate, + seed=None, + name=None, + dtype=None +) +SpatialDropout2D( + rate, + data_format=None, + seed=None, + name=None, + dtype=None +) +SpatialDropout3D( + rate, + data_format=None, + seed=None, + name=None, + dtype=None +) +SpectralNormalization( + layer, + power_iterations=1, + **kwargs +) +StackedRNNCells(cells, **kwargs) +StringLookup( + max_tokens=None, + num_oov_indices=1, + mask_token=None, + oov_token='[UNK]', + vocabulary=None, + idf_weights=None, + invert=False, + output_mode='int', + pad_to_max_tokens=False, + sparse=False, + encoding='utf-8', + name=None, + **kwargs +) +subtract(inputs, **kwargs) +Subtract(**kwargs) +TextVectorization( + max_tokens=None, + standardize='lower_and_strip_punctuation', + split='whitespace', + ngrams=None, + output_mode='int', + output_sequence_length=None, + pad_to_max_tokens=False, + vocabulary=None, + idf_weights=None, + sparse=False, + ragged=False, + encoding='utf-8', + name=None, + **kwargs +) +TFSMLayer( + filepath, + call_endpoint='serve', + call_training_endpoint=None, + trainable=True, + name=None, + dtype=None +) +TimeDistributed(layer, 
**kwargs) +TorchModuleWrapper( + module, + name=None, + **kwargs +) +UnitNormalization(axis=-1, **kwargs) +UpSampling1D(size=2, **kwargs) +UpSampling2D( + size=(2, 2), + data_format=None, + interpolation='nearest', + **kwargs +) +UpSampling3D( + size=(2, 2, 2), + data_format=None, + **kwargs +) +Wrapper(layer, **kwargs) +ZeroPadding1D(padding=1, **kwargs) +ZeroPadding2D( + padding=(1, 1), + data_format=None, + **kwargs +) +ZeroPadding3D( + padding=((1, 1), (1, 1), (1, 1)), + data_format=None, + **kwargs +) + diff --git a/.tether/man/keras.legacy.saving.txt b/.tether/man/keras.legacy.saving.txt new file mode 100644 index 0000000000..44f4183c4f --- /dev/null +++ b/.tether/man/keras.legacy.saving.txt @@ -0,0 +1,8 @@ +deserialize_keras_object( + identifier, + module_objects=None, + custom_objects=None, + printable_module_name='object' +) +serialize_keras_object(instance) + diff --git a/.tether/man/keras.legacy.txt b/.tether/man/keras.legacy.txt new file mode 100644 index 0000000000..13f97b309b --- /dev/null +++ b/.tether/man/keras.legacy.txt @@ -0,0 +1,2 @@ +saving: Module(keras.api.legacy.saving) + diff --git a/.tether/man/keras.losses.txt b/.tether/man/keras.losses.txt new file mode 100644 index 0000000000..7727a1eb76 --- /dev/null +++ b/.tether/man/keras.losses.txt @@ -0,0 +1,144 @@ +binary_crossentropy( + y_true, + y_pred, + from_logits=False, + label_smoothing=0.0, + axis=-1 +) +binary_focal_crossentropy( + y_true, + y_pred, + apply_class_balancing=False, + alpha=0.25, + gamma=2.0, + from_logits=False, + label_smoothing=0.0, + axis=-1 +) +BinaryCrossentropy( + from_logits=False, + label_smoothing=0.0, + axis=-1, + reduction='sum_over_batch_size', + name='binary_crossentropy' +) +BinaryFocalCrossentropy( + apply_class_balancing=False, + alpha=0.25, + gamma=2.0, + from_logits=False, + label_smoothing=0.0, + axis=-1, + reduction='sum_over_batch_size', + name='binary_focal_crossentropy' +) +categorical_crossentropy( + y_true, + y_pred, + from_logits=False, + label_smoothing=0.0, + axis=-1 +) +categorical_focal_crossentropy( + y_true, + y_pred, + alpha=0.25, + gamma=2.0, + from_logits=False, + label_smoothing=0.0, + axis=-1 +) +categorical_hinge(y_true, y_pred) +CategoricalCrossentropy( + from_logits=False, + label_smoothing=0.0, + axis=-1, + reduction='sum_over_batch_size', + name='categorical_crossentropy' +) +CategoricalFocalCrossentropy( + alpha=0.25, + gamma=2.0, + from_logits=False, + label_smoothing=0.0, + axis=-1, + reduction='sum_over_batch_size', + name='categorical_focal_crossentropy' +) +CategoricalHinge(reduction='sum_over_batch_size', name='categorical_hinge') +cosine_similarity( + y_true, + y_pred, + axis=-1 +) +CosineSimilarity( + axis=-1, + reduction='sum_over_batch_size', + name='cosine_similarity' +) +ctc(y_true, y_pred) +CTC(reduction='sum_over_batch_size', name='sparse_categorical_crossentropy') +deserialize(name, custom_objects=None) +dice(y_true, y_pred) +Dice(reduction='sum_over_batch_size', name='dice') +get(identifier) +hinge(y_true, y_pred) +Hinge(reduction='sum_over_batch_size', name='hinge') +huber( + y_true, + y_pred, + delta=1.0 +) +Huber( + delta=1.0, + reduction='sum_over_batch_size', + name='huber_loss' +) +kl_divergence(y_true, y_pred) +KLDivergence(reduction='sum_over_batch_size', name='kl_divergence') +log_cosh(y_true, y_pred) +LogCosh(reduction='sum_over_batch_size', name='log_cosh') +Loss( + name=None, + reduction='sum_over_batch_size', + dtype=None +) +mean_absolute_error(y_true, y_pred) +mean_absolute_percentage_error(y_true, y_pred) 
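The class and function forms listed here compute the same quantity; a small check, assuming these toy arrays:

import numpy as np
import keras
from keras import losses

y_true = np.array([[0.0], [1.0]])
y_pred = np.array([[0.1], [0.7]])

loss_obj = losses.BinaryCrossentropy(from_logits=False)  # reduces to a scalar
per_sample = losses.binary_crossentropy(y_true, y_pred)  # shape (2,)
print(float(loss_obj(y_true, y_pred)))
print(float(keras.ops.mean(per_sample)))  # matches the class-form reduction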
+mean_squared_error(y_true, y_pred) +mean_squared_logarithmic_error(y_true, y_pred) +MeanAbsoluteError(reduction='sum_over_batch_size', name='mean_absolute_error') +MeanAbsolutePercentageError(reduction='sum_over_batch_size', name='mean_absolute_percentage_error') +MeanSquaredError(reduction='sum_over_batch_size', name='mean_squared_error') +MeanSquaredLogarithmicError(reduction='sum_over_batch_size', name='mean_squared_logarithmic_error') +poisson(y_true, y_pred) +Poisson(reduction='sum_over_batch_size', name='poisson') +serialize(loss) +sparse_categorical_crossentropy( + y_true, + y_pred, + from_logits=False, + ignore_class=None, + axis=-1 +) +SparseCategoricalCrossentropy( + from_logits=False, + ignore_class=None, + reduction='sum_over_batch_size', + name='sparse_categorical_crossentropy' +) +squared_hinge(y_true, y_pred) +SquaredHinge(reduction='sum_over_batch_size', name='squared_hinge') +tversky( + y_true, + y_pred, + alpha=0.5, + beta=0.5 +) +Tversky( + alpha=0.5, + beta=0.5, + reduction='sum_over_batch_size', + name='tversky' +) + diff --git a/.tether/man/keras.metrics.txt b/.tether/man/keras.metrics.txt new file mode 100644 index 0000000000..ce73bc2a39 --- /dev/null +++ b/.tether/man/keras.metrics.txt @@ -0,0 +1,273 @@ +Accuracy(name='accuracy', dtype=None) +AUC( + num_thresholds=200, + curve='ROC', + summation_method='interpolation', + name=None, + dtype=None, + thresholds=None, + multi_label=False, + num_labels=None, + label_weights=None, + from_logits=False +) +binary_accuracy( + y_true, + y_pred, + threshold=0.5 +) +binary_crossentropy( + y_true, + y_pred, + from_logits=False, + label_smoothing=0.0, + axis=-1 +) +binary_focal_crossentropy( + y_true, + y_pred, + apply_class_balancing=False, + alpha=0.25, + gamma=2.0, + from_logits=False, + label_smoothing=0.0, + axis=-1 +) +BinaryAccuracy( + name='binary_accuracy', + dtype=None, + threshold=0.5 +) +BinaryCrossentropy( + name='binary_crossentropy', + dtype=None, + from_logits=False, + label_smoothing=0 +) +BinaryIoU( + target_class_ids=(0, 1), + threshold=0.5, + name=None, + dtype=None +) +categorical_accuracy(y_true, y_pred) +categorical_crossentropy( + y_true, + y_pred, + from_logits=False, + label_smoothing=0.0, + axis=-1 +) +categorical_focal_crossentropy( + y_true, + y_pred, + alpha=0.25, + gamma=2.0, + from_logits=False, + label_smoothing=0.0, + axis=-1 +) +categorical_hinge(y_true, y_pred) +CategoricalAccuracy(name='categorical_accuracy', dtype=None) +CategoricalCrossentropy( + name='categorical_crossentropy', + dtype=None, + from_logits=False, + label_smoothing=0, + axis=-1 +) +CategoricalHinge(name='categorical_hinge', dtype=None) +CosineSimilarity( + name='cosine_similarity', + dtype=None, + axis=-1 +) +deserialize(config, custom_objects=None) +F1Score( + average=None, + threshold=None, + name='f1_score', + dtype=None +) +FalseNegatives( + thresholds=None, + name=None, + dtype=None +) +FalsePositives( + thresholds=None, + name=None, + dtype=None +) +FBetaScore( + average=None, + beta=1.0, + threshold=None, + name='fbeta_score', + dtype=None +) +get(identifier) +hinge(y_true, y_pred) +Hinge(name='hinge', dtype=None) +huber( + y_true, + y_pred, + delta=1.0 +) +IoU( + num_classes, + target_class_ids, + name=None, + dtype=None, + ignore_class=None, + sparse_y_true=True, + sparse_y_pred=True, + axis=-1 +) +kl_divergence(y_true, y_pred) +KLDivergence(name='kl_divergence', dtype=None) +log_cosh(y_true, y_pred) +LogCoshError(name='logcosh', dtype=None) +Mean(name='mean', dtype=None) +mean_absolute_error(y_true, y_pred) 
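Metric classes follow the stateful update/result/reset protocol (the methods themselves are not shown in these constructor dumps); a short sketch using BinaryAccuracy from the listing above:

import keras

m = keras.metrics.BinaryAccuracy(threshold=0.5)
m.update_state([[1], [0]], [[0.8], [0.3]])  # both correct
m.update_state([[1]], [[0.2]])              # incorrect
print(float(m.result()))                    # 2/3 ~= 0.667
m.reset_state()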
+mean_absolute_percentage_error(y_true, y_pred) +mean_squared_error(y_true, y_pred) +mean_squared_logarithmic_error(y_true, y_pred) +MeanAbsoluteError(name='mean_absolute_error', dtype=None) +MeanAbsolutePercentageError(name='mean_absolute_percentage_error', dtype=None) +MeanIoU( + num_classes, + name=None, + dtype=None, + ignore_class=None, + sparse_y_true=True, + sparse_y_pred=True, + axis=-1 +) +MeanMetricWrapper( + fn, + name=None, + dtype=None, + **kwargs +) +MeanSquaredError(name='mean_squared_error', dtype=None) +MeanSquaredLogarithmicError(name='mean_squared_logarithmic_error', dtype=None) +Metric(dtype=None, name=None) +OneHotIoU( + num_classes, + target_class_ids, + name=None, + dtype=None, + ignore_class=None, + sparse_y_pred=False, + axis=-1 +) +OneHotMeanIoU( + num_classes, + name=None, + dtype=None, + ignore_class=None, + sparse_y_pred=False, + axis=-1 +) +poisson(y_true, y_pred) +Poisson(name='poisson', dtype=None) +Precision( + thresholds=None, + top_k=None, + class_id=None, + name=None, + dtype=None +) +PrecisionAtRecall( + recall, + num_thresholds=200, + class_id=None, + name=None, + dtype=None +) +R2Score( + class_aggregation='uniform_average', + num_regressors=0, + name='r2_score', + dtype=None +) +Recall( + thresholds=None, + top_k=None, + class_id=None, + name=None, + dtype=None +) +RecallAtPrecision( + precision, + num_thresholds=200, + class_id=None, + name=None, + dtype=None +) +RootMeanSquaredError(name='root_mean_squared_error', dtype=None) +SensitivityAtSpecificity( + specificity, + num_thresholds=200, + class_id=None, + name=None, + dtype=None +) +serialize(metric) +sparse_categorical_accuracy(y_true, y_pred) +sparse_categorical_crossentropy( + y_true, + y_pred, + from_logits=False, + ignore_class=None, + axis=-1 +) +sparse_top_k_categorical_accuracy( + y_true, + y_pred, + k=5 +) +SparseCategoricalAccuracy(name='sparse_categorical_accuracy', dtype=None) +SparseCategoricalCrossentropy( + name='sparse_categorical_crossentropy', + dtype=None, + from_logits=False, + axis=-1 +) +SparseTopKCategoricalAccuracy( + k=5, + name='sparse_top_k_categorical_accuracy', + dtype=None +) +SpecificityAtSensitivity( + sensitivity, + num_thresholds=200, + class_id=None, + name=None, + dtype=None +) +squared_hinge(y_true, y_pred) +SquaredHinge(name='squared_hinge', dtype=None) +Sum(name='sum', dtype=None) +top_k_categorical_accuracy( + y_true, + y_pred, + k=5 +) +TopKCategoricalAccuracy( + k=5, + name='top_k_categorical_accuracy', + dtype=None +) +TrueNegatives( + thresholds=None, + name=None, + dtype=None +) +TruePositives( + thresholds=None, + name=None, + dtype=None +) + diff --git a/.tether/man/keras.mixed_precision.txt b/.tether/man/keras.mixed_precision.txt new file mode 100644 index 0000000000..619396380c --- /dev/null +++ b/.tether/man/keras.mixed_precision.txt @@ -0,0 +1,21 @@ +dtype_policy() +DTypePolicy( + name, + *args, + **kwargs +) +global_policy() +LossScaleOptimizer( + inner_optimizer, + initial_scale=32768.0, + dynamic_growth_steps=2000, + **kwargs +) +Policy( + name, + *args, + **kwargs +) +set_dtype_policy(policy) +set_global_policy(policy) + diff --git a/.tether/man/keras.models.txt b/.tether/man/keras.models.txt new file mode 100644 index 0000000000..5be3f045ad --- /dev/null +++ b/.tether/man/keras.models.txt @@ -0,0 +1,21 @@ +clone_model( + model, + input_tensors=None, + clone_function=None +) +load_model( + filepath, + custom_objects=None, + compile=True, + safe_mode=True +) +Model(*args, **kwargs) +model_from_json(json_string, custom_objects=None) 
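A save/load round trip through the native .keras format, matching the save_model()/load_model() signatures in this file; the filename is a placeholder:

import keras

inputs = keras.Input(shape=(8,))
outputs = keras.layers.Dense(1)(inputs)
model = keras.Model(inputs, outputs)

keras.models.save_model(model, "tmp_model.keras")
restored = keras.models.load_model("tmp_model.keras")  # compile=True by default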
+save_model( + model, + filepath, + overwrite=True, + **kwargs +) +Sequential(*args, **kwargs) + diff --git a/.tether/man/keras.ops.image.txt b/.tether/man/keras.ops.image.txt new file mode 100644 index 0000000000..c729e35397 --- /dev/null +++ b/.tether/man/keras.ops.image.txt @@ -0,0 +1,54 @@ +affine_transform( + image, + transform, + interpolation='bilinear', + fill_mode='constant', + fill_value=0, + data_format='channels_last' +) +crop_images( + images, + top_cropping=None, + left_cropping=None, + target_height=None, + target_width=None, + bottom_cropping=None, + right_cropping=None +) +extract_patches( + image, + size, + strides=None, + dilation_rate=1, + padding='valid', + data_format='channels_last' +) +map_coordinates( + input, + coordinates, + order, + fill_mode='constant', + fill_value=0 +) +pad_images( + images, + top_padding=None, + left_padding=None, + target_height=None, + target_width=None, + bottom_padding=None, + right_padding=None +) +resize( + image, + size, + interpolation='bilinear', + antialias=False, + crop_to_aspect_ratio=False, + pad_to_aspect_ratio=False, + fill_mode='constant', + fill_value=0.0, + data_format='channels_last' +) +rgb_to_grayscale(image, data_format='channels_last') + diff --git a/.tether/man/keras.ops.nn.txt b/.tether/man/keras.ops.nn.txt new file mode 100644 index 0000000000..8b985f9a64 --- /dev/null +++ b/.tether/man/keras.ops.nn.txt @@ -0,0 +1,134 @@ +average_pool( + inputs, + pool_size, + strides=None, + padding='valid', + data_format=None +) +batch_normalization( + x, + mean, + variance, + axis, + offset=None, + scale=None, + epsilon=0.001 +) +binary_crossentropy( + target, + output, + from_logits=False +) +categorical_crossentropy( + target, + output, + from_logits=False, + axis=-1 +) +conv( + inputs, + kernel, + strides=1, + padding='valid', + data_format=None, + dilation_rate=1 +) +conv_transpose( + inputs, + kernel, + strides, + padding='valid', + output_padding=None, + data_format=None, + dilation_rate=1 +) +ctc_decode( + inputs, + sequence_lengths, + strategy, + beam_width=100, + top_paths=1, + merge_repeated=True, + mask_index=None +) +ctc_loss( + target, + output, + target_length, + output_length, + mask_index=0 +) +depthwise_conv( + inputs, + kernel, + strides=1, + padding='valid', + data_format=None, + dilation_rate=1 +) +elu(x, alpha=1.0) +gelu(x, approximate=True) +hard_sigmoid(x) +hard_silu(x) +hard_swish(x) +leaky_relu(x, negative_slope=0.2) +log_sigmoid(x) +log_softmax(x, axis=-1) +max_pool( + inputs, + pool_size, + strides=None, + padding='valid', + data_format=None +) +moments( + x, + axes, + keepdims=False, + synchronized=False +) +multi_hot( + inputs, + num_classes=None, + axis=-1, + dtype=None, + sparse=False, + **kwargs +) +normalize( + x, + axis=-1, + order=2 +) +one_hot( + x, + num_classes, + axis=-1, + dtype=None, + sparse=False +) +relu(x) +relu6(x) +selu(x) +separable_conv( + inputs, + depthwise_kernel, + pointwise_kernel, + strides=1, + padding='valid', + data_format=None, + dilation_rate=1 +) +sigmoid(x) +silu(x) +softmax(x, axis=-1) +softplus(x) +softsign(x) +sparse_categorical_crossentropy( + target, + output, + from_logits=False, + axis=-1 +) +swish(x) + diff --git a/.tether/man/keras.ops.numpy.txt b/.tether/man/keras.ops.numpy.txt new file mode 100644 index 0000000000..03866409bc --- /dev/null +++ b/.tether/man/keras.ops.numpy.txt @@ -0,0 +1,352 @@ +abs(x) +absolute(x) +add(x1, x2) +all( + x, + axis=None, + keepdims=False +) +amax( + x, + axis=None, + keepdims=False +) +amin( + x, + axis=None, + keepdims=False 
+) +any( + x, + axis=None, + keepdims=False +) +append( + x1, + x2, + axis=None +) +arange( + start, + stop=None, + step=1, + dtype=None +) +arccos(x) +arccosh(x) +arcsin(x) +arcsinh(x) +arctan(x) +arctan2(x1, x2) +arctanh(x) +argmax( + x, + axis=None, + keepdims=False +) +argmin( + x, + axis=None, + keepdims=False +) +argsort(x, axis=-1) +array(x, dtype=None) +average( + x, + axis=None, + weights=None +) +bincount( + x, + weights=None, + minlength=0, + sparse=False +) +broadcast_to(x, shape) +ceil(x) +clip( + x, + x_min, + x_max +) +concatenate(xs, axis=0) +conj(x) +conjugate(x) +copy(x) +correlate( + x1, + x2, + mode='valid' +) +cos(x) +cosh(x) +count_nonzero(x, axis=None) +cross( + x1, + x2, + axisa=-1, + axisb=-1, + axisc=-1, + axis=None +) +cumprod( + x, + axis=None, + dtype=None +) +cumsum( + x, + axis=None, + dtype=None +) +diag(x, k=0) +diagonal( + x, + offset=0, + axis1=0, + axis2=1 +) +diff( + a, + n=1, + axis=-1 +) +digitize(x, bins) +divide(x1, x2) +divide_no_nan(x1, x2) +dot(x1, x2) +einsum(subscripts, *operands) +empty(shape, dtype=None) +equal(x1, x2) +exp(x) +expand_dims(x, axis) +expm1(x) +eye( + N, + M=None, + k=0, + dtype=None +) +flip(x, axis=None) +floor(x) +floor_divide(x1, x2) +full( + shape, + fill_value, + dtype=None +) +full_like( + x, + fill_value, + dtype=None +) +get_item(x, key) +greater(x1, x2) +greater_equal(x1, x2) +hstack(xs) +identity(n, dtype=None) +imag(x) +isclose(x1, x2) +isfinite(x) +isinf(x) +isnan(x) +less(x1, x2) +less_equal(x1, x2) +linspace( + start, + stop, + num=50, + endpoint=True, + retstep=False, + dtype=None, + axis=0 +) +log(x) +log10(x) +log1p(x) +log2(x) +logaddexp(x1, x2) +logical_and(x1, x2) +logical_not(x) +logical_or(x1, x2) +logical_xor(x1, x2) +logspace( + start, + stop, + num=50, + endpoint=True, + base=10, + dtype=None, + axis=0 +) +matmul(x1, x2) +max( + x, + axis=None, + keepdims=False, + initial=None +) +maximum(x1, x2) +mean( + x, + axis=None, + keepdims=False +) +median( + x, + axis=None, + keepdims=False +) +meshgrid(*x, indexing='xy') +min( + x, + axis=None, + keepdims=False, + initial=None +) +minimum(x1, x2) +mod(x1, x2) +moveaxis( + x, + source, + destination +) +multiply(x1, x2) +nan_to_num( + x, + nan=0.0, + posinf=None, + neginf=None +) +ndim(x) +negative(x) +nonzero(x) +not_equal(x1, x2) +ones(shape, dtype=None) +ones_like(x, dtype=None) +outer(x1, x2) +pad( + x, + pad_width, + mode='constant', + constant_values=None +) +power(x1, x2) +prod( + x, + axis=None, + keepdims=False, + dtype=None +) +quantile( + x, + q, + axis=None, + method='linear', + keepdims=False +) +ravel(x) +real(x) +reciprocal(x) +repeat( + x, + repeats, + axis=None +) +reshape(x, newshape) +roll( + x, + shift, + axis=None +) +round(x, decimals=0) +select( + condlist, + choicelist, + default=0 +) +sign(x) +sin(x) +sinh(x) +size(x) +sort(x, axis=-1) +split( + x, + indices_or_sections, + axis=0 +) +sqrt(x) +square(x) +squeeze(x, axis=None) +stack(x, axis=0) +std( + x, + axis=None, + keepdims=False +) +subtract(x1, x2) +sum( + x, + axis=None, + keepdims=False +) +swapaxes( + x, + axis1, + axis2 +) +take( + x, + indices, + axis=None +) +take_along_axis( + x, + indices, + axis=None +) +tan(x) +tanh(x) +tensordot( + x1, + x2, + axes=2 +) +tile(x, repeats) +trace( + x, + offset=0, + axis1=0, + axis2=1 +) +transpose(x, axes=None) +tri( + N, + M=None, + k=0, + dtype=None +) +tril(x, k=0) +triu(x, k=0) +true_divide(x1, x2) +var( + x, + axis=None, + keepdims=False +) +vdot(x1, x2) +vectorize( + pyfunc, + *, + excluded=None, + signature=None +) +vstack(xs) 
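These ops mirror the NumPy API but execute on the active backend (TensorFlow, JAX, or PyTorch), so they are safe to use inside layers and losses; a quick sketch:

from keras import ops

x = ops.arange(6)
y = ops.reshape(x, (2, 3))                       # [[0 1 2], [3 4 5]]
print(ops.convert_to_numpy(ops.sum(y, axis=0)))  # [3 5 7]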
+where( + condition, + x1=None, + x2=None +) +zeros(shape, dtype=None) +zeros_like(x, dtype=None) + diff --git a/.tether/man/keras.ops.txt b/.tether/man/keras.ops.txt new file mode 100644 index 0000000000..3142af6500 --- /dev/null +++ b/.tether/man/keras.ops.txt @@ -0,0 +1,623 @@ +abs(x) +absolute(x) +add(x1, x2) +all( + x, + axis=None, + keepdims=False +) +amax( + x, + axis=None, + keepdims=False +) +amin( + x, + axis=None, + keepdims=False +) +any( + x, + axis=None, + keepdims=False +) +append( + x1, + x2, + axis=None +) +arange( + start, + stop=None, + step=1, + dtype=None +) +arccos(x) +arccosh(x) +arcsin(x) +arcsinh(x) +arctan(x) +arctan2(x1, x2) +arctanh(x) +argmax( + x, + axis=None, + keepdims=False +) +argmin( + x, + axis=None, + keepdims=False +) +argsort(x, axis=-1) +array(x, dtype=None) +average( + x, + axis=None, + weights=None +) +average_pool( + inputs, + pool_size, + strides=None, + padding='valid', + data_format=None +) +batch_normalization( + x, + mean, + variance, + axis, + offset=None, + scale=None, + epsilon=0.001 +) +binary_crossentropy( + target, + output, + from_logits=False +) +bincount( + x, + weights=None, + minlength=0, + sparse=False +) +broadcast_to(x, shape) +cast(x, dtype) +categorical_crossentropy( + target, + output, + from_logits=False, + axis=-1 +) +ceil(x) +cholesky(x) +clip( + x, + x_min, + x_max +) +concatenate(xs, axis=0) +cond( + pred, + true_fn, + false_fn +) +conj(x) +conjugate(x) +conv( + inputs, + kernel, + strides=1, + padding='valid', + data_format=None, + dilation_rate=1 +) +conv_transpose( + inputs, + kernel, + strides, + padding='valid', + output_padding=None, + data_format=None, + dilation_rate=1 +) +convert_to_numpy(x) +convert_to_tensor( + x, + dtype=None, + sparse=None +) +copy(x) +correlate( + x1, + x2, + mode='valid' +) +cos(x) +cosh(x) +count_nonzero(x, axis=None) +cross( + x1, + x2, + axisa=-1, + axisb=-1, + axisc=-1, + axis=None +) +ctc_decode( + inputs, + sequence_lengths, + strategy, + beam_width=100, + top_paths=1, + merge_repeated=True, + mask_index=None +) +ctc_loss( + target, + output, + target_length, + output_length, + mask_index=0 +) +cumprod( + x, + axis=None, + dtype=None +) +cumsum( + x, + axis=None, + dtype=None +) +custom_gradient(f) +depthwise_conv( + inputs, + kernel, + strides=1, + padding='valid', + data_format=None, + dilation_rate=1 +) +det(x) +diag(x, k=0) +diagonal( + x, + offset=0, + axis1=0, + axis2=1 +) +diff( + a, + n=1, + axis=-1 +) +digitize(x, bins) +divide(x1, x2) +divide_no_nan(x1, x2) +dot(x1, x2) +eig(x) +eigh(x) +einsum(subscripts, *operands) +elu(x, alpha=1.0) +empty(shape, dtype=None) +equal(x1, x2) +erf(x) +erfinv(x) +exp(x) +expand_dims(x, axis) +expm1(x) +extract_sequences( + x, + sequence_length, + sequence_stride +) +eye( + N, + M=None, + k=0, + dtype=None +) +fft(x) +fft2(x) +flip(x, axis=None) +floor(x) +floor_divide(x1, x2) +fori_loop( + lower, + upper, + body_fun, + init_val +) +full( + shape, + fill_value, + dtype=None +) +full_like( + x, + fill_value, + dtype=None +) +gelu(x, approximate=True) +get_item(x, key) +greater(x1, x2) +greater_equal(x1, x2) +hard_sigmoid(x) +hard_silu(x) +hard_swish(x) +hstack(xs) +identity(n, dtype=None) +imag(x) +image: Module(keras.api.ops.image) +in_top_k( + targets, + predictions, + k +) +inv(x) +irfft(x, fft_length=None) +is_tensor(x) +isclose(x1, x2) +isfinite(x) +isinf(x) +isnan(x) +istft( + x, + sequence_length, + sequence_stride, + fft_length, + length=None, + window='hann', + center=True +) +leaky_relu(x, negative_slope=0.2) +less(x1, x2) 
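A sketch of the activation and cross-entropy ops from this listing; the input values are arbitrary:

from keras import ops

logits = ops.convert_to_tensor([[2.0, 1.0, 0.1]])
probs = ops.softmax(logits, axis=-1)                 # rows sum to 1
labels = ops.one_hot(ops.array([0]), num_classes=3)
loss = ops.categorical_crossentropy(labels, probs, from_logits=False)
print(ops.convert_to_numpy(loss))                    # shape (1,)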
+less_equal(x1, x2) +linalg: Module(keras.api.ops.linalg) +linspace( + start, + stop, + num=50, + endpoint=True, + retstep=False, + dtype=None, + axis=0 +) +log(x) +log_sigmoid(x) +log_softmax(x, axis=-1) +log10(x) +log1p(x) +log2(x) +logaddexp(x1, x2) +logical_and(x1, x2) +logical_not(x) +logical_or(x1, x2) +logical_xor(x1, x2) +logspace( + start, + stop, + num=50, + endpoint=True, + base=10, + dtype=None, + axis=0 +) +logsumexp( + x, + axis=None, + keepdims=False +) +lu_factor(x) +matmul(x1, x2) +max( + x, + axis=None, + keepdims=False, + initial=None +) +max_pool( + inputs, + pool_size, + strides=None, + padding='valid', + data_format=None +) +maximum(x1, x2) +mean( + x, + axis=None, + keepdims=False +) +median( + x, + axis=None, + keepdims=False +) +meshgrid(*x, indexing='xy') +min( + x, + axis=None, + keepdims=False, + initial=None +) +minimum(x1, x2) +mod(x1, x2) +moments( + x, + axes, + keepdims=False, + synchronized=False +) +moveaxis( + x, + source, + destination +) +multi_hot( + inputs, + num_classes=None, + axis=-1, + dtype=None, + sparse=False, + **kwargs +) +multiply(x1, x2) +nan_to_num( + x, + nan=0.0, + posinf=None, + neginf=None +) +ndim(x) +negative(x) +nn: Module(keras.api.ops.nn) +nonzero(x) +norm( + x, + ord=None, + axis=None, + keepdims=False +) +normalize( + x, + axis=-1, + order=2 +) +not_equal(x1, x2) +numpy: Module(keras.api.ops.numpy) +one_hot( + x, + num_classes, + axis=-1, + dtype=None, + sparse=False +) +ones(shape, dtype=None) +ones_like(x, dtype=None) +outer(x1, x2) +pad( + x, + pad_width, + mode='constant', + constant_values=None +) +power(x1, x2) +prod( + x, + axis=None, + keepdims=False, + dtype=None +) +qr(x, mode='reduced') +quantile( + x, + q, + axis=None, + method='linear', + keepdims=False +) +ravel(x) +real(x) +reciprocal(x) +relu(x) +relu6(x) +repeat( + x, + repeats, + axis=None +) +reshape(x, newshape) +rfft(x, fft_length=None) +roll( + x, + shift, + axis=None +) +round(x, decimals=0) +rsqrt(x) +scatter( + indices, + values, + shape +) +scatter_update( + inputs, + indices, + updates +) +segment_max( + data, + segment_ids, + num_segments=None, + sorted=False +) +segment_sum( + data, + segment_ids, + num_segments=None, + sorted=False +) +select( + condlist, + choicelist, + default=0 +) +selu(x) +separable_conv( + inputs, + depthwise_kernel, + pointwise_kernel, + strides=1, + padding='valid', + data_format=None, + dilation_rate=1 +) +shape(x) +sigmoid(x) +sign(x) +silu(x) +sin(x) +sinh(x) +size(x) +slice( + inputs, + start_indices, + shape +) +slice_update( + inputs, + start_indices, + updates +) +softmax(x, axis=-1) +softplus(x) +softsign(x) +solve(a, b) +solve_triangular( + a, + b, + lower=False +) +sort(x, axis=-1) +sparse_categorical_crossentropy( + target, + output, + from_logits=False, + axis=-1 +) +split( + x, + indices_or_sections, + axis=0 +) +sqrt(x) +square(x) +squeeze(x, axis=None) +stack(x, axis=0) +std( + x, + axis=None, + keepdims=False +) +stft( + x, + sequence_length, + sequence_stride, + fft_length, + window='hann', + center=True +) +stop_gradient(variable) +subtract(x1, x2) +sum( + x, + axis=None, + keepdims=False +) +svd( + x, + full_matrices=True, + compute_uv=True +) +swapaxes( + x, + axis1, + axis2 +) +swish(x) +take( + x, + indices, + axis=None +) +take_along_axis( + x, + indices, + axis=None +) +tan(x) +tanh(x) +tensordot( + x1, + x2, + axes=2 +) +tile(x, repeats) +top_k( + x, + k, + sorted=True +) +trace( + x, + offset=0, + axis1=0, + axis2=1 +) +transpose(x, axes=None) +tri( + N, + M=None, + k=0, + dtype=None +) +tril(x, 
k=0) +triu(x, k=0) +true_divide(x1, x2) +unstack( + x, + num=None, + axis=0 +) +var( + x, + axis=None, + keepdims=False +) +vdot(x1, x2) +vectorize( + pyfunc, + *, + excluded=None, + signature=None +) +vectorized_map(function, elements) +vstack(xs) +where( + condition, + x1=None, + x2=None +) +while_loop( + cond, + body, + loop_vars, + maximum_iterations=None +) +zeros(shape, dtype=None) +zeros_like(x, dtype=None) + diff --git a/.tether/man/keras.optimizers.legacy.txt b/.tether/man/keras.optimizers.legacy.txt new file mode 100644 index 0000000000..8c6dcb1cf4 --- /dev/null +++ b/.tether/man/keras.optimizers.legacy.txt @@ -0,0 +1,7 @@ +Adagrad(*args, **kwargs) +Adam(*args, **kwargs) +Ftrl(*args, **kwargs) +Optimizer(*args, **kwargs) +RMSprop(*args, **kwargs) +SGD(*args, **kwargs) + diff --git a/.tether/man/keras.optimizers.schedules.txt b/.tether/man/keras.optimizers.schedules.txt new file mode 100644 index 0000000000..023423056f --- /dev/null +++ b/.tether/man/keras.optimizers.schedules.txt @@ -0,0 +1,47 @@ +CosineDecay( + initial_learning_rate, + decay_steps, + alpha=0.0, + name='CosineDecay', + warmup_target=None, + warmup_steps=0 +) +CosineDecayRestarts( + initial_learning_rate, + first_decay_steps, + t_mul=2.0, + m_mul=1.0, + alpha=0.0, + name='SGDRDecay' +) +deserialize(config, custom_objects=None) +ExponentialDecay( + initial_learning_rate, + decay_steps, + decay_rate, + staircase=False, + name='ExponentialDecay' +) +InverseTimeDecay( + initial_learning_rate, + decay_steps, + decay_rate, + staircase=False, + name='InverseTimeDecay' +) +LearningRateSchedule() +PiecewiseConstantDecay( + boundaries, + values, + name='PiecewiseConstant' +) +PolynomialDecay( + initial_learning_rate, + decay_steps, + end_learning_rate=0.0001, + power=1.0, + cycle=False, + name='PolynomialDecay' +) +serialize(learning_rate_schedule) + diff --git a/.tether/man/keras.optimizers.txt b/.tether/man/keras.optimizers.txt new file mode 100644 index 0000000000..66429c020a --- /dev/null +++ b/.tether/man/keras.optimizers.txt @@ -0,0 +1,204 @@ +Adadelta( + learning_rate=0.001, + rho=0.95, + epsilon=1e-07, + weight_decay=None, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + loss_scale_factor=None, + gradient_accumulation_steps=None, + name='adadelta', + **kwargs +) +Adafactor( + learning_rate=0.001, + beta_2_decay=-0.8, + epsilon_1=1e-30, + epsilon_2=0.001, + clip_threshold=1.0, + relative_step=True, + weight_decay=None, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + loss_scale_factor=None, + gradient_accumulation_steps=None, + name='adafactor', + **kwargs +) +Adagrad( + learning_rate=0.001, + initial_accumulator_value=0.1, + epsilon=1e-07, + weight_decay=None, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + loss_scale_factor=None, + gradient_accumulation_steps=None, + name='adagrad', + **kwargs +) +Adam( + learning_rate=0.001, + beta_1=0.9, + beta_2=0.999, + epsilon=1e-07, + amsgrad=False, + weight_decay=None, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + loss_scale_factor=None, + gradient_accumulation_steps=None, + name='adam', + **kwargs +) +Adamax( + learning_rate=0.001, + beta_1=0.9, + beta_2=0.999, + epsilon=1e-07, + weight_decay=None, + clipnorm=None, + clipvalue=None, + 
global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + loss_scale_factor=None, + gradient_accumulation_steps=None, + name='adamax', + **kwargs +) +AdamW( + learning_rate=0.001, + weight_decay=0.004, + beta_1=0.9, + beta_2=0.999, + epsilon=1e-07, + amsgrad=False, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + loss_scale_factor=None, + gradient_accumulation_steps=None, + name='adamw', + **kwargs +) +deserialize(config, custom_objects=None) +Ftrl( + learning_rate=0.001, + learning_rate_power=-0.5, + initial_accumulator_value=0.1, + l1_regularization_strength=0.0, + l2_regularization_strength=0.0, + l2_shrinkage_regularization_strength=0.0, + beta=0.0, + weight_decay=None, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + loss_scale_factor=None, + gradient_accumulation_steps=None, + name='ftrl', + **kwargs +) +get(identifier) +legacy: Module(keras.api.optimizers.legacy) +Lion( + learning_rate=0.001, + beta_1=0.9, + beta_2=0.99, + weight_decay=None, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + loss_scale_factor=None, + gradient_accumulation_steps=None, + name='lion', + **kwargs +) +LossScaleOptimizer( + inner_optimizer, + initial_scale=32768.0, + dynamic_growth_steps=2000, + **kwargs +) +Nadam( + learning_rate=0.001, + beta_1=0.9, + beta_2=0.999, + epsilon=1e-07, + weight_decay=None, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + loss_scale_factor=None, + gradient_accumulation_steps=None, + name='nadam', + **kwargs +) +Optimizer(*args, **kwargs) +RMSprop( + learning_rate=0.001, + rho=0.9, + momentum=0.0, + epsilon=1e-07, + centered=False, + weight_decay=None, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + loss_scale_factor=None, + gradient_accumulation_steps=None, + name='rmsprop', + **kwargs +) +schedules: Module(keras.api.optimizers.schedules) +serialize(optimizer) +SGD( + learning_rate=0.01, + momentum=0.0, + nesterov=False, + weight_decay=None, + clipnorm=None, + clipvalue=None, + global_clipnorm=None, + use_ema=False, + ema_momentum=0.99, + ema_overwrite_frequency=None, + loss_scale_factor=None, + gradient_accumulation_steps=None, + name='SGD', + **kwargs +) + diff --git a/.tether/man/keras.preprocessing.image.txt b/.tether/man/keras.preprocessing.image.txt new file mode 100644 index 0000000000..ff1a55e652 --- /dev/null +++ b/.tether/man/keras.preprocessing.image.txt @@ -0,0 +1,34 @@ +array_to_img( + x, + data_format=None, + scale=True, + dtype=None +) +img_to_array( + img, + data_format=None, + dtype=None +) +load_img( + path, + color_mode='rgb', + target_size=None, + interpolation='nearest', + keep_aspect_ratio=False +) +save_img( + path, + x, + data_format=None, + file_format=None, + scale=True, + **kwargs +) +smart_resize( + x, + size, + interpolation='bilinear', + data_format='channels_last', + backend_module=None +) + diff --git a/.tether/man/keras.preprocessing.sequence.txt b/.tether/man/keras.preprocessing.sequence.txt new file mode 100644 index 0000000000..1bb00ee180 --- /dev/null +++ b/.tether/man/keras.preprocessing.sequence.txt @@ -0,0 +1,9 @@ +pad_sequences( + sequences, + maxlen=None, + dtype='int32', + padding='pre', + 
truncating='pre', + value=0.0 +) + diff --git a/.tether/man/keras.preprocessing.txt b/.tether/man/keras.preprocessing.txt new file mode 100644 index 0000000000..c7da2c5348 --- /dev/null +++ b/.tether/man/keras.preprocessing.txt @@ -0,0 +1,48 @@ +image: Module(keras.api.preprocessing.image) +image_dataset_from_directory( + directory, + labels='inferred', + label_mode='int', + class_names=None, + color_mode='rgb', + batch_size=32, + image_size=(256, 256), + shuffle=True, + seed=None, + validation_split=None, + subset=None, + interpolation='bilinear', + follow_links=False, + crop_to_aspect_ratio=False, + pad_to_aspect_ratio=False, + data_format=None, + verbose=True +) +sequence: Module(keras.api.preprocessing.sequence) +text_dataset_from_directory( + directory, + labels='inferred', + label_mode='int', + class_names=None, + batch_size=32, + max_length=None, + shuffle=True, + seed=None, + validation_split=None, + subset=None, + follow_links=False, + verbose=True +) +timeseries_dataset_from_array( + data, + targets, + sequence_length, + sequence_stride=1, + sampling_rate=1, + batch_size=128, + shuffle=False, + seed=None, + start_index=None, + end_index=None +) + diff --git a/.tether/man/keras.quantizers.txt b/.tether/man/keras.quantizers.txt new file mode 100644 index 0000000000..603b088a5b --- /dev/null +++ b/.tether/man/keras.quantizers.txt @@ -0,0 +1,31 @@ +abs_max_quantize( + inputs, + axis, + value_range=(-127, 127), + dtype='int8', + epsilon=1e-07 +) +AbsMaxQuantizer( + axis, + value_range=(-127, 127), + epsilon=1e-07, + output_dtype='int8' +) +compute_float8_amax_history(x, amax_history) +compute_float8_scale( + amax, + scale, + dtype_max, + margin=0 +) +deserialize(config, custom_objects=None) +get(identifier, **kwargs) +quantize_and_dequantize( + inputs, + scale, + quantized_dtype, + compute_dtype +) +Quantizer(output_dtype='int8') +serialize(initializer) + diff --git a/.tether/man/keras.random.txt b/.tether/man/keras.random.txt new file mode 100644 index 0000000000..0b82263f81 --- /dev/null +++ b/.tether/man/keras.random.txt @@ -0,0 +1,71 @@ +beta( + shape, + alpha, + beta, + dtype=None, + seed=None +) +binomial( + shape, + counts, + probabilities, + dtype=None, + seed=None +) +categorical( + logits, + num_samples, + dtype='int32', + seed=None +) +dropout( + inputs, + rate, + noise_shape=None, + seed=None +) +gamma( + shape, + alpha, + dtype=None, + seed=None +) +normal( + shape, + mean=0.0, + stddev=1.0, + dtype=None, + seed=None +) +randint( + shape, + minval, + maxval, + dtype='int32', + seed=None +) +SeedGenerator( + seed=None, + name=None, + **kwargs +) +shuffle( + x, + axis=0, + seed=None +) +truncated_normal( + shape, + mean=0.0, + stddev=1.0, + dtype=None, + seed=None +) +uniform( + shape, + minval=0.0, + maxval=1.0, + dtype=None, + seed=None +) + diff --git a/.tether/man/keras.regularizers.txt b/.tether/man/keras.regularizers.txt new file mode 100644 index 0000000000..30ccc7126e --- /dev/null +++ b/.tether/man/keras.regularizers.txt @@ -0,0 +1,13 @@ +deserialize(config, custom_objects=None) +get(identifier) +l1(l1=0.01) +L1(l1=0.01) +l1_l2(l1=0.0, l2=0.0) +L1L2(l1=0.0, l2=0.0) +l2(l2=0.01) +L2(l2=0.01) +orthogonal_regularizer(factor=0.01, mode='rows') +OrthogonalRegularizer(factor=0.01, mode='rows') +Regularizer() +serialize(initializer) + diff --git a/.tether/man/keras.saving.txt b/.tether/man/keras.saving.txt new file mode 100644 index 0000000000..389cc60dd5 --- /dev/null +++ b/.tether/man/keras.saving.txt @@ -0,0 +1,42 @@ +custom_object_scope(custom_objects) 
+CustomObjectScope(custom_objects) +deserialize_keras_object( + config, + custom_objects=None, + safe_mode=True, + **kwargs +) +get_custom_objects() +get_registered_name(obj) +get_registered_object( + name, + custom_objects=None, + module_objects=None +) +load_model( + filepath, + custom_objects=None, + compile=True, + safe_mode=True +) +load_weights( + model, + filepath, + skip_mismatch=False, + **kwargs +) +register_keras_serializable(package='Custom', name=None) +save_model( + model, + filepath, + overwrite=True, + **kwargs +) +save_weights( + model, + filepath, + overwrite=True, + **kwargs +) +serialize_keras_object(obj) + diff --git a/.tether/man/keras.tree.txt b/.tether/man/keras.tree.txt new file mode 100644 index 0000000000..faa6953768 --- /dev/null +++ b/.tether/man/keras.tree.txt @@ -0,0 +1,26 @@ +assert_same_structure( + a, + b, + check_types=True +) +flatten(structure) +is_nested(structure) +lists_to_tuples(structure) +map_shape_structure(func, structure) +map_structure(func, *structures) +map_structure_up_to( + shallow_structure, + func, + *structures +) +pack_sequence_as( + structure, + flat_sequence, + sequence_fn=None +) +traverse( + func, + structure, + top_down=True +) + diff --git a/.tether/man/keras.txt b/.tether/man/keras.txt new file mode 100644 index 0000000000..6c5b01579e --- /dev/null +++ b/.tether/man/keras.txt @@ -0,0 +1,95 @@ +activations: Module(keras.api.activations) +applications: Module(keras.api.applications) +backend: Module(keras.api.backend) +callbacks: Module(keras.api.callbacks) +config: Module(keras.api.config) +constraints: Module(keras.api.constraints) +datasets: Module(keras.api.datasets) +device(device_name) +distribution: Module(keras.api.distribution) +dtype_policies: Module(keras.api.dtype_policies) +DTypePolicy( + name, + *args, + **kwargs +) +export: Module(keras.api.export) +FloatDTypePolicy(name) +Function( + inputs, + outputs, + name=None +) +Initializer() +initializers: Module(keras.api.initializers) +Input( + shape=None, + batch_size=None, + dtype=None, + sparse=None, + batch_shape=None, + name=None, + tensor=None +) +InputSpec( + dtype=None, + shape=None, + ndim=None, + max_ndim=None, + min_ndim=None, + axes=None, + allow_last_axis_squeeze=False, + name=None +) +KerasTensor( + shape, + dtype='float32', + sparse=False, + record_history=True, + name=None +) +Layer(*args, **kwargs) +layers: Module(keras.api.layers) +legacy: Module(keras.api.legacy) +Loss( + name=None, + reduction='sum_over_batch_size', + dtype=None +) +losses: Module(keras.api.losses) +Metric(dtype=None, name=None) +metrics: Module(keras.api.metrics) +mixed_precision: Module(keras.api.mixed_precision) +Model(*args, **kwargs) +models: Module(keras.api.models) +name_scope(name, **kwargs) +Operation(*args, **kwargs) +ops: Module(keras.api.ops) +Optimizer(*args, **kwargs) +optimizers: Module(keras.api.optimizers) +preprocessing: Module(keras.api.preprocessing) +Quantizer(output_dtype='int8') +quantizers: Module(keras.api.quantizers) +random: Module(keras.api.random) +Regularizer() +regularizers: Module(keras.api.regularizers) +saving: Module(keras.api.saving) +Sequential(*args, **kwargs) +StatelessScope( + state_mapping=None, + collect_losses=False, + initialize_variables=True +) +tree: Module(keras.api.tree) +utils: Module(keras.api.utils) +Variable( + initializer, + shape=None, + dtype=None, + trainable=True, + autocast=True, + aggregation='mean', + name=None +) +version() + diff --git a/.tether/man/keras.utils.legacy.txt b/.tether/man/keras.utils.legacy.txt new file mode 
100644 index 0000000000..44f4183c4f --- /dev/null +++ b/.tether/man/keras.utils.legacy.txt @@ -0,0 +1,8 @@ +deserialize_keras_object( + identifier, + module_objects=None, + custom_objects=None, + printable_module_name='object' +) +serialize_keras_object(instance) + diff --git a/.tether/man/keras.utils.txt b/.tether/man/keras.utils.txt new file mode 100644 index 0000000000..b18132715a --- /dev/null +++ b/.tether/man/keras.utils.txt @@ -0,0 +1,207 @@ +array_to_img( + x, + data_format=None, + scale=True, + dtype=None +) +audio_dataset_from_directory( + directory, + labels='inferred', + label_mode='int', + class_names=None, + batch_size=32, + sampling_rate=None, + output_sequence_length=None, + ragged=False, + shuffle=True, + seed=None, + validation_split=None, + subset=None, + follow_links=False, + verbose=True +) +clear_session(free_memory=True) +custom_object_scope(custom_objects) +CustomObjectScope(custom_objects) +deserialize_keras_object( + config, + custom_objects=None, + safe_mode=True, + **kwargs +) +disable_interactive_logging() +enable_interactive_logging() +FeatureSpace( + features, + output_mode='concat', + crosses=None, + crossing_dim=32, + hashing_dim=32, + num_discretization_bins=32, + name=None +) +get_custom_objects() +get_file( + fname=None, + origin=None, + untar=False, + md5_hash=None, + file_hash=None, + cache_subdir='datasets', + hash_algorithm='auto', + extract=False, + archive_format='auto', + cache_dir=None, + force_download=False +) +get_registered_name(obj) +get_registered_object( + name, + custom_objects=None, + module_objects=None +) +get_source_inputs(tensor) +image_dataset_from_directory( + directory, + labels='inferred', + label_mode='int', + class_names=None, + color_mode='rgb', + batch_size=32, + image_size=(256, 256), + shuffle=True, + seed=None, + validation_split=None, + subset=None, + interpolation='bilinear', + follow_links=False, + crop_to_aspect_ratio=False, + pad_to_aspect_ratio=False, + data_format=None, + verbose=True +) +img_to_array( + img, + data_format=None, + dtype=None +) +is_interactive_logging_enabled() +is_keras_tensor(x) +legacy: Module(keras.api.utils.legacy) +load_img( + path, + color_mode='rgb', + target_size=None, + interpolation='nearest', + keep_aspect_ratio=False +) +model_to_dot( + model, + show_shapes=False, + show_dtype=False, + show_layer_names=True, + rankdir='TB', + expand_nested=False, + dpi=200, + subgraph=False, + show_layer_activations=False, + show_trainable=False, + **kwargs +) +normalize( + x, + axis=-1, + order=2 +) +pack_x_y_sample_weight( + x, + y=None, + sample_weight=None +) +pad_sequences( + sequences, + maxlen=None, + dtype='int32', + padding='pre', + truncating='pre', + value=0.0 +) +plot_model( + model, + to_file='model.png', + show_shapes=False, + show_dtype=False, + show_layer_names=False, + rankdir='TB', + expand_nested=False, + dpi=200, + show_layer_activations=False, + show_trainable=False, + **kwargs +) +Progbar( + target, + width=20, + verbose=1, + interval=0.05, + stateful_metrics=None, + unit_name='step' +) +PyDataset( + workers=1, + use_multiprocessing=False, + max_queue_size=10 +) +register_keras_serializable(package='Custom', name=None) +save_img( + path, + x, + data_format=None, + file_format=None, + scale=True, + **kwargs +) +Sequence( + workers=1, + use_multiprocessing=False, + max_queue_size=10 +) +serialize_keras_object(obj) +set_random_seed(seed) +split_dataset( + dataset, + left_size=None, + right_size=None, + shuffle=False, + seed=None +) +standardize_dtype(dtype) 
+text_dataset_from_directory( + directory, + labels='inferred', + label_mode='int', + class_names=None, + batch_size=32, + max_length=None, + shuffle=True, + seed=None, + validation_split=None, + subset=None, + follow_links=False, + verbose=True +) +timeseries_dataset_from_array( + data, + targets, + sequence_length, + sequence_stride=1, + sampling_rate=1, + batch_size=128, + shuffle=False, + seed=None, + start_index=None, + end_index=None +) +to_categorical(x, num_classes=None) +unpack_x_y_sample_weight(data) + diff --git a/.tether/man/keras_input.txt b/.tether/man/keras_input.txt new file mode 100644 index 0000000000..c36df56073 --- /dev/null +++ b/.tether/man/keras_input.txt @@ -0,0 +1,55 @@ +__signature__ +keras.layers.Input( + shape=None, + batch_size=None, + dtype=None, + sparse=None, + batch_shape=None, + name=None, + tensor=None +) +__doc__ +Used to instantiate a Keras tensor. + +A Keras tensor is a symbolic tensor-like object, which we augment with +certain attributes that allow us to build a Keras model just by knowing the +inputs and outputs of the model. + +For instance, if `a`, `b` and `c` are Keras tensors, +it becomes possible to do: +`model = Model(inputs=[a, b], outputs=c)` + +Args: + shape: A shape tuple (tuple of integers or `None` objects), + not including the batch size. + For instance, `shape=(32,)` indicates that the expected input + will be batches of 32-dimensional vectors. Elements of this tuple + can be `None`; `None` elements represent dimensions where the shape + is not known and may vary (e.g. sequence length). + batch_size: Optional static batch size (integer). + dtype: The data type expected by the input, as a string + (e.g. `"float32"`, `"int32"`...) + sparse: A boolean specifying whether the expected input will be sparse + tensors. Note that, if `sparse` is `False`, sparse tensors can still + be passed into the input - they will be densified with a default + value of 0. This feature is only supported with the TensorFlow + backend. Defaults to `False`. + name: Optional name string for the layer. + Should be unique in a model (do not reuse the same name twice). + It will be autogenerated if it isn't provided. + tensor: Optional existing tensor to wrap into the `Input` layer. + If set, the layer will use this tensor rather + than creating a new placeholder tensor. + +Returns: + A Keras tensor. + +Example: + +```python +# This is a logistic regression in Keras +x = Input(shape=(32,)) +y = Dense(16, activation='softmax')(x) +model = Model(x, y) +``` + diff --git a/.tether/man/keras_model.txt b/.tether/man/keras_model.txt new file mode 100644 index 0000000000..ac9be46bf2 --- /dev/null +++ b/.tether/man/keras_model.txt @@ -0,0 +1,393 @@ +Help on class Model in module keras.src.models.model: + +class Model(keras.src.backend.tensorflow.trainer.TensorFlowTrainer, keras.src.trainers.trainer.Trainer, keras.src.layers.layer.Layer) + | Model(*args, **kwargs) + | + | A model grouping layers into an object with training/inference features.
+ | + | There are three ways to instantiate a `Model`: + | + | ## With the "Functional API" + | + | You start from `Input`, + | you chain layer calls to specify the model's forward pass, + | and finally you create your model from inputs and outputs: + | + | ```python + | inputs = keras.Input(shape=(37,)) + | x = keras.layers.Dense(32, activation="relu")(inputs) + | outputs = keras.layers.Dense(5, activation="softmax")(x) + | model = keras.Model(inputs=inputs, outputs=outputs) + | ``` + | + | Note: Only dicts, lists, and tuples of input tensors are supported. Nested + | inputs are not supported (e.g. lists of lists or dicts of dicts). + | + | A new Functional API model can also be created by using the + | intermediate tensors. This enables you to quickly extract sub-components + | of the model. + | + | Example: + | + | ```python + | inputs = keras.Input(shape=(None, None, 3)) + | processed = keras.layers.RandomCrop(width=128, height=128)(inputs) + | conv = keras.layers.Conv2D(filters=32, kernel_size=3)(processed) + | pooling = keras.layers.GlobalAveragePooling2D()(conv) + | feature = keras.layers.Dense(10)(pooling) + | + | full_model = keras.Model(inputs, feature) + | backbone = keras.Model(processed, conv) + | activations = keras.Model(conv, feature) + | ``` + | + | Note that the `backbone` and `activations` models are not + | created with `keras.Input` objects, but with the tensors that originate + | from `keras.Input` objects. Under the hood, the layers and weights will + | be shared across these models, so that the user can train the `full_model`, and + | use `backbone` or `activations` to do feature extraction. + | The inputs and outputs of the model can be nested structures of tensors as + | well, and the created models are standard Functional API models that support + | all the existing APIs. + | + | ## By subclassing the `Model` class + | + | In that case, you should define your + | layers in `__init__()` and you should implement the model's forward pass + | in `call()`. + | + | ```python + | class MyModel(keras.Model): + | def __init__(self): + | super().__init__() + | self.dense1 = keras.layers.Dense(32, activation="relu") + | self.dense2 = keras.layers.Dense(5, activation="softmax") + | + | def call(self, inputs): + | x = self.dense1(inputs) + | return self.dense2(x) + | + | model = MyModel() + | ``` + | + | If you subclass `Model`, you can optionally have + | a `training` argument (boolean) in `call()`, which you can use to specify + | a different behavior in training and inference: + | + | ```python + | class MyModel(keras.Model): + | def __init__(self): + | super().__init__() + | self.dense1 = keras.layers.Dense(32, activation="relu") + | self.dense2 = keras.layers.Dense(5, activation="softmax") + | self.dropout = keras.layers.Dropout(0.5) + | + | def call(self, inputs, training=False): + | x = self.dense1(inputs) + | x = self.dropout(x, training=training) + | return self.dense2(x) + | + | model = MyModel() + | ``` + | + | Once the model is created, you can configure the model with losses and metrics + | with `model.compile()`, train the model with `model.fit()`, or use the model + | to do prediction with `model.predict()`. + | + | ## With the `Sequential` class + | + | In addition, `keras.Sequential` is a special case of model where + | the model is purely a stack of single-input, single-output layers.
+ | + | ```python + | model = keras.Sequential([ + | keras.Input(shape=(None, None, 3)), + | keras.layers.Conv2D(filters=32, kernel_size=3), + | ]) + | ``` + | + | Method resolution order: + | Model + | keras.src.backend.tensorflow.trainer.TensorFlowTrainer + | keras.src.trainers.trainer.Trainer + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | *args, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | __reduce__(self) + | __reduce__ is used to customize the behavior of pickling. + | + | The method returns a tuple of two elements: a function, and a list of + | arguments to pass to that function. In this case we just leverage the + | keras saving library. + | + | build_from_config(self, config) + | Builds the layer's states with the supplied config dict. + | + | By default, this method calls the `build(config["input_shape"])` method, + | which creates weights based on the layer's input shape in the supplied + | config. If your config contains other information needed to load the + | layer's state, you should override this method. + | + | Args: + | config: Dict containing the input shape associated with this layer. + | + | call( + | self, + | *args, + | **kwargs + | ) + | + | export( + | self, + | filepath, + | format='tf_saved_model' + | ) + | Create a TF SavedModel artifact for inference. + | + | **Note:** This can currently only be used with + | the TensorFlow or JAX backends. + | + | This method lets you export a model to a lightweight SavedModel artifact + | that contains the model's forward pass only (its `call()` method) + | and can be served via e.g. TF-Serving. The forward pass is registered + | under the name `serve()` (see example below). + | + | The original code of the model (including any custom layers you may + | have used) is *no longer* necessary to reload the artifact -- it is + | entirely standalone. + | + | Args: + | filepath: `str` or `pathlib.Path` object. Path where to save + | the artifact. + | + | Example: + | + | ```python + | # Create the artifact + | model.export("path/to/location") + | + | # Later, in a different process / environment... + | reloaded_artifact = tf.saved_model.load("path/to/location") + | predictions = reloaded_artifact.serve(input_data) + | ``` + | + | If you would like to customize your serving endpoints, you can + | use the lower-level `keras.export.ExportArchive` class. The + | `export()` method relies on `ExportArchive` internally. + | + | get_layer( + | self, + | name=None, + | index=None + | ) + | Retrieves a layer based on either its name (unique) or index. + | + | If `name` and `index` are both provided, `index` will take precedence. + | Indices are based on order of horizontal graph traversal (bottom-up). + | + | Args: + | name: String, name of layer. + | index: Integer, index of layer. + | + | Returns: + | A layer instance. + | + | load_weights( + | self, + | filepath, + | skip_mismatch=False, + | **kwargs + | ) + | Load weights from a file saved via `save_weights()`. + | + | Weights are loaded based on the network's + | topology. This means the architecture should be the same as when the + | weights were saved.
Note that layers that don't have weights are not + | taken into account in the topological ordering, so adding or removing + | layers is fine as long as they don't have weights. + | + | **Partial weight loading** + | + | If you have modified your model, for instance by adding a new layer + | (with weights) or by changing the shape of the weights of a layer, + | you can choose to ignore errors and continue loading + | by setting `skip_mismatch=True`. In this case any layer with + | mismatching weights will be skipped. A warning will be displayed + | for each skipped layer. + | + | Args: + | filepath: String, path to the weights file to load. + | It can either be a `.weights.h5` file + | or a legacy `.h5` weights file. + | skip_mismatch: Boolean, whether to skip loading of layers where + | there is a mismatch in the number of weights, or a mismatch in + | the shape of the weights. + | + | quantize(self, mode) + | Quantize the weights of the model. + | + | Note that the model must be built first before calling this method. + | `quantize` will recursively call `quantize(mode)` in all layers and + | will be skipped if the layer doesn't implement the function. + | + | Args: + | mode: The mode of the quantization. Only 'int8' is supported at this + | time. + | + | save( + | self, + | filepath, + | overwrite=True, + | **kwargs + | ) + | Saves a model as a `.keras` file. + | + | Args: + | filepath: `str` or `pathlib.Path` object. Path where to save + | the model. Must end in `.keras`. + | overwrite: Whether we should overwrite any existing model at + | the target location, or instead ask the user via + | an interactive prompt. + | save_format: The `save_format` argument is deprecated in Keras 3. + | Format to use, as a string. Only the `"keras"` format is + | supported at this time. + | + | Example: + | + | ```python + | model = keras.Sequential( + | [ + | keras.layers.Dense(5, input_shape=(3,)), + | keras.layers.Softmax(), + | ], + | ) + | model.save("model.keras") + | loaded_model = keras.saving.load_model("model.keras") + | x = keras.random.uniform((10, 3)) + | assert np.allclose(model.predict(x), loaded_model.predict(x)) + | ``` + | + | Note that `model.save()` is an alias for `keras.saving.save_model()`. + | + | The saved `.keras` file contains: + | + | - The model's configuration (architecture) + | - The model's weights + | - The model's optimizer's state (if any) + | + | Thus models can be reinstantiated in the exact same state. + | + | save_weights( + | self, + | filepath, + | overwrite=True + | ) + | Saves all layer weights to a `.weights.h5` file. + | + | Args: + | filepath: `str` or `pathlib.Path` object. + | Path where to save the model. Must end in `.weights.h5`. + | overwrite: Whether we should overwrite any existing model + | at the target location, or instead ask the user + | via an interactive prompt. + | + | summary( + | self, + | line_length=None, + | positions=None, + | print_fn=None, + | expand_nested=False, + | show_trainable=False, + | layer_range=None + | ) + | Prints a string summary of the network. + | + | Args: + | line_length: Total length of printed lines + | (e.g. set this to adapt the display to different + | terminal window sizes). + | positions: Relative or absolute positions of log elements + | in each line. If not provided, becomes + | `[0.3, 0.6, 0.70, 1.]`. Defaults to `None`. + | print_fn: Print function to use. By default, prints to `stdout`. + | If `stdout` doesn't work in your environment, change to `print`. + | It will be called on each line of the summary. 
+ | You can set it to a custom function + | in order to capture the string summary. + | expand_nested: Whether to expand the nested models. + | Defaults to `False`. + | show_trainable: Whether to show if a layer is trainable. + | Defaults to `False`. + | layer_range: a list or tuple of 2 strings, + | which is the starting layer name and ending layer name + | (both inclusive) indicating the range of layers to be printed + | in the summary. It also accepts regex patterns instead of exact + | names. In such a case, the start predicate will be the first element + | that matches `layer_range[0]` and the end predicate will be + | the last element that matches `layer_range[1]`. + | Defaults to `None`, which considers all layers of the model. + | + | Raises: + | ValueError: if `summary()` is called before the model is built. + | + | to_json(self, **kwargs) + | Returns a JSON string containing the network configuration. + | + | To load a network from a JSON save file, use + | `keras.models.model_from_json(json_string, custom_objects={...})`. + | + | Args: + | **kwargs: Additional keyword arguments to be passed to + | `json.dumps()`. + | + | Returns: + | A JSON string. + | + | ---------------------------------------------------------------------- + | Class methods defined here: + | + | from_config(config, custom_objects=None) from builtins.type + | Creates a layer from its config. + | + | This method is the reverse of `get_config`, + | capable of instantiating the same layer from the config + | dictionary. It does not handle layer connectivity + | (handled by Network), nor weights (handled by `set_weights`). + | + | Args: + | config: A Python dictionary, typically the + | output of get_config. + | + | Returns: + | A layer instance. + | + | ---------------------------------------------------------------------- + | Static methods defined here: + | + | __new__( + | cls, + | *args, + | **kwargs + | ) + | Create and return a new object. See help(type) for accurate signature. + | + | ---------------------------------------------------------------------- + | Data descriptors defined here: + | + | layers + | + diff --git a/.tether/man/keras_model_sequential.txt b/.tether/man/keras_model_sequential.txt new file mode 100644 index 0000000000..b512cbe805 --- /dev/null +++ b/.tether/man/keras_model_sequential.txt @@ -0,0 +1,152 @@ +Help on class Sequential in module keras.src.models.sequential: + +class Sequential(keras.src.models.model.Model) + | Sequential(*args, **kwargs) + | + | `Sequential` groups a linear stack of layers into a `Model`. + | + | Examples: + | + | ```python + | model = keras.Sequential() + | model.add(keras.Input(shape=(16,))) + | model.add(keras.layers.Dense(8)) + | + | # Note that you can also omit the initial `Input`.
+ | # In that case the model doesn't have any weights until the first call + | # to a training/evaluation method (since it isn't yet built): + | model = keras.Sequential() + | model.add(keras.layers.Dense(8)) + | model.add(keras.layers.Dense(4)) + | # model.weights not created yet + | + | # Whereas if you specify an `Input`, the model gets built + | # continuously as you are adding layers: + | model = keras.Sequential() + | model.add(keras.Input(shape=(16,))) + | model.add(keras.layers.Dense(8)) + | len(model.weights) # Returns "2" + | + | # When using the delayed-build pattern (no input shape specified), you can + | # choose to manually build your model by calling + | # `build(batch_input_shape)`: + | model = keras.Sequential() + | model.add(keras.layers.Dense(8)) + | model.add(keras.layers.Dense(4)) + | model.build((None, 16)) + | len(model.weights) # Returns "4" + | + | # Note that when using the delayed-build pattern (no input shape specified), + | # the model gets built the first time you call `fit`, `eval`, or `predict`, + | # or the first time you call the model on some input data. + | model = keras.Sequential() + | model.add(keras.layers.Dense(8)) + | model.add(keras.layers.Dense(1)) + | model.compile(optimizer='sgd', loss='mse') + | # This builds the model for the first time: + | model.fit(x, y, batch_size=32, epochs=10) + | ``` + | + | Method resolution order: + | Sequential + | keras.src.models.model.Model + | keras.src.backend.tensorflow.trainer.TensorFlowTrainer + | keras.src.trainers.trainer.Trainer + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | layers=None, + | trainable=True, + | name=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | add( + | self, + | layer, + | rebuild=True + | ) + | Adds a layer instance on top of the layer stack. + | + | Args: + | layer: layer instance. + | + | build(self, input_shape=None) + | + | call( + | self, + | inputs, + | training=None, + | mask=None + | ) + | + | compute_output_spec( + | self, + | inputs, + | training=None, + | mask=None + | ) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + | pop(self, rebuild=True) + | Removes the last layer in the model. + | + | ---------------------------------------------------------------------- + | Class methods defined here: + | + | from_config(config, custom_objects=None) from builtins.type + | Creates a layer from its config. + | + | This method is the reverse of `get_config`, + | capable of instantiating the same layer from the config + | dictionary. It does not handle layer connectivity + | (handled by Network), nor weights (handled by `set_weights`). + | + | Args: + | config: A Python dictionary, typically the + | output of get_config. + | + | Returns: + | A layer instance. + | + | ---------------------------------------------------------------------- + | Static methods defined here: + | + | __new__( + | cls, + | *args, + | **kwargs + | ) + | Create and return a new object. See help(type) for accurate signature. 
+ | + | ---------------------------------------------------------------------- + | Readonly properties defined here: + | + | input_dtype + | The dtype layer inputs should be converted to. + | + | input_shape + | + | inputs + | + | layers + | + | output_shape + | + | outputs + | + diff --git a/.tether/man/layer_activation.txt b/.tether/man/layer_activation.txt new file mode 100644 index 0000000000..65df0bbe29 --- /dev/null +++ b/.tether/man/layer_activation.txt @@ -0,0 +1,51 @@ +Help on class Activation in module keras.src.layers.activations.activation: + +class Activation(keras.src.layers.layer.Layer) + | Activation(activation, **kwargs) + | + | Applies an activation function to an output. + | + | Args: + | activation: Activation function. It could be a callable, or the name of + | an activation from the `keras.activations` namespace. + | **kwargs: Base layer keyword arguments, such as `name` and `dtype`. + | + | Example: + | + | >>> layer = keras.layers.Activation('relu') + | >>> layer([-3.0, -1.0, 0.0, 2.0]) + | [0.0, 0.0, 0.0, 2.0] + | >>> layer = keras.layers.Activation(keras.activations.relu) + | >>> layer([-3.0, -1.0, 0.0, 2.0]) + | [0.0, 0.0, 0.0, 2.0] + | + | Method resolution order: + | Activation + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | activation, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_activation_elu.txt b/.tether/man/layer_activation_elu.txt new file mode 100644 index 0000000000..8a6a58f100 --- /dev/null +++ b/.tether/man/layer_activation_elu.txt @@ -0,0 +1,42 @@ +Help on class ELU in module keras.src.layers.activations.elu: + +class ELU(keras.src.layers.layer.Layer) + | ELU(alpha=1.0, **kwargs) + | + | Applies an Exponential Linear Unit function to an output. + | + | Formula: + | + | ``` + | f(x) = alpha * (exp(x) - 1.) for x < 0 + | f(x) = x for x >= 0 + | ``` + | + | Args: + | alpha: float, slope of negative section. Defaults to `1.0`. + | **kwargs: Base layer keyword arguments, such as `name` and `dtype`. + | + | Method resolution order: + | ELU + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | alpha=1.0, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. 
+ | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + diff --git a/.tether/man/layer_activation_leaky_relu.txt b/.tether/man/layer_activation_leaky_relu.txt new file mode 100644 index 0000000000..ac89331dae --- /dev/null +++ b/.tether/man/layer_activation_leaky_relu.txt @@ -0,0 +1,61 @@ +Help on class LeakyReLU in module keras.src.layers.activations.leaky_relu: + +class LeakyReLU(keras.src.layers.layer.Layer) + | LeakyReLU(negative_slope=0.3, **kwargs) + | + | Leaky version of a Rectified Linear Unit activation layer. + | + | This layer allows a small gradient when the unit is not active. + | + | Formula: + | + | ``` python + | f(x) = negative_slope * x if x < 0 + | f(x) = x if x >= 0 + | ``` + | + | Example: + | + | ``` python + | leaky_relu_layer = LeakyReLU(negative_slope=0.5) + | input = np.array([-10, -5, 0.0, 5, 10]) + | result = leaky_relu_layer(input) + | # result = [-5. , -2.5, 0. , 5. , 10.] + | ``` + | + | Args: + | negative_slope: Float >= 0.0. Negative slope coefficient. + | Defaults to `0.3`. + | **kwargs: Base layer keyword arguments, such as + | `name` and `dtype`. + | + | Method resolution order: + | LeakyReLU + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | negative_slope=0.3, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_activation_parametric_relu.txt b/.tether/man/layer_activation_parametric_relu.txt new file mode 100644 index 0000000000..13a001974c --- /dev/null +++ b/.tether/man/layer_activation_parametric_relu.txt @@ -0,0 +1,61 @@ +Help on class PReLU in module keras.src.layers.activations.prelu: + +class PReLU(keras.src.layers.layer.Layer) + | PReLU(alpha_initializer='Zeros', alpha_regularizer=None, alpha_constraint=None, shared_axes=None, **kwargs) + | + | Parametric Rectified Linear Unit activation layer. + | + | Formula: + | ``` python + | f(x) = alpha * x for x < 0 + | f(x) = x for x >= 0 + | ``` + | where `alpha` is a learned array with the same shape as x. + | + | Args: + | alpha_initializer: Initializer function for the weights. + | alpha_regularizer: Regularizer for the weights. + | alpha_constraint: Constraint for the weights. + | shared_axes: The axes along which to share learnable parameters for the + | activation function. For example, if the incoming feature maps are + | from a 2D convolution with output shape + | `(batch, height, width, channels)`, and you wish to share parameters + | across space so that each filter only has one set of parameters, + | set `shared_axes=[1, 2]`. + | **kwargs: Base layer keyword arguments, such as `name` and `dtype`.
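+ | + | Example (an illustrative usage sketch, assuming a channels-last 4D input; not part of the captured help): + | + | ``` python + | # Share one learned alpha per channel across both spatial axes: + | prelu_layer = keras.layers.PReLU(shared_axes=[1, 2]) + | x = np.random.rand(1, 8, 8, 16) + | result = prelu_layer(x) # output shape is unchanged: (1, 8, 8, 16) + | ```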
+ | + | Method resolution order: + | PReLU + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | alpha_initializer='Zeros', + | alpha_regularizer=None, + | alpha_constraint=None, + | shared_axes=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, input_shape) + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_activation_relu.txt b/.tether/man/layer_activation_relu.txt new file mode 100644 index 0000000000..515364858a --- /dev/null +++ b/.tether/man/layer_activation_relu.txt @@ -0,0 +1,68 @@ +Help on class ReLU in module keras.src.layers.activations.relu: + +class ReLU(keras.src.layers.layer.Layer) + | ReLU(max_value=None, negative_slope=0.0, threshold=0.0, **kwargs) + | + | Rectified Linear Unit activation function layer. + | + | Formula: + | ``` python + | f(x) = max(x, 0) + | f(x) = max_value if x >= max_value + | f(x) = x if threshold <= x < max_value + | f(x) = negative_slope * (x - threshold) otherwise + | ``` + | + | Example: + | ``` python + | relu_layer = keras.layers.ReLU( + | max_value=10, + | negative_slope=0.5, + | threshold=0, + | ) + | input = np.array([-10, -5, 0.0, 5, 10]) + | result = relu_layer(input) + | # result = [-5. , -2.5, 0. , 5. , 10.] + | ``` + | + | Args: + | max_value: Float >= 0. Maximum activation value. None means unlimited. + | Defaults to `None`. + | negative_slope: Float >= 0. Negative slope coefficient. + | Defaults to `0.0`. + | threshold: Float >= 0. Threshold value for thresholded activation. + | Defaults to `0.0`. + | **kwargs: Base layer keyword arguments, such as `name` and `dtype`. + | + | Method resolution order: + | ReLU + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | max_value=None, + | negative_slope=0.0, + | threshold=0.0, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_activation_softmax.txt b/.tether/man/layer_activation_softmax.txt new file mode 100644 index 0000000000..f42daba539 --- /dev/null +++ b/.tether/man/layer_activation_softmax.txt @@ -0,0 +1,67 @@ +Help on class Softmax in module keras.src.layers.activations.softmax: + +class Softmax(keras.src.layers.layer.Layer) + | Softmax(axis=-1, **kwargs) + | + | Softmax activation layer.
+ | + | Formula: + | ``` python + | exp_x = exp(x - max(x)) + | f(x) = exp_x / sum(exp_x) + | ``` + | + | Example: + | >>> softmax_layer = keras.layers.Softmax() + | >>> input = np.array([1.0, 2.0, 1.0]) + | >>> result = softmax_layer(input) + | [0.21194157, 0.5761169, 0.21194157] + | + | Args: + | axis: Integer, or list of integers, axis along which the softmax + | normalization is applied. + | **kwargs: Base layer keyword arguments, such as `name` and `dtype`. + | + | Call arguments: + | inputs: The inputs (logits) to the softmax layer. + | mask: A boolean mask of the same shape as `inputs`. The mask + | specifies 1 to keep and 0 to mask. Defaults to `None`. + | + | Returns: + | Softmaxed output with the same shape as `inputs`. + | + | Method resolution order: + | Softmax + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | axis=-1, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call( + | self, + | inputs, + | mask=None + | ) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_activity_regularization.txt b/.tether/man/layer_activity_regularization.txt new file mode 100644 index 0000000000..9caaf26854 --- /dev/null +++ b/.tether/man/layer_activity_regularization.txt @@ -0,0 +1,50 @@ +Help on class ActivityRegularization in module keras.src.layers.regularization.activity_regularization: + +class ActivityRegularization(keras.src.layers.layer.Layer) + | ActivityRegularization(l1=0.0, l2=0.0, **kwargs) + | + | Layer that applies an update to the cost function based on input activity. + | + | Args: + | l1: L1 regularization factor (positive float). + | l2: L2 regularization factor (positive float). + | + | Input shape: + | Arbitrary. Use the keyword argument `input_shape` + | (tuple of integers, does not include the samples axis) + | when using this layer as the first layer in a model. + | + | Output shape: + | Same shape as input. + | + | Method resolution order: + | ActivityRegularization + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | l1=0.0, + | l2=0.0, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it.
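+ | + | Example (an illustrative usage sketch, not part of the captured help; the penalty is collected in `layer.losses`): + | + | ``` python + | # Adds 0.01 * sum(abs(activations)) to the training loss: + | layer = keras.layers.ActivityRegularization(l1=0.01) + | result = layer(np.array([[1.0, -2.0, 3.0]])) + | # `result` equals the input; the L1 penalty is recorded in `layer.losses` + | ```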
+ | + diff --git a/.tether/man/layer_add.txt b/.tether/man/layer_add.txt new file mode 100644 index 0000000000..d95dcbe6b8 --- /dev/null +++ b/.tether/man/layer_add.txt @@ -0,0 +1,65 @@ +Help on class Add in module keras.src.layers.merging.add: + +class Add(keras.src.layers.merging.base_merge.Merge) + | Add(**kwargs) + | + | Performs an elementwise addition operation. + | + | It takes as input a list of tensors, all of the same shape, + | and returns a single tensor (also of the same shape). + | + | Examples: + | + | >>> input_shape = (2, 3, 4) + | >>> x1 = np.random.rand(*input_shape) + | >>> x2 = np.random.rand(*input_shape) + | >>> y = keras.layers.Add()([x1, x2]) + | + | Usage in a Keras model: + | + | >>> input1 = keras.layers.Input(shape=(16,)) + | >>> x1 = keras.layers.Dense(8, activation='relu')(input1) + | >>> input2 = keras.layers.Input(shape=(32,)) + | >>> x2 = keras.layers.Dense(8, activation='relu')(input2) + | >>> # equivalent to `added = keras.layers.add([x1, x2])` + | >>> added = keras.layers.Add()([x1, x2]) + | >>> out = keras.layers.Dense(4)(added) + | >>> model = keras.models.Model(inputs=[input1, input2], outputs=out) + | + | Method resolution order: + | Add + | keras.src.layers.merging.base_merge.Merge + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods inherited from keras.src.layers.merging.base_merge.Merge: + | + | __init__(self, **kwargs) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, input_shape) + | + | call(self, inputs) + | + | compute_mask( + | self, + | inputs, + | mask=None + | ) + | + | compute_output_shape(self, input_shape) + | + | compute_output_spec(self, inputs) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_additive_attention.txt b/.tether/man/layer_additive_attention.txt new file mode 100644 index 0000000000..4f47e8979f --- /dev/null +++ b/.tether/man/layer_additive_attention.txt @@ -0,0 +1,86 @@ +Help on class AdditiveAttention in module keras.src.layers.attention.additive_attention: + +class AdditiveAttention(keras.src.layers.attention.attention.Attention) + | AdditiveAttention(use_scale=True, dropout=0.0, **kwargs) + | + | Additive attention layer, a.k.a. Bahdanau-style attention. + | + | Inputs are a list with 2 or 3 elements: + | 1. A `query` tensor of shape `(batch_size, Tq, dim)`. + | 2. A `value` tensor of shape `(batch_size, Tv, dim)`. + | 3. An optional `key` tensor of shape `(batch_size, Tv, dim)`. If none is + | supplied, `value` will be used as `key`. + | + | The calculation follows the steps: + | 1. Calculate attention scores using `query` and `key` with shape + | `(batch_size, Tq, Tv)` as a non-linear sum + | `scores = reduce_sum(tanh(query + key), axis=-1)`. + | 2. Use scores to calculate a softmax distribution with shape + | `(batch_size, Tq, Tv)`. + | 3. Use the softmax distribution to create a linear combination of `value` + | with shape `(batch_size, Tq, dim)`. + | + | Args: + | use_scale: If `True`, will create a scalar variable to scale the + | attention scores. + | dropout: Float between 0 and 1. Fraction of the units to drop for the + | attention scores. 
Defaults to `0.0`. + | + | Call Args: + | inputs: List of the following tensors: + | - `query`: Query tensor of shape `(batch_size, Tq, dim)`. + | - `value`: Value tensor of shape `(batch_size, Tv, dim)`. + | - `key`: Optional key tensor of shape `(batch_size, Tv, dim)`. If + | not given, will use `value` for both `key` and `value`, which is + | the most common case. + | mask: List of the following tensors: + | - `query_mask`: A boolean mask tensor of shape `(batch_size, Tq)`. + | If given, the output will be zero at the positions where + | `mask==False`. + | - `value_mask`: A boolean mask tensor of shape `(batch_size, Tv)`. + | If given, will apply the mask such that values at positions + | where `mask==False` do not contribute to the result. + | return_attention_scores: bool, if `True`, returns the attention scores + | (after masking and softmax) as an additional output argument. + | training: Python boolean indicating whether the layer should behave in + | training mode (adding dropout) or in inference mode (no dropout). + | use_causal_mask: Boolean. Set to `True` for decoder self-attention. Adds + | a mask such that position `i` cannot attend to positions `j > i`. + | This prevents the flow of information from the future towards the + | past. Defaults to `False`. + | + | Output: + | Attention outputs of shape `(batch_size, Tq, dim)`. + | (Optional) Attention scores after masking and softmax with shape + | `(batch_size, Tq, Tv)`. + | + | Method resolution order: + | AdditiveAttention + | keras.src.layers.attention.attention.Attention + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | use_scale=True, + | dropout=0.0, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_alpha_dropout.txt b/.tether/man/layer_alpha_dropout.txt new file mode 100644 index 0000000000..066406e967 --- /dev/null +++ b/.tether/man/layer_alpha_dropout.txt @@ -0,0 +1,66 @@ +Help on class AlphaDropout in module keras.src.layers.regularization.alpha_dropout: + +class AlphaDropout(keras.src.layers.layer.Layer) + | AlphaDropout(rate, noise_shape=None, seed=None, **kwargs) + | + | Applies Alpha Dropout to the input. + | + | Alpha Dropout is a `Dropout` that keeps the mean and variance of inputs + | at their original values, in order to ensure the self-normalizing property + | even after this dropout. + | Alpha Dropout fits well with Scaled Exponential Linear Units (SELU) by + | randomly setting activations to the negative saturation value. + | + | Args: + | rate: Float between 0 and 1. The multiplicative noise will have + | standard deviation `sqrt(rate / (1 - rate))`. + | noise_shape: 1D integer tensor representing the shape of the + | binary alpha dropout mask that will be multiplied with the input. + | For instance, if your inputs have shape + | `(batch_size, timesteps, features)` and + | you want the alpha dropout mask to be the same for all timesteps, + | you can use `noise_shape=(batch_size, 1, features)`.
+ | seed: A Python integer to use as random seed. + | + | Call arguments: + | inputs: Input tensor (of any rank). + | training: Python boolean indicating whether the layer should behave in + | training mode (adding alpha dropout) or in inference mode + | (doing nothing). + | + | Method resolution order: + | AlphaDropout + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | rate, + | noise_shape=None, + | seed=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call( + | self, + | inputs, + | training=False + | ) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_attention.txt b/.tether/man/layer_attention.txt new file mode 100644 index 0000000000..5939c48a9e --- /dev/null +++ b/.tether/man/layer_attention.txt @@ -0,0 +1,109 @@ +Help on class Attention in module keras.src.layers.attention.attention: + +class Attention(keras.src.layers.layer.Layer) + | Attention(use_scale=False, score_mode='dot', dropout=0.0, seed=None, **kwargs) + | + | Dot-product attention layer, a.k.a. Luong-style attention. + | + | Inputs are a list with 2 or 3 elements: + | 1. A `query` tensor of shape `(batch_size, Tq, dim)`. + | 2. A `value` tensor of shape `(batch_size, Tv, dim)`. + | 3. An optional `key` tensor of shape `(batch_size, Tv, dim)`. If none is + | supplied, `value` will be used as a `key`. + | + | The calculation follows the steps: + | 1. Calculate attention scores using `query` and `key` with shape + | `(batch_size, Tq, Tv)`. + | 2. Use scores to calculate a softmax distribution with shape + | `(batch_size, Tq, Tv)`. + | 3. Use the softmax distribution to create a linear combination of `value` + | with shape `(batch_size, Tq, dim)`. + | + | Args: + | use_scale: If `True`, will create a scalar variable to scale the + | attention scores. + | dropout: Float between 0 and 1. Fraction of the units to drop for the + | attention scores. Defaults to `0.0`. + | seed: A Python integer to use as random seed in case of `dropout`. + | score_mode: Function to use to compute attention scores, one of + | `{"dot", "concat"}`. `"dot"` refers to the dot product between the + | query and key vectors. `"concat"` refers to the hyperbolic tangent + | of the concatenation of the `query` and `key` vectors. + | + | Call Args: + | inputs: List of the following tensors: + | - `query`: Query tensor of shape `(batch_size, Tq, dim)`. + | - `value`: Value tensor of shape `(batch_size, Tv, dim)`. + | - `key`: Optional key tensor of shape `(batch_size, Tv, dim)`. If + | not given, will use `value` for both `key` and `value`, which is + | the most common case. + | mask: List of the following tensors: + | - `query_mask`: A boolean mask tensor of shape `(batch_size, Tq)`. + | If given, the output will be zero at the positions where + | `mask==False`. + | - `value_mask`: A boolean mask tensor of shape `(batch_size, Tv)`. + | If given, will apply the mask such that values at positions + | where `mask==False` do not contribute to the result.
+ | return_attention_scores: bool, if `True`, returns the attention scores + | (after masking and softmax) as an additional output argument. + | training: Python boolean indicating whether the layer should behave in + | training mode (adding dropout) or in inference mode (no dropout). + | use_causal_mask: Boolean. Set to `True` for decoder self-attention. Adds + | a mask such that position `i` cannot attend to positions `j > i`. + | This prevents the flow of information from the future towards the + | past. Defaults to `False`. + | + | Output: + | Attention outputs of shape `(batch_size, Tq, dim)`. + | (Optional) Attention scores after masking and softmax with shape + | `(batch_size, Tq, Tv)`. + | + | Method resolution order: + | Attention + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | use_scale=False, + | score_mode='dot', + | dropout=0.0, + | seed=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, input_shape) + | + | call( + | self, + | inputs, + | mask=None, + | training=False, + | return_attention_scores=False, + | use_causal_mask=False + | ) + | + | compute_mask( + | self, + | inputs, + | mask=None + | ) + | + | compute_output_shape(self, input_shape) + | Returns the value tensor's shape with its sequence length replaced by the query tensor's length + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_average.txt b/.tether/man/layer_average.txt new file mode 100644 index 0000000000..261e115483 --- /dev/null +++ b/.tether/man/layer_average.txt @@ -0,0 +1,65 @@ +Help on class Average in module keras.src.layers.merging.average: + +class Average(keras.src.layers.merging.base_merge.Merge) + | Average(**kwargs) + | + | Averages a list of inputs element-wise. + | + | It takes as input a list of tensors, all of the same shape, + | and returns a single tensor (also of the same shape). + | + | Examples: + | + | >>> input_shape = (2, 3, 4) + | >>> x1 = np.random.rand(*input_shape) + | >>> x2 = np.random.rand(*input_shape) + | >>> y = keras.layers.Average()([x1, x2]) + | + | Usage in a Keras model: + | + | >>> input1 = keras.layers.Input(shape=(16,)) + | >>> x1 = keras.layers.Dense(8, activation='relu')(input1) + | >>> input2 = keras.layers.Input(shape=(32,)) + | >>> x2 = keras.layers.Dense(8, activation='relu')(input2) + | >>> # equivalent to `y = keras.layers.average([x1, x2])` + | >>> y = keras.layers.Average()([x1, x2]) + | >>> out = keras.layers.Dense(4)(y) + | >>> model = keras.models.Model(inputs=[input1, input2], outputs=out) + | + | Method resolution order: + | Average + | keras.src.layers.merging.base_merge.Merge + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods inherited from keras.src.layers.merging.base_merge.Merge: + | + | __init__(self, **kwargs) + | Initialize self.
See help(type(self)) for accurate signature. + | + | build(self, input_shape) + | + | call(self, inputs) + | + | compute_mask( + | self, + | inputs, + | mask=None + | ) + | + | compute_output_shape(self, input_shape) + | + | compute_output_spec(self, inputs) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_average_pooling_1d.txt b/.tether/man/layer_average_pooling_1d.txt new file mode 100644 index 0000000000..8674429ba9 --- /dev/null +++ b/.tether/man/layer_average_pooling_1d.txt @@ -0,0 +1,94 @@ +Help on class AveragePooling1D in module keras.src.layers.pooling.average_pooling1d: + +class AveragePooling1D(keras.src.layers.pooling.base_pooling.BasePooling) + | AveragePooling1D(pool_size, strides=None, padding='valid', data_format=None, name=None, **kwargs) + | + | Average pooling for temporal data. + | + | Downsamples the input representation by taking the average value over the + | window defined by `pool_size`. The window is shifted by `strides`. The + | resulting output when using the "valid" padding option has a shape of: + | `output_shape = (input_shape - pool_size + 1) / strides` + | + | The resulting output shape when using the "same" padding option is: + | `output_shape = input_shape / strides` + | + | Args: + | pool_size: int, size of the average pooling window. + | strides: int or None. Specifies how much the pooling window moves + | for each pooling step. If None, it will default to `pool_size`. + | padding: string, either `"valid"` or `"same"` (case-insensitive). + | `"valid"` means no padding. `"same"` results in padding evenly to + | the left/right or up/down of the input such that output has the same + | height/width dimension as the input. + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape `(batch, steps, features)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch, features, steps)`. It defaults to the `image_data_format` + | value found in your Keras config file at `~/.keras/keras.json`. + | If you never set it, then it will be `"channels_last"`. + | + | Input shape: + | - If `data_format="channels_last"`: + | 3D tensor with shape `(batch_size, steps, features)`. + | - If `data_format="channels_first"`: + | 3D tensor with shape `(batch_size, features, steps)`. + | + | Output shape: + | - If `data_format="channels_last"`: + | 3D tensor with shape `(batch_size, downsampled_steps, features)`. + | - If `data_format="channels_first"`: + | 3D tensor with shape `(batch_size, features, downsampled_steps)`. + | + | Examples: + | + | `strides=1` and `padding="valid"`: + | + | >>> x = np.array([1., 2., 3., 4., 5.]) + | >>> x = np.reshape(x, [1, 5, 1]) + | >>> avg_pool_1d = keras.layers.AveragePooling1D(pool_size=2, + | ... strides=1, padding="valid") + | >>> avg_pool_1d(x) + | + | `strides=2` and `padding="valid"`: + | + | >>> x = np.array([1., 2., 3., 4., 5.]) + | >>> x = np.reshape(x, [1, 5, 1]) + | >>> avg_pool_1d = keras.layers.AveragePooling1D(pool_size=2, + | ... strides=2, padding="valid") + | >>> avg_pool_1d(x) + | + | `strides=1` and `padding="same"`: + | + | >>> x = np.array([1., 2., 3., 4., 5.]) + | >>> x = np.reshape(x, [1, 5, 1]) + | >>> avg_pool_1d = keras.layers.AveragePooling1D(pool_size=2, + | ...
strides=1, padding="same") + | >>> avg_pool_1d(x) + | + | Method resolution order: + | AveragePooling1D + | keras.src.layers.pooling.base_pooling.BasePooling + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | pool_size, + | strides=None, + | padding='valid', + | data_format=None, + | name=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + diff --git a/.tether/man/layer_average_pooling_2d.txt b/.tether/man/layer_average_pooling_2d.txt new file mode 100644 index 0000000000..6e8385a66a --- /dev/null +++ b/.tether/man/layer_average_pooling_2d.txt @@ -0,0 +1,111 @@ +Help on class AveragePooling2D in module keras.src.layers.pooling.average_pooling2d: + +class AveragePooling2D(keras.src.layers.pooling.base_pooling.BasePooling) + | AveragePooling2D(pool_size, strides=None, padding='valid', data_format=None, name=None, **kwargs) + | + | Average pooling operation for 2D spatial data. + | + | Downsamples the input along its spatial dimensions (height and width) + | by taking the average value over an input window + | (of size defined by `pool_size`) for each channel of the input. + | The window is shifted by `strides` along each dimension. + | + | The resulting output when using the `"valid"` padding option has a spatial + | shape (number of rows or columns) of: + | `output_shape = math.floor((input_shape - pool_size) / strides) + 1` + | (when `input_shape >= pool_size`) + | + | The resulting output shape when using the `"same"` padding option is: + | `output_shape = math.floor((input_shape - 1) / strides) + 1` + | + | Args: + | pool_size: int or tuple of 2 integers, factors by which to downscale + | (dim1, dim2). If only one integer is specified, the same + | window length will be used for all dimensions. + | strides: int or tuple of 2 integers, or None. Strides values. If None, + | it will default to `pool_size`. If only one int is specified, the + | same stride size will be used for all dimensions. + | padding: string, either `"valid"` or `"same"` (case-insensitive). + | `"valid"` means no padding. `"same"` results in padding evenly to + | the left/right or up/down of the input such that output has the same + | height/width dimension as the input. + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape `(batch, height, width, channels)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch, channels, height, width)`. It defaults to the + | `image_data_format` value found in your Keras config file at + | `~/.keras/keras.json`. If you never set it, then it will be + | `"channels_last"`. + | + | Input shape: + | - If `data_format="channels_last"`: + | 4D tensor with shape `(batch_size, height, width, channels)`. + | - If `data_format="channels_first"`: + | 4D tensor with shape `(batch_size, channels, height, width)`. + | + | Output shape: + | - If `data_format="channels_last"`: + | 4D tensor with shape + | `(batch_size, pooled_height, pooled_width, channels)`. + | - If `data_format="channels_first"`: + | 4D tensor with shape + | `(batch_size, channels, pooled_height, pooled_width)`. 
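To sanity-check the two padding formulas quoted above, here is a minimal sketch (our own illustration, not part of the tether file; the helper name `pooled_dim` is hypothetical, and a working `numpy`/`keras` install is assumed):

```python
import math

import keras
import numpy as np

def pooled_dim(input_dim, pool_size, strides, padding):
    """Mirrors the output-shape formulas quoted in the help text above."""
    if padding == "valid":
        return math.floor((input_dim - pool_size) / strides) + 1
    return math.floor((input_dim - 1) / strides) + 1  # "same"

x = np.random.rand(1, 7, 9, 3)  # (batch, height, width, channels)
y = keras.layers.AveragePooling2D(pool_size=2, strides=2, padding="valid")(x)
assert tuple(y.shape) == (1, pooled_dim(7, 2, 2, "valid"), pooled_dim(9, 2, 2, "valid"), 3)
print(y.shape)  # (1, 3, 4, 3)
```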
+ | + | Examples: + | + | `strides=(1, 1)` and `padding="valid"`: + | + | >>> x = np.array([[1., 2., 3.], + | ... [4., 5., 6.], + | ... [7., 8., 9.]]) + | >>> x = np.reshape(x, [1, 3, 3, 1]) + | >>> avg_pool_2d = keras.layers.AveragePooling2D(pool_size=(2, 2), + | ... strides=(1, 1), padding="valid") + | >>> avg_pool_2d(x) + | + | `strides=(2, 2)` and `padding="valid"`: + | + | >>> x = np.array([[1., 2., 3., 4.], + | ... [5., 6., 7., 8.], + | ... [9., 10., 11., 12.]]) + | >>> x = np.reshape(x, [1, 3, 4, 1]) + | >>> avg_pool_2d = keras.layers.AveragePooling2D(pool_size=(2, 2), + | ... strides=(2, 2), padding="valid") + | >>> avg_pool_2d(x) + | + | `stride=(1, 1)` and `padding="same"`: + | + | >>> x = np.array([[1., 2., 3.], + | ... [4., 5., 6.], + | ... [7., 8., 9.]]) + | >>> x = np.reshape(x, [1, 3, 3, 1]) + | >>> avg_pool_2d = keras.layers.AveragePooling2D(pool_size=(2, 2), + | ... strides=(1, 1), padding="same") + | >>> avg_pool_2d(x) + | + | Method resolution order: + | AveragePooling2D + | keras.src.layers.pooling.base_pooling.BasePooling + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | pool_size, + | strides=None, + | padding='valid', + | data_format=None, + | name=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + diff --git a/.tether/man/layer_average_pooling_3d.txt b/.tether/man/layer_average_pooling_3d.txt new file mode 100644 index 0000000000..5d6f227708 --- /dev/null +++ b/.tether/man/layer_average_pooling_3d.txt @@ -0,0 +1,87 @@ +Help on class AveragePooling3D in module keras.src.layers.pooling.average_pooling3d: + +class AveragePooling3D(keras.src.layers.pooling.base_pooling.BasePooling) + | AveragePooling3D(pool_size, strides=None, padding='valid', data_format=None, name=None, **kwargs) + | + | Average pooling operation for 3D data (spatial or spatio-temporal). + | + | Downsamples the input along its spatial dimensions (depth, height, and + | width) by taking the average value over an input window (of size defined by + | `pool_size`) for each channel of the input. The window is shifted by + | `strides` along each dimension. + | + | Args: + | pool_size: int or tuple of 3 integers, factors by which to downscale + | (dim1, dim2, dim3). If only one integer is specified, the same + | window length will be used for all dimensions. + | strides: int or tuple of 3 integers, or None. Strides values. If None, + | it will default to `pool_size`. If only one int is specified, the + | same stride size will be used for all dimensions. + | padding: string, either `"valid"` or `"same"` (case-insensitive). + | `"valid"` means no padding. `"same"` results in padding evenly to + | the left/right or up/down of the input such that output has the same + | height/width dimension as the input. + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape + | `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` while + | `"channels_first"` corresponds to inputs with shape + | `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. 
+ | It defaults to the `image_data_format` value found in your Keras + | config file at `~/.keras/keras.json`. If you never set it, then it + | will be `"channels_last"`. + | + | Input shape: + | - If `data_format="channels_last"`: + | 5D tensor with shape: + | `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + | - If `data_format="channels_first"`: + | 5D tensor with shape: + | `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` + | + | Output shape: + | - If `data_format="channels_last"`: + | 5D tensor with shape: + | `(batch_size, pooled_dim1, pooled_dim2, pooled_dim3, channels)` + | - If `data_format="channels_first"`: + | 5D tensor with shape: + | `(batch_size, channels, pooled_dim1, pooled_dim2, pooled_dim3)` + | + | Example: + | + | ```python + | depth = 30 + | height = 30 + | width = 30 + | channels = 3 + | + | inputs = keras.layers.Input(shape=(depth, height, width, channels)) + | layer = keras.layers.AveragePooling3D(pool_size=3) + | outputs = layer(inputs) # Shape: (batch_size, 10, 10, 10, 3) + | ``` + | + | Method resolution order: + | AveragePooling3D + | keras.src.layers.pooling.base_pooling.BasePooling + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | pool_size, + | strides=None, + | padding='valid', + | data_format=None, + | name=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + diff --git a/.tether/man/layer_batch_normalization.txt b/.tether/man/layer_batch_normalization.txt new file mode 100644 index 0000000000..8ef748a2cf --- /dev/null +++ b/.tether/man/layer_batch_normalization.txt @@ -0,0 +1,165 @@ +Help on class BatchNormalization in module keras.src.layers.normalization.batch_normalization: + +class BatchNormalization(keras.src.layers.layer.Layer) + | BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, center=True, scale=True, beta_initializer='zeros', gamma_initializer='ones', moving_mean_initializer='zeros', moving_variance_initializer='ones', beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None, synchronized=False, **kwargs) + | + | Layer that normalizes its inputs. + | + | Batch normalization applies a transformation that maintains the mean output + | close to 0 and the output standard deviation close to 1. + | + | Importantly, batch normalization works differently during training and + | during inference. + | + | **During training** (i.e. when using `fit()` or when calling the layer/model + | with the argument `training=True`), the layer normalizes its output using + | the mean and standard deviation of the current batch of inputs. That is to + | say, for each channel being normalized, the layer returns + | `gamma * (batch - mean(batch)) / sqrt(var(batch) + epsilon) + beta`, where: + | + | - `epsilon` is small constant (configurable as part of the constructor + | arguments) + | - `gamma` is a learned scaling factor (initialized as 1), which + | can be disabled by passing `scale=False` to the constructor. + | - `beta` is a learned offset factor (initialized as 0), which + | can be disabled by passing `center=False` to the constructor. + | + | **During inference** (i.e. 
when using `evaluate()` or `predict()` or when + | calling the layer/model with the argument `training=False` (which is the + | default)), the layer normalizes its output using a moving average of the + | mean and standard deviation of the batches it has seen during training. That + | is to say, it returns + | `gamma * (batch - self.moving_mean) / sqrt(self.moving_var + epsilon) + beta`. + | + | `self.moving_mean` and `self.moving_var` are non-trainable variables that + | are updated each time the layer is called in training mode, as follows: + | + | - `moving_mean = moving_mean * momentum + mean(batch) * (1 - momentum)` + | - `moving_var = moving_var * momentum + var(batch) * (1 - momentum)` + | + | As such, the layer will only normalize its inputs during inference + | *after having been trained on data that has similar statistics to the + | inference data*. + | + | Args: + | axis: Integer, the axis that should be normalized + | (typically the features axis). For instance, after a `Conv2D` layer + | with `data_format="channels_first"`, use `axis=1`. + | momentum: Momentum for the moving average. + | epsilon: Small float added to variance to avoid dividing by zero. + | center: If `True`, add offset of `beta` to normalized tensor. + | If `False`, `beta` is ignored. + | scale: If `True`, multiply by `gamma`. If `False`, `gamma` is not used. + | When the next layer is linear this can be disabled + | since the scaling will be done by the next layer. + | beta_initializer: Initializer for the beta weight. + | gamma_initializer: Initializer for the gamma weight. + | moving_mean_initializer: Initializer for the moving mean. + | moving_variance_initializer: Initializer for the moving variance. + | beta_regularizer: Optional regularizer for the beta weight. + | gamma_regularizer: Optional regularizer for the gamma weight. + | beta_constraint: Optional constraint for the beta weight. + | gamma_constraint: Optional constraint for the gamma weight. + | synchronized: Only applicable with the TensorFlow backend. + | If `True`, synchronizes the global batch statistics (mean and + | variance) for the layer across all devices at each training step + | in a distributed training strategy. + | If `False`, each replica uses its own local batch statistics. + | **kwargs: Base layer keyword arguments (e.g. `name` and `dtype`). + | + | Call arguments: + | inputs: Input tensor (of any rank). + | training: Python boolean indicating whether the layer should behave in + | training mode or in inference mode. + | - `training=True`: The layer will normalize its inputs using + | the mean and variance of the current batch of inputs. + | - `training=False`: The layer will normalize its inputs using + | the mean and variance of its moving statistics, learned during + | training. + | mask: Binary tensor of shape broadcastable to `inputs` tensor, with + | `True` values indicating the positions for which mean and variance + | should be computed. Masked elements of the current inputs are not + | taken into account for mean and variance computation during + | training. Any prior unmasked element values will be taken into + | account until their momentum expires. + | + | Reference: + | + | - [Ioffe and Szegedy, 2015](https://arxiv.org/abs/1502.03167). + | + | **About setting `layer.trainable = False` on a `BatchNormalization` layer:** + | + | The meaning of setting `layer.trainable = False` is to freeze the layer, + | i.e.
its internal state will not change during training: + | its trainable weights will not be updated + | during `fit()` or `train_on_batch()`, and its state updates will not be run. + | + | Usually, this does not necessarily mean that the layer is run in inference + | mode (which is normally controlled by the `training` argument that can + | be passed when calling a layer). "Frozen state" and "inference mode" + | are two separate concepts. + | + | However, in the case of the `BatchNormalization` layer, **setting + | `trainable = False` on the layer means that the layer will be + | subsequently run in inference mode** (meaning that it will use + | the moving mean and the moving variance to normalize the current batch, + | rather than using the mean and variance of the current batch). + | + | Note that: + | + | - Setting `trainable` on a model containing other layers will recursively + | set the `trainable` value of all inner layers. + | - If the value of the `trainable` attribute is changed after calling + | `compile()` on a model, the new value doesn't take effect for this model + | until `compile()` is called again. + | + | Method resolution order: + | BatchNormalization + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | axis=-1, + | momentum=0.99, + | epsilon=0.001, + | center=True, + | scale=True, + | beta_initializer='zeros', + | gamma_initializer='ones', + | moving_mean_initializer='zeros', + | moving_variance_initializer='ones', + | beta_regularizer=None, + | gamma_regularizer=None, + | beta_constraint=None, + | gamma_constraint=None, + | synchronized=False, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, input_shape) + | + | call( + | self, + | inputs, + | training=None, + | mask=None + | ) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_bidirectional.txt b/.tether/man/layer_bidirectional.txt new file mode 100644 index 0000000000..fb798433e7 --- /dev/null +++ b/.tether/man/layer_bidirectional.txt @@ -0,0 +1,158 @@ +Help on class Bidirectional in module keras.src.layers.rnn.bidirectional: + +class Bidirectional(keras.src.layers.core.wrapper.Wrapper) + | Bidirectional(layer, merge_mode='concat', weights=None, backward_layer=None, **kwargs) + | + | Bidirectional wrapper for RNNs. + | + | Args: + | layer: `keras.layers.RNN` instance, such as + | `keras.layers.LSTM` or `keras.layers.GRU`. + | It could also be a `keras.layers.Layer` instance + | that meets the following criteria: + | 1. Be a sequence-processing layer (accepts 3D+ inputs). + | 2. Have a `go_backwards`, `return_sequences` and `return_state` + | attribute (with the same semantics as for the `RNN` class). + | 3. Have an `input_spec` attribute. + | 4. Implement serialization via `get_config()` and `from_config()`. + | Note that the recommended way to create new RNN layers is to write a + | custom RNN cell and use it with `keras.layers.RNN`, instead of + | subclassing `keras.layers.Layer` directly.
+ | When `return_sequences` is `True`, the output of the masked + | timestep will be zero regardless of the layer's original + | `zero_output_for_mask` value. + | merge_mode: Mode by which outputs of the forward and backward RNNs + | will be combined. One of `{"sum", "mul", "concat", "ave", None}`. + | If `None`, the outputs will not be combined, + | they will be returned as a list. Defaults to `"concat"`. + | backward_layer: Optional `keras.layers.RNN`, + | or `keras.layers.Layer` instance to be used to handle + | backwards input processing. + | If `backward_layer` is not provided, the layer instance passed + | as the `layer` argument will be used to generate the backward layer + | automatically. + | Note that the provided `backward_layer` layer should have properties + | matching those of the `layer` argument, in particular + | it should have the same values for `stateful`, `return_state`, + | `return_sequences`, etc. In addition, `backward_layer` + | and `layer` should have different `go_backwards` argument values. + | A `ValueError` will be raised if these requirements are not met. + | + | Call arguments: + | The call arguments for this layer are the same as those of the + | wrapped RNN layer. Beware that when passing the `initial_state` + | argument during the call of this layer, the first half in the + | list of elements in the `initial_state` list will be passed to + | the forward RNN call and the last half in the list of elements + | will be passed to the backward RNN call. + | + | Note: instantiating a `Bidirectional` layer from an existing RNN layer + | instance will not reuse the weights state of the RNN layer instance -- the + | `Bidirectional` layer will have freshly initialized weights. + | + | Examples: + | + | ```python + | model = Sequential([ + | Input(shape=(5, 10)), + | Bidirectional(LSTM(10, return_sequences=True)), + | Bidirectional(LSTM(10)), + | Dense(5, activation="softmax"), + | ]) + | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') + | + | # With custom backward layer + | forward_layer = LSTM(10, return_sequences=True) + | backward_layer = LSTM(10, activation='relu', return_sequences=True, + | go_backwards=True) + | model = Sequential([ + | Input(shape=(5, 10)), + | Bidirectional(forward_layer, backward_layer=backward_layer), + | Dense(5, activation="softmax"), + | ]) + | model.compile(loss='categorical_crossentropy', optimizer='rmsprop') + | ``` + | + | Method resolution order: + | Bidirectional + | keras.src.layers.core.wrapper.Wrapper + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | layer, + | merge_mode='concat', + | weights=None, + | backward_layer=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build( + | self, + | sequences_shape, + | initial_state_shape=None + | ) + | + | call( + | self, + | sequences, + | initial_state=None, + | mask=None, + | training=None + | ) + | + | compute_mask( + | self, + | _, + | mask + | ) + | + | compute_output_shape( + | self, + | sequences_shape, + | initial_state_shape=None + | ) + | + | get_config(self) + | Returns the config of the object.
+ | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + | reset_state(self) + | + | reset_states(self) + | + | ---------------------------------------------------------------------- + | Class methods defined here: + | + | from_config(config, custom_objects=None) from builtins.type + | Creates a layer from its config. + | + | This method is the reverse of `get_config`, + | capable of instantiating the same layer from the config + | dictionary. It does not handle layer connectivity + | (handled by Network), nor weights (handled by `set_weights`). + | + | Args: + | config: A Python dictionary, typically the + | output of get_config. + | + | Returns: + | A layer instance. + | + | ---------------------------------------------------------------------- + | Readonly properties defined here: + | + | states + | + diff --git a/.tether/man/layer_category_encoding.txt b/.tether/man/layer_category_encoding.txt new file mode 100644 index 0000000000..bf28647c8c --- /dev/null +++ b/.tether/man/layer_category_encoding.txt @@ -0,0 +1,125 @@ +Help on class CategoryEncoding in module keras.src.layers.preprocessing.category_encoding: + +class CategoryEncoding(keras.src.layers.preprocessing.tf_data_layer.TFDataLayer) + | CategoryEncoding(num_tokens=None, output_mode='multi_hot', sparse=False, **kwargs) + | + | A preprocessing layer which encodes integer features. + | + | This layer provides options for condensing data into a categorical encoding + | when the total number of tokens is known in advance. It accepts integer + | values as inputs, and it outputs a dense or sparse representation of those + | inputs. For integer inputs where the total number of tokens is not known, + | use `keras.layers.IntegerLookup` instead. + | + | **Note:** This layer is safe to use inside a `tf.data` pipeline + | (independently of which backend you're using). + | + | Examples: + | + | **One-hot encoding data** + | + | >>> layer = keras.layers.CategoryEncoding( + | ... num_tokens=4, output_mode="one_hot") + | >>> layer([3, 2, 0, 1]) + | array([[0., 0., 0., 1.], + | [0., 0., 1., 0.], + | [1., 0., 0., 0.], + | [0., 1., 0., 0.]]> + | + | **Multi-hot encoding data** + | + | >>> layer = keras.layers.CategoryEncoding( + | ... num_tokens=4, output_mode="multi_hot") + | >>> layer([[0, 1], [0, 0], [1, 2], [3, 1]]) + | array([[1., 1., 0., 0.], + | [1., 0., 0., 0.], + | [0., 1., 1., 0.], + | [0., 1., 0., 1.]]> + | + | **Using weighted inputs in `"count"` mode** + | + | >>> layer = keras.layers.CategoryEncoding( + | ... num_tokens=4, output_mode="count") + | >>> count_weights = np.array([[.1, .2], [.1, .1], [.2, .3], [.4, .2]]) + | >>> layer([[0, 1], [0, 0], [1, 2], [3, 1]], count_weights=count_weights) + | array([[0.1, 0.2, 0. , 0. ], + | [0.2, 0. , 0. , 0. ], + | [0. , 0.2, 0.3, 0. ], + | [0. , 0.2, 0. , 0.4]]> + | + | Args: + | num_tokens: The total number of tokens the layer should support. All + | inputs to the layer must be integers in the range `0 <= value < + | num_tokens`, or an error will be thrown. + | output_mode: Specification for the output of the layer. + | Values can be `"one_hot"`, `"multi_hot"` or `"count"`, + | configuring the layer as follows: + | - `"one_hot"`: Encodes each individual element in the input + | into an array of `num_tokens` size, containing a 1 at the + | element index. If the last dimension is size 1, will encode + | on that dimension. If the last dimension is not size 1, + | will append a new dimension for the encoded output.
+ | - `"multi_hot"`: Encodes each sample in the input into a single + | array of `num_tokens` size, containing a 1 for each + | vocabulary term present in the sample. Treats the last + | dimension as the sample dimension; if input shape is + | `(..., sample_length)`, output shape will be + | `(..., num_tokens)`. + | - `"count"`: Like `"multi_hot"`, but the int array contains a + | count of the number of times the token at that index + | appeared in the sample. + | For all output modes, only output up to rank 2 is currently + | supported. + | Defaults to `"multi_hot"`. + | sparse: Whether to return a sparse tensor; for backends that support + | sparse tensors. + | + | Call arguments: + | inputs: A 1D or 2D tensor of integer inputs. + | count_weights: A tensor of the same shape as `inputs` indicating the + | weight for each sample value when summing up in `count` mode. + | Not used in `"multi_hot"` or `"one_hot"` modes. + | + | Method resolution order: + | CategoryEncoding + | keras.src.layers.preprocessing.tf_data_layer.TFDataLayer + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | num_tokens=None, + | output_mode='multi_hot', + | sparse=False, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call( + | self, + | inputs, + | count_weights=None + | ) + | + | compute_output_shape(self, input_shape) + | + | compute_output_spec( + | self, + | inputs, + | count_weights=None + | ) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_center_crop.txt b/.tether/man/layer_center_crop.txt new file mode 100644 index 0000000000..c923ad92f6 --- /dev/null +++ b/.tether/man/layer_center_crop.txt @@ -0,0 +1,76 @@ +Help on class CenterCrop in module keras.src.layers.preprocessing.center_crop: + +class CenterCrop(keras.src.layers.preprocessing.tf_data_layer.TFDataLayer) + | CenterCrop(height, width, data_format=None, **kwargs) + | + | A preprocessing layer which crops images. + | + | This layer crops the central portion of the images to a target size. If an + | image is smaller than the target size, it will be resized and cropped + | so as to return the largest possible window in the image that matches + | the target aspect ratio. + | + | Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`). + | + | Input shape: + | 3D (unbatched) or 4D (batched) tensor with shape: + | `(..., height, width, channels)`, in `"channels_last"` format, + | or `(..., channels, height, width)`, in `"channels_first"` format. + | + | Output shape: + | 3D (unbatched) or 4D (batched) tensor with shape: + | `(..., target_height, target_width, channels)`, + | or `(..., channels, target_height, target_width)`, + | in `"channels_first"` format. + | + | If the input height/width is even and the target height/width is odd (or + | inversely), the input image is left-padded by 1 pixel. + | + | **Note:** This layer is safe to use inside a `tf.data` pipeline + | (independently of which backend you're using). + | + | Args: + | height: Integer, the height of the output shape.
+ | width: Integer, the width of the output shape. + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape `(batch, height, width, channels)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch, channels, height, width)`. It defaults to the + | `image_data_format` value found in your Keras config file at + | `~/.keras/keras.json`. If you never set it, then it will be + | `"channels_last"`. + | + | Method resolution order: + | CenterCrop + | keras.src.layers.preprocessing.tf_data_layer.TFDataLayer + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | height, + | width, + | data_format=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_concatenate.txt b/.tether/man/layer_concatenate.txt new file mode 100644 index 0000000000..9c982a0273 --- /dev/null +++ b/.tether/man/layer_concatenate.txt @@ -0,0 +1,67 @@ +Help on class Concatenate in module keras.src.layers.merging.concatenate: + +class Concatenate(keras.src.layers.merging.base_merge.Merge) + | Concatenate(axis=-1, **kwargs) + | + | Concatenates a list of inputs. + | + | It takes as input a list of tensors, all of the same shape except + | for the concatenation axis, and returns a single tensor that is the + | concatenation of all inputs. + | + | Examples: + | + | >>> x = np.arange(20).reshape(2, 2, 5) + | >>> y = np.arange(20, 30).reshape(2, 1, 5) + | >>> keras.layers.Concatenate(axis=1)([x, y]) + | + | Usage in a Keras model: + | + | >>> x1 = keras.layers.Dense(8)(np.arange(10).reshape(5, 2)) + | >>> x2 = keras.layers.Dense(8)(np.arange(10, 20).reshape(5, 2)) + | >>> y = keras.layers.Concatenate()([x1, x2]) + | + | Args: + | axis: Axis along which to concatenate. + | **kwargs: Standard layer keyword arguments. + | + | Returns: + | A tensor, the concatenation of the inputs alongside axis `axis`. + | + | Method resolution order: + | Concatenate + | keras.src.layers.merging.base_merge.Merge + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | axis=-1, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, input_shape) + | + | compute_mask( + | self, + | inputs, + | mask=None + | ) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. 
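The `get_config()`/`from_config()` pairing that recurs in the help pages above is a plain dict round trip. A minimal sketch (illustrative only; assumes a working `keras` install):

```python
import keras

layer = keras.layers.Concatenate(axis=1)
config = layer.get_config()  # serializable Python dict, includes 'axis'
clone = keras.layers.Concatenate.from_config(config)  # re-instantiate from config
assert clone.axis == layer.axis  # same configuration, freshly initialized state
```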
+ | + diff --git a/.tether/man/layer_conv_1d.txt b/.tether/man/layer_conv_1d.txt new file mode 100644 index 0000000000..08adce3b61 --- /dev/null +++ b/.tether/man/layer_conv_1d.txt @@ -0,0 +1,127 @@ +Help on class Conv1D in module keras.src.layers.convolutional.conv1d: + +class Conv1D(keras.src.layers.convolutional.base_conv.BaseConv) + | Conv1D(filters, kernel_size, strides=1, padding='valid', data_format=None, dilation_rate=1, groups=1, activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs) + | + | 1D convolution layer (e.g. temporal convolution). + | + | This layer creates a convolution kernel that is convolved with the layer + | input over a single spatial (or temporal) dimension to produce a tensor of + | outputs. If `use_bias` is True, a bias vector is created and added to the + | outputs. Finally, if `activation` is not `None`, it is applied to the + | outputs as well. + | + | Args: + | filters: int, the dimension of the output space (the number of filters + | in the convolution). + | kernel_size: int or tuple/list of 1 integer, specifying the size of the + | convolution window. + | strides: int or tuple/list of 1 integer, specifying the stride length + | of the convolution. `strides > 1` is incompatible with + | `dilation_rate > 1`. + | padding: string, `"valid"`, `"same"` or `"causal"` (case-insensitive). + | `"valid"` means no padding. `"same"` results in padding evenly to + | the left/right or up/down of the input. When `padding="same"` and + | `strides=1`, the output has the same size as the input. + | `"causal"` results in causal (dilated) convolutions, e.g. `output[t]` + | does not depend on `input[t+1:]`. Useful when modeling temporal data + | where the model should not violate the temporal order. + | See [WaveNet: A Generative Model for Raw Audio, section 2.1]( + | https://arxiv.org/abs/1609.03499). + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape `(batch, steps, features)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch, features, steps)`. It defaults to the `image_data_format` + | value found in your Keras config file at `~/.keras/keras.json`. + | If you never set it, then it will be `"channels_last"`. + | dilation_rate: int or tuple/list of 1 integer, specifying the dilation + | rate to use for dilated convolution. + | groups: A positive int specifying the number of groups in which the + | input is split along the channel axis. Each group is convolved + | separately with `filters // groups` filters. The output is the + | concatenation of all the `groups` results along the channel axis. + | Input channels and `filters` must both be divisible by `groups`. + | activation: Activation function. If `None`, no activation is applied. + | use_bias: bool, if `True`, bias will be added to the output. + | kernel_initializer: Initializer for the convolution kernel. If `None`, + | the default initializer (`"glorot_uniform"`) will be used. + | bias_initializer: Initializer for the bias vector. If `None`, the + | default initializer (`"zeros"`) will be used. + | kernel_regularizer: Optional regularizer for the convolution kernel. + | bias_regularizer: Optional regularizer for the bias vector. + | activity_regularizer: Optional regularizer function for the output.
+ | kernel_constraint: Optional projection function to be applied to the + | kernel after being updated by an `Optimizer` (e.g. used to implement + | norm constraints or value constraints for layer weights). The + | function must take as input the unprojected variable and must return + | the projected variable (which must have the same shape). Constraints + | are not safe to use when doing asynchronous distributed training. + | bias_constraint: Optional projection function to be applied to the + | bias after being updated by an `Optimizer`. + | + | Input shape: + | - If `data_format="channels_last"`: + | A 3D tensor with shape: `(batch_shape, steps, channels)` + | - If `data_format="channels_first"`: + | A 3D tensor with shape: `(batch_shape, channels, steps)` + | + | Output shape: + | - If `data_format="channels_last"`: + | A 3D tensor with shape: `(batch_shape, new_steps, filters)` + | - If `data_format="channels_first"`: + | A 3D tensor with shape: `(batch_shape, filters, new_steps)` + | + | Returns: + | A 3D tensor representing `activation(conv1d(inputs, kernel) + bias)`. + | + | Raises: + | ValueError: when both `strides > 1` and `dilation_rate > 1`. + | + | Example: + | + | >>> # The inputs are 128-length vectors with 10 timesteps, and the + | >>> # batch size is 4. + | >>> x = np.random.rand(4, 10, 128) + | >>> y = keras.layers.Conv1D(32, 3, activation='relu')(x) + | >>> print(y.shape) + | (4, 8, 32) + | + | Method resolution order: + | Conv1D + | keras.src.layers.convolutional.base_conv.BaseConv + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | filters, + | kernel_size, + | strides=1, + | padding='valid', + | data_format=None, + | dilation_rate=1, + | groups=1, + | activation=None, + | use_bias=True, + | kernel_initializer='glorot_uniform', + | bias_initializer='zeros', + | kernel_regularizer=None, + | bias_regularizer=None, + | activity_regularizer=None, + | kernel_constraint=None, + | bias_constraint=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + diff --git a/.tether/man/layer_conv_1d_transpose.txt b/.tether/man/layer_conv_1d_transpose.txt new file mode 100644 index 0000000000..8a2c9b39f0 --- /dev/null +++ b/.tether/man/layer_conv_1d_transpose.txt @@ -0,0 +1,119 @@ +Help on class Conv1DTranspose in module keras.src.layers.convolutional.conv1d_transpose: + +class Conv1DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseConvTranspose) + | Conv1DTranspose(filters, kernel_size, strides=1, padding='valid', data_format=None, dilation_rate=1, activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs) + | + | 1D transposed convolution layer. + | + | The need for transposed convolutions generally arises from the desire to use + | a transformation going in the opposite direction of a normal convolution, + | i.e., from something that has the shape of the output of some convolution + | to something that has the shape of its input while maintaining a + | connectivity pattern that is compatible with said convolution.
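The "opposite direction" relationship described above shows up directly in the shape arithmetic: with `padding="valid"`, `Conv1D` maps `steps` to `floor((steps - kernel_size) / strides) + 1`, while `Conv1DTranspose` maps it back to `(steps - 1) * strides + kernel_size`. A minimal sketch (shapes only, our own illustration; the round trip is exact only when `(steps - kernel_size) % strides == 0`):

```python
import keras
import numpy as np

x = np.random.rand(4, 10, 128)                              # (batch, steps, channels)
down = keras.layers.Conv1D(32, 3, strides=2)(x)             # (4, 4, 32)
up = keras.layers.Conv1DTranspose(128, 3, strides=2)(down)  # (4, 9, 128)
print(down.shape, up.shape)  # 10 -> floor((10-3)/2)+1 = 4 -> (4-1)*2+3 = 9
```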
+ | + | Args: + | filters: int, the dimension of the output space (the number of filters + | in the transpose convolution). + | kernel_size: int or tuple/list of 1 integer, specifying the size of the + | transposed convolution window. + | strides: int or tuple/list of 1 integer, specifying the stride length + | of the transposed convolution. `strides > 1` is incompatible with + | `dilation_rate > 1`. + | padding: string, either `"valid"` or `"same"` (case-insensitive). + | `"valid"` means no padding. `"same"` results in padding evenly to + | the left/right or up/down of the input such that output has the same + | height/width dimension as the input. + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape `(batch, steps, features)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch, features, steps)`. It defaults to the `image_data_format` + | value found in your Keras config file at `~/.keras/keras.json`. + | If you never set it, then it will be `"channels_last"`. + | dilation_rate: int or tuple/list of 1 integers, specifying the dilation + | rate to use for dilated transposed convolution. + | activation: Activation function. If `None`, no activation is applied. + | use_bias: bool, if `True`, bias will be added to the output. + | kernel_initializer: Initializer for the convolution kernel. If `None`, + | the default initializer (`"glorot_uniform"`) will be used. + | bias_initializer: Initializer for the bias vector. If `None`, the + | default initializer (`"zeros"`) will be used. + | kernel_regularizer: Optional regularizer for the convolution kernel. + | bias_regularizer: Optional regularizer for the bias vector. + | activity_regularizer: Optional regularizer function for the output. + | kernel_constraint: Optional projection function to be applied to the + | kernel after being updated by an `Optimizer` (e.g. used to implement + | norm constraints or value constraints for layer weights). The + | function must take as input the unprojected variable and must return + | the projected variable (which must have the same shape). Constraints + | are not safe to use when doing asynchronous distributed training. + | bias_constraint: Optional projection function to be applied to the + | bias after being updated by an `Optimizer`. + | + | Input shape: + | - If `data_format="channels_last"`: + | A 3D tensor with shape: `(batch_shape, steps, channels)` + | - If `data_format="channels_first"`: + | A 3D tensor with shape: `(batch_shape, channels, steps)` + | + | Output shape: + | - If `data_format="channels_last"`: + | A 3D tensor with shape: `(batch_shape, new_steps, filters)` + | - If `data_format="channels_first"`: + | A 3D tensor with shape: `(batch_shape, filters, new_steps)` + | + | Returns: + | A 3D tensor representing + | `activation(conv1d_transpose(inputs, kernel) + bias)`. + | + | Raises: + | ValueError: when both `strides > 1` and `dilation_rate > 1`. 
+ | + | References: + | - [A guide to convolution arithmetic for deep learning]( + | https://arxiv.org/abs/1603.07285v1) + | - [Deconvolutional Networks]( + | https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf) + | + | Example: + | + | >>> x = np.random.rand(4, 10, 128) + | >>> y = keras.layers.Conv1DTranspose(32, 3, 2, activation='relu')(x) + | >>> print(y.shape) + | (4, 21, 32) + | + | Method resolution order: + | Conv1DTranspose + | keras.src.layers.convolutional.base_conv_transpose.BaseConvTranspose + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | filters, + | kernel_size, + | strides=1, + | padding='valid', + | data_format=None, + | dilation_rate=1, + | activation=None, + | use_bias=True, + | kernel_initializer='glorot_uniform', + | bias_initializer='zeros', + | kernel_regularizer=None, + | bias_regularizer=None, + | activity_regularizer=None, + | kernel_constraint=None, + | bias_constraint=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + diff --git a/.tether/man/layer_conv_2d.txt b/.tether/man/layer_conv_2d.txt new file mode 100644 index 0000000000..22a0cca9b5 --- /dev/null +++ b/.tether/man/layer_conv_2d.txt @@ -0,0 +1,120 @@ +Help on class Conv2D in module keras.src.layers.convolutional.conv2d: + +class Conv2D(keras.src.layers.convolutional.base_conv.BaseConv) + | Conv2D(filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), groups=1, activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs) + | + | 2D convolution layer. + | + | This layer creates a convolution kernel that is convolved with the layer + | input over two spatial dimensions (height and width) to produce a tensor of + | outputs. If `use_bias` is True, a bias vector is created and added to the + | outputs. Finally, if `activation` is not `None`, it is applied to the + | outputs as well. + | + | Args: + | filters: int, the dimension of the output space (the number of filters + | in the convolution). + | kernel_size: int or tuple/list of 2 integers, specifying the size of the + | convolution window. + | strides: int or tuple/list of 2 integers, specifying the stride length + | of the convolution. `strides > 1` is incompatible with + | `dilation_rate > 1`. + | padding: string, either `"valid"` or `"same"` (case-insensitive). + | `"valid"` means no padding. `"same"` results in padding evenly to + | the left/right or up/down of the input. When `padding="same"` and + | `strides=1`, the output has the same size as the input. + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape + | `(batch_size, height, width, channels)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch_size, channels, height, width)`. It defaults to the + | `image_data_format` value found in your Keras config file at + | `~/.keras/keras.json`. If you never set it, then it will be + | `"channels_last"`.
+ | dilation_rate: int or tuple/list of 2 integers, specifying the dilation + | rate to use for dilated convolution. + | groups: A positive int specifying the number of groups in which the + | input is split along the channel axis. Each group is convolved + | separately with `filters // groups` filters. The output is the + | concatenation of all the `groups` results along the channel axis. + | Input channels and `filters` must both be divisible by `groups`. + | activation: Activation function. If `None`, no activation is applied. + | use_bias: bool, if `True`, bias will be added to the output. + | kernel_initializer: Initializer for the convolution kernel. If `None`, + | the default initializer (`"glorot_uniform"`) will be used. + | bias_initializer: Initializer for the bias vector. If `None`, the + | default initializer (`"zeros"`) will be used. + | kernel_regularizer: Optional regularizer for the convolution kernel. + | bias_regularizer: Optional regularizer for the bias vector. + | activity_regularizer: Optional regularizer function for the output. + | kernel_constraint: Optional projection function to be applied to the + | kernel after being updated by an `Optimizer` (e.g. used to implement + | norm constraints or value constraints for layer weights). The + | function must take as input the unprojected variable and must return + | the projected variable (which must have the same shape). Constraints + | are not safe to use when doing asynchronous distributed training. + | bias_constraint: Optional projection function to be applied to the + | bias after being updated by an `Optimizer`. + | + | Input shape: + | - If `data_format="channels_last"`: + | A 4D tensor with shape: `(batch_size, height, width, channels)` + | - If `data_format="channels_first"`: + | A 4D tensor with shape: `(batch_size, channels, height, width)` + | + | Output shape: + | - If `data_format="channels_last"`: + | A 4D tensor with shape: `(batch_size, new_height, new_width, filters)` + | - If `data_format="channels_first"`: + | A 4D tensor with shape: `(batch_size, filters, new_height, new_width)` + | + | Returns: + | A 4D tensor representing `activation(conv2d(inputs, kernel) + bias)`. + | + | Raises: + | ValueError: when both `strides > 1` and `dilation_rate > 1`. + | + | Example: + | + | >>> x = np.random.rand(4, 10, 10, 128) + | >>> y = keras.layers.Conv2D(32, 3, activation='relu')(x) + | >>> print(y.shape) + | (4, 8, 8, 32) + | + | Method resolution order: + | Conv2D + | keras.src.layers.convolutional.base_conv.BaseConv + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | filters, + | kernel_size, + | strides=(1, 1), + | padding='valid', + | data_format=None, + | dilation_rate=(1, 1), + | groups=1, + | activation=None, + | use_bias=True, + | kernel_initializer='glorot_uniform', + | bias_initializer='zeros', + | kernel_regularizer=None, + | bias_regularizer=None, + | activity_regularizer=None, + | kernel_constraint=None, + | bias_constraint=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. 
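The divisibility rule for `groups` stated above can be made concrete with a small sketch (illustrative values, our own example; assumes a working `keras` install):

```python
import keras
import numpy as np

x = np.random.rand(2, 8, 8, 16)  # 16 input channels
# groups=4: each group convolves 16/4 = 4 input channels with 32/4 = 8 filters.
y = keras.layers.Conv2D(filters=32, kernel_size=3, groups=4)(x)
print(y.shape)  # (2, 6, 6, 32)
# groups=5 would fail: neither 16 input channels nor 32 filters is divisible by 5.
```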
+ | + diff --git a/.tether/man/layer_conv_2d_transpose.txt b/.tether/man/layer_conv_2d_transpose.txt new file mode 100644 index 0000000000..2116326675 --- /dev/null +++ b/.tether/man/layer_conv_2d_transpose.txt @@ -0,0 +1,121 @@ +Help on class Conv2DTranspose in module keras.src.layers.convolutional.conv2d_transpose: + +class Conv2DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseConvTranspose) + | Conv2DTranspose(filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs) + | + | 2D transposed convolution layer. + | + | The need for transposed convolutions generally arises from the desire to use + | a transformation going in the opposite direction of a normal convolution, + | i.e., from something that has the shape of the output of some convolution + | to something that has the shape of its input while maintaining a + | connectivity pattern that is compatible with said convolution. + | + | Args: + | filters: int, the dimension of the output space (the number of filters + | in the transposed convolution). + | kernel_size: int or tuple/list of 2 integers, specifying the size of the + | transposed convolution window. + | strides: int or tuple/list of 2 integers, specifying the stride length + | of the transposed convolution. `strides > 1` is incompatible with + | `dilation_rate > 1`. + | padding: string, either `"valid"` or `"same"` (case-insensitive). + | `"valid"` means no padding. `"same"` results in padding evenly to + | the left/right or up/down of the input. When `padding="same"` and + | `strides=1`, the output has the same size as the input. + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape + | `(batch_size, height, width, channels)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch_size, channels, height, width)`. It defaults to the + | `image_data_format` value found in your Keras config file at + | `~/.keras/keras.json`. If you never set it, then it will be + | `"channels_last"`. + | dilation_rate: int or tuple/list of 2 integers, specifying the dilation + | rate to use for dilated transposed convolution. + | activation: Activation function. If `None`, no activation is applied. + | use_bias: bool, if `True`, bias will be added to the output. + | kernel_initializer: Initializer for the convolution kernel. If `None`, + | the default initializer (`"glorot_uniform"`) will be used. + | bias_initializer: Initializer for the bias vector. If `None`, the + | default initializer (`"zeros"`) will be used. + | kernel_regularizer: Optional regularizer for the convolution kernel. + | bias_regularizer: Optional regularizer for the bias vector. + | activity_regularizer: Optional regularizer function for the output. + | kernel_constraint: Optional projection function to be applied to the + | kernel after being updated by an `Optimizer` (e.g. used to implement + | norm constraints or value constraints for layer weights). The + | function must take as input the unprojected variable and must return + | the projected variable (which must have the same shape). Constraints + | are not safe to use when doing asynchronous distributed training.
+ | bias_constraint: Optional projection function to be applied to the + | bias after being updated by an `Optimizer`. + | + | Input shape: + | - If `data_format="channels_last"`: + | A 4D tensor with shape: `(batch_size, height, width, channels)` + | - If `data_format="channels_first"`: + | A 4D tensor with shape: `(batch_size, channels, height, width)` + | + | Output shape: + | - If `data_format="channels_last"`: + | A 4D tensor with shape: `(batch_size, new_height, new_width, filters)` + | - If `data_format="channels_first"`: + | A 4D tensor with shape: `(batch_size, filters, new_height, new_width)` + | + | Returns: + | A 4D tensor representing + | `activation(conv2d_transpose(inputs, kernel) + bias)`. + | + | Raises: + | ValueError: when both `strides > 1` and `dilation_rate > 1`. + | + | References: + | - [A guide to convolution arithmetic for deep learning]( + | https://arxiv.org/abs/1603.07285v1) + | - [Deconvolutional Networks]( + | https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf) + | + | Example: + | + | >>> x = np.random.rand(4, 10, 8, 128) + | >>> y = keras.layers.Conv2DTranspose(32, 2, 2, activation='relu')(x) + | >>> print(y.shape) + | (4, 20, 16, 32) + | + | Method resolution order: + | Conv2DTranspose + | keras.src.layers.convolutional.base_conv_transpose.BaseConvTranspose + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | filters, + | kernel_size, + | strides=(1, 1), + | padding='valid', + | data_format=None, + | dilation_rate=(1, 1), + | activation=None, + | use_bias=True, + | kernel_initializer='glorot_uniform', + | bias_initializer='zeros', + | kernel_regularizer=None, + | bias_regularizer=None, + | activity_regularizer=None, + | kernel_constraint=None, + | bias_constraint=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + diff --git a/.tether/man/layer_conv_3d.txt b/.tether/man/layer_conv_3d.txt new file mode 100644 index 0000000000..9419fe72ce --- /dev/null +++ b/.tether/man/layer_conv_3d.txt @@ -0,0 +1,126 @@ +Help on class Conv3D in module keras.src.layers.convolutional.conv3d: + +class Conv3D(keras.src.layers.convolutional.base_conv.BaseConv) + | Conv3D(filters, kernel_size, strides=(1, 1, 1), padding='valid', data_format=None, dilation_rate=(1, 1, 1), groups=1, activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs) + | + | 3D convolution layer. + | + | This layer creates a convolution kernel that is convolved with the layer + | input over three spatial (or spatio-temporal) dimensions to produce a tensor of + | outputs. If `use_bias` is True, a bias vector is created and added to the + | outputs. Finally, if `activation` is not `None`, it is applied to the + | outputs as well. + | + | Args: + | filters: int, the dimension of the output space (the number of filters + | in the convolution). + | kernel_size: int or tuple/list of 3 integers, specifying the size of the + | convolution window. + | strides: int or tuple/list of 3 integers, specifying the stride length + | of the convolution.
`strides > 1` is incompatible with + | `dilation_rate > 1`. + | padding: string, either `"valid"` or `"same"` (case-insensitive). + | `"valid"` means no padding. `"same"` results in padding evenly to + | the left/right or up/down of the input. When `padding="same"` and + | `strides=1`, the output has the same size as the input. + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape + | `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. + | It defaults to the `image_data_format` value found in your Keras + | config file at `~/.keras/keras.json`. If you never set it, then it + | will be `"channels_last"`. + | dilation_rate: int or tuple/list of 3 integers, specifying the dilation + | rate to use for dilated convolution. + | groups: A positive int specifying the number of groups in which the + | input is split along the channel axis. Each group is convolved + | separately with `filters // groups` filters. The output is the + | concatenation of all the `groups` results along the channel axis. + | Input channels and `filters` must both be divisible by `groups`. + | activation: Activation function. If `None`, no activation is applied. + | use_bias: bool, if `True`, bias will be added to the output. + | kernel_initializer: Initializer for the convolution kernel. If `None`, + | the default initializer (`"glorot_uniform"`) will be used. + | bias_initializer: Initializer for the bias vector. If `None`, the + | default initializer (`"zeros"`) will be used. + | kernel_regularizer: Optional regularizer for the convolution kernel. + | bias_regularizer: Optional regularizer for the bias vector. + | activity_regularizer: Optional regularizer function for the output. + | kernel_constraint: Optional projection function to be applied to the + | kernel after being updated by an `Optimizer` (e.g. used to implement + | norm constraints or value constraints for layer weights). The + | function must take as input the unprojected variable and must return + | the projected variable (which must have the same shape). Constraints + | are not safe to use when doing asynchronous distributed training. + | bias_constraint: Optional projection function to be applied to the + | bias after being updated by an `Optimizer`. + | + | Input shape: + | - If `data_format="channels_last"`: + | 5D tensor with shape: + | `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + | - If `data_format="channels_first"`: + | 5D tensor with shape: + | `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` + | + | Output shape: + | - If `data_format="channels_last"`: + | 5D tensor with shape: + | `(batch_size, new_spatial_dim1, new_spatial_dim2, new_spatial_dim3, + | filters)` + | - If `data_format="channels_first"`: + | 5D tensor with shape: + | `(batch_size, filters, new_spatial_dim1, new_spatial_dim2, + | new_spatial_dim3)` + | + | Returns: + | A 5D tensor representing `activation(conv3d(inputs, kernel) + bias)`. + | + | Raises: + | ValueError: when both `strides > 1` and `dilation_rate > 1`. 
+ | + | Example: + | + | >>> x = np.random.rand(4, 10, 10, 10, 128) + | >>> y = keras.layers.Conv3D(32, 3, activation='relu')(x) + | >>> print(y.shape) + | (4, 8, 8, 8, 32) + | + | Method resolution order: + | Conv3D + | keras.src.layers.convolutional.base_conv.BaseConv + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | filters, + | kernel_size, + | strides=(1, 1, 1), + | padding='valid', + | data_format=None, + | dilation_rate=(1, 1, 1), + | groups=1, + | activation=None, + | use_bias=True, + | kernel_initializer='glorot_uniform', + | bias_initializer='zeros', + | kernel_regularizer=None, + | bias_regularizer=None, + | activity_regularizer=None, + | kernel_constraint=None, + | bias_constraint=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + diff --git a/.tether/man/layer_conv_3d_transpose.txt b/.tether/man/layer_conv_3d_transpose.txt new file mode 100644 index 0000000000..37c918684f --- /dev/null +++ b/.tether/man/layer_conv_3d_transpose.txt @@ -0,0 +1,126 @@ +Help on class Conv3DTranspose in module keras.src.layers.convolutional.conv3d_transpose: + +class Conv3DTranspose(keras.src.layers.convolutional.base_conv_transpose.BaseConvTranspose) + | Conv3DTranspose(filters, kernel_size, strides=(1, 1, 1), padding='valid', data_format=None, dilation_rate=(1, 1, 1), activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs) + | + | 3D transposed convolution layer. + | + | The need for transposed convolutions generally arises from the desire to use + | a transformation going in the opposite direction of a normal convolution, + | i.e., from something that has the shape of the output of some convolution + | to something that has the shape of its input while maintaining a + | connectivity pattern that is compatible with said convolution. + | + | Args: + | filters: int, the dimension of the output space (the number of filters + | in the transposed convolution). + | kernel_size: int or tuple/list of 3 integers, specifying the size of the + | transposed convolution window. + | strides: int or tuple/list of 3 integers, specifying the stride length + | of the transposed convolution. `strides > 1` is incompatible with + | `dilation_rate > 1`. + | padding: string, either `"valid"` or `"same"` (case-insensitive). + | `"valid"` means no padding. `"same"` results in padding evenly to + | the left/right or up/down of the input. When `padding="same"` and + | `strides=1`, the output has the same size as the input. + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape + | `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. + | It defaults to the `image_data_format` value found in your Keras + | config file at `~/.keras/keras.json`. If you never set it, then it + | will be `"channels_last"`.
+ | dilation_rate: int or tuple/list of 3 integers, specifying the dilation + | rate to use for dilated transposed convolution. + | activation: Activation function. If `None`, no activation is applied. + | use_bias: bool, if `True`, bias will be added to the output. + | kernel_initializer: Initializer for the convolution kernel. If `None`, + | the default initializer (`"glorot_uniform"`) will be used. + | bias_initializer: Initializer for the bias vector. If `None`, the + | default initializer (`"zeros"`) will be used. + | kernel_regularizer: Optional regularizer for the convolution kernel. + | bias_regularizer: Optional regularizer for the bias vector. + | activity_regularizer: Optional regularizer function for the output. + | kernel_constraint: Optional projection function to be applied to the + | kernel after being updated by an `Optimizer` (e.g. used to implement + | norm constraints or value constraints for layer weights). The + | function must take as input the unprojected variable and must return + | the projected variable (which must have the same shape). Constraints + | are not safe to use when doing asynchronous distributed training. + | bias_constraint: Optional projection function to be applied to the + | bias after being updated by an `Optimizer`. + | + | Input shape: + | - If `data_format="channels_last"`: + | 5D tensor with shape: + | `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + | - If `data_format="channels_first"`: + | 5D tensor with shape: + | `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` + | + | Output shape: + | - If `data_format="channels_last"`: + | 5D tensor with shape: + | `(batch_size, new_spatial_dim1, new_spatial_dim2, new_spatial_dim3, + | filters)` + | - If `data_format="channels_first"`: + | 5D tensor with shape: + | `(batch_size, filters, new_spatial_dim1, new_spatial_dim2, + | new_spatial_dim3)` + | + | Returns: + | A 5D tensor representing `activation(conv3d_transpose(inputs, kernel) + bias)`. + | + | Raises: + | ValueError: when both `strides > 1` and `dilation_rate > 1`. + | + | References: + | - [A guide to convolution arithmetic for deep learning]( + | https://arxiv.org/abs/1603.07285v1) + | - [Deconvolutional Networks]( + | https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf) + | + | Example: + | + | >>> x = np.random.rand(4, 10, 8, 12, 128) + | >>> y = keras.layers.Conv3DTranspose(32, 2, 2, activation='relu')(x) + | >>> print(y.shape) + | (4, 20, 16, 24, 32) + | + | Method resolution order: + | Conv3DTranspose + | keras.src.layers.convolutional.base_conv_transpose.BaseConvTranspose + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | filters, + | kernel_size, + | strides=(1, 1, 1), + | padding='valid', + | data_format=None, + | dilation_rate=(1, 1, 1), + | activation=None, + | use_bias=True, + | kernel_initializer='glorot_uniform', + | bias_initializer='zeros', + | kernel_regularizer=None, + | bias_regularizer=None, + | activity_regularizer=None, + | kernel_constraint=None, + | bias_constraint=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature.
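These tether files back the R wrappers of the same names (`layer_conv_2d_transpose()`, `layer_conv_3d_transpose()`). As a rough illustration of the R-side call, here is a minimal sketch mirroring the Conv3DTranspose doctest above; it assumes keras3's `random_normal()` and `shape()` helpers, and that layers compose with eager tensors via the pipe:

```r
library(keras3)

# Random 5D feature map: (batch, dim1, dim2, dim3, channels)
x <- random_normal(c(4, 10, 8, 12, 128))

# kernel_size = strides = 2 doubles each spatial dimension,
# as in the Python doctest above
y <- x |> layer_conv_3d_transpose(filters = 32, kernel_size = 2,
                                  strides = 2, activation = "relu")
shape(y)  # (4, 20, 16, 24, 32)
```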
+ | + diff --git a/.tether/man/layer_conv_lstm_1d.txt b/.tether/man/layer_conv_lstm_1d.txt new file mode 100644 index 0000000000..0e9ff1e881 --- /dev/null +++ b/.tether/man/layer_conv_lstm_1d.txt @@ -0,0 +1,168 @@ +Help on class ConvLSTM1D in module keras.src.layers.rnn.conv_lstm1d: + +class ConvLSTM1D(keras.src.layers.rnn.conv_lstm.ConvLSTM) + | ConvLSTM1D(filters, kernel_size, strides=1, padding='valid', data_format=None, dilation_rate=1, activation='tanh', recurrent_activation='sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', unit_forget_bias=True, kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0.0, recurrent_dropout=0.0, seed=None, return_sequences=False, return_state=False, go_backwards=False, stateful=False, **kwargs) + | + | 1D Convolutional LSTM. + | + | Similar to an LSTM layer, but the input transformations + | and recurrent transformations are both convolutional. + | + | Args: + | filters: int, the dimension of the output space (the number of filters + | in the convolution). + | kernel_size: int or tuple/list of 1 integer, specifying the size of + | the convolution window. + | strides: int or tuple/list of 1 integer, specifying the stride length + | of the convolution. `strides > 1` is incompatible with + | `dilation_rate > 1`. + | padding: string, `"valid"` or `"same"` (case-insensitive). + | `"valid"` means no padding. `"same"` results in padding evenly to + | the left/right or up/down of the input such that output has the + | same height/width dimension as the input. + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape `(batch, time, rows, channels)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch, time, channels, rows)`. It defaults to the `image_data_format` + | value found in your Keras config file at `~/.keras/keras.json`. + | If you never set it, then it will be `"channels_last"`. + | dilation_rate: int or tuple/list of 1 integers, specifying the dilation + | rate to use for dilated convolution. + | activation: Activation function to use. By default hyperbolic tangent + | activation function is applied (`tanh(x)`). + | recurrent_activation: Activation function to use for the recurrent step. + | use_bias: Boolean, whether the layer uses a bias vector. + | kernel_initializer: Initializer for the `kernel` weights matrix, + | used for the linear transformation of the inputs. + | recurrent_initializer: Initializer for the `recurrent_kernel` weights + | matrix, used for the linear transformation of the recurrent state. + | bias_initializer: Initializer for the bias vector. + | unit_forget_bias: Boolean. If `True`, add 1 to the bias of + | the forget gate at initialization. + | Use in combination with `bias_initializer="zeros"`. + | This is recommended in [Jozefowicz et al., 2015]( + | http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) + | kernel_regularizer: Regularizer function applied to the `kernel` weights + | matrix. + | recurrent_regularizer: Regularizer function applied to the + | `recurrent_kernel` weights matrix. + | bias_regularizer: Regularizer function applied to the bias vector. + | activity_regularizer: Regularizer function applied to the output of the + | layer (its "activation").
+ | kernel_constraint: Constraint function applied to the `kernel` weights + | matrix. + | recurrent_constraint: Constraint function applied to the + | `recurrent_kernel` weights matrix. + | bias_constraint: Constraint function applied to the bias vector. + | dropout: Float between 0 and 1. Fraction of the units to drop for the + | linear transformation of the inputs. + | recurrent_dropout: Float between 0 and 1. Fraction of the units to drop + | for the linear transformation of the recurrent state. + | seed: Random seed for dropout. + | return_sequences: Boolean. Whether to return the last output + | in the output sequence, or the full sequence. Default: `False`. + | return_state: Boolean. Whether to return the last state in addition + | to the output. Default: `False`. + | go_backwards: Boolean (default: `False`). + | If `True`, process the input sequence backwards and return the + | reversed sequence. + | stateful: Boolean (default False). If `True`, the last state + | for each sample at index i in a batch will be used as initial + | state for the sample of index i in the following batch. + | unroll: Boolean (default: `False`). + | If `True`, the network will be unrolled, + | else a symbolic loop will be used. + | Unrolling can speed-up a RNN, + | although it tends to be more memory-intensive. + | Unrolling is only suitable for short sequences. + | + | + | Call arguments: + | inputs: A 4D tensor. + | initial_state: List of initial state tensors to be passed to the first + | call of the cell. + | mask: Binary tensor of shape `(samples, timesteps)` indicating whether a + | given timestep should be masked. + | training: Python boolean indicating whether the layer should behave in + | training mode or in inference mode. + | This is only relevant if `dropout` or `recurrent_dropout` are set. + | + | Input shape: + | + | - If `data_format="channels_first"`: + | 4D tensor with shape: `(samples, time, channels, rows)` + | - If `data_format="channels_last"`: + | 4D tensor with shape: `(samples, time, rows, channels)` + | + | Output shape: + | + | - If `return_state`: a list of tensors. The first tensor is the output. + | The remaining tensors are the last states, + | each 3D tensor with shape: `(samples, filters, new_rows)` if + | `data_format='channels_first'` + | or shape: `(samples, new_rows, filters)` if + | `data_format='channels_last'`. + | `rows` values might have changed due to padding. + | - If `return_sequences`: 4D tensor with shape: `(samples, timesteps, + | filters, new_rows)` if data_format='channels_first' + | or shape: `(samples, timesteps, new_rows, filters)` if + | `data_format='channels_last'`. + | - Else, 3D tensor with shape: `(samples, filters, new_rows)` if + | `data_format='channels_first'` + | or shape: `(samples, new_rows, filters)` if + | `data_format='channels_last'`. + | + | References: + | + | - [Shi et al., 2015](http://arxiv.org/abs/1506.04214v1) + | (the current implementation does not include the feedback loop on the + | cells output). 
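The ConvLSTM help pages ship without a usage example. A minimal R sketch for `layer_conv_lstm_1d()` (the wrapper this tether file backs), under the same `random_normal()`/`shape()` assumptions as the earlier sketch:

```r
library(keras3)

# (samples, time, rows, channels)
x <- random_normal(c(32, 10, 128, 4))

# return_sequences = TRUE keeps the time axis in the output
y <- x |> layer_conv_lstm_1d(filters = 16, kernel_size = 3,
                             padding = "same", return_sequences = TRUE)
shape(y)  # (32, 10, 128, 16)
```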
+ | + | Method resolution order: + | ConvLSTM1D + | keras.src.layers.rnn.conv_lstm.ConvLSTM + | keras.src.layers.rnn.rnn.RNN + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | filters, + | kernel_size, + | strides=1, + | padding='valid', + | data_format=None, + | dilation_rate=1, + | activation='tanh', + | recurrent_activation='sigmoid', + | use_bias=True, + | kernel_initializer='glorot_uniform', + | recurrent_initializer='orthogonal', + | bias_initializer='zeros', + | unit_forget_bias=True, + | kernel_regularizer=None, + | recurrent_regularizer=None, + | bias_regularizer=None, + | activity_regularizer=None, + | kernel_constraint=None, + | recurrent_constraint=None, + | bias_constraint=None, + | dropout=0.0, + | recurrent_dropout=0.0, + | seed=None, + | return_sequences=False, + | return_state=False, + | go_backwards=False, + | stateful=False, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + diff --git a/.tether/man/layer_conv_lstm_2d.txt b/.tether/man/layer_conv_lstm_2d.txt new file mode 100644 index 0000000000..f8217306e0 --- /dev/null +++ b/.tether/man/layer_conv_lstm_2d.txt @@ -0,0 +1,168 @@ +Help on class ConvLSTM2D in module keras.src.layers.rnn.conv_lstm2d: + +class ConvLSTM2D(keras.src.layers.rnn.conv_lstm.ConvLSTM) + | ConvLSTM2D(filters, kernel_size, strides=1, padding='valid', data_format=None, dilation_rate=1, activation='tanh', recurrent_activation='sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', unit_forget_bias=True, kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0.0, recurrent_dropout=0.0, seed=None, return_sequences=False, return_state=False, go_backwards=False, stateful=False, **kwargs) + | + | 2D Convolutional LSTM. + | + | Similar to an LSTM layer, but the input transformations + | and recurrent transformations are both convolutional. + | + | Args: + | filters: int, the dimension of the output space (the number of filters + | in the convolution). + | kernel_size: int or tuple/list of 2 integers, specifying the size of the + | convolution window. + | strides: int or tuple/list of 2 integers, specifying the stride length + | of the convolution. `strides > 1` is incompatible with + | `dilation_rate > 1`. + | padding: string, `"valid"` or `"same"` (case-insensitive). + | `"valid"` means no padding. `"same"` results in padding evenly to + | the left/right or up/down of the input such that output has the same + | height/width dimension as the input. + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape `(batch, time, rows, cols, channels)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch, time, channels, rows, cols)`. It defaults to the `image_data_format` + | value found in your Keras config file at `~/.keras/keras.json`. + | If you never set it, then it will be `"channels_last"`.
+ | dilation_rate: int or tuple/list of 2 integers, specifying the dilation + | rate to use for dilated convolution. + | activation: Activation function to use. By default hyperbolic tangent + | activation function is applied (`tanh(x)`). + | recurrent_activation: Activation function to use for the recurrent step. + | use_bias: Boolean, whether the layer uses a bias vector. + | kernel_initializer: Initializer for the `kernel` weights matrix, + | used for the linear transformation of the inputs. + | recurrent_initializer: Initializer for the `recurrent_kernel` weights + | matrix, used for the linear transformation of the recurrent state. + | bias_initializer: Initializer for the bias vector. + | unit_forget_bias: Boolean. If `True`, add 1 to the bias of the forget + | gate at initialization. + | Use in combination with `bias_initializer="zeros"`. + | This is recommended in [Jozefowicz et al., 2015]( + | http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) + | kernel_regularizer: Regularizer function applied to the `kernel` weights + | matrix. + | recurrent_regularizer: Regularizer function applied to the + | `recurrent_kernel` weights matrix. + | bias_regularizer: Regularizer function applied to the bias vector. + | activity_regularizer: Regularizer function applied to the output of the + | layer (its "activation"). + | kernel_constraint: Constraint function applied to the `kernel` weights + | matrix. + | recurrent_constraint: Constraint function applied to the + | `recurrent_kernel` weights matrix. + | bias_constraint: Constraint function applied to the bias vector. + | dropout: Float between 0 and 1. Fraction of the units to drop for the + | linear transformation of the inputs. + | recurrent_dropout: Float between 0 and 1. Fraction of the units to drop + | for the linear transformation of the recurrent state. + | seed: Random seed for dropout. + | return_sequences: Boolean. Whether to return the last output + | in the output sequence, or the full sequence. Default: `False`. + | return_state: Boolean. Whether to return the last state in addition + | to the output. Default: `False`. + | go_backwards: Boolean (default: `False`). + | If `True`, process the input sequence backwards and return the + | reversed sequence. + | stateful: Boolean (default False). If `True`, the last state + | for each sample at index i in a batch will be used as initial + | state for the sample of index i in the following batch. + | unroll: Boolean (default: `False`). + | If `True`, the network will be unrolled, + | else a symbolic loop will be used. + | Unrolling can speed-up a RNN, + | although it tends to be more memory-intensive. + | Unrolling is only suitable for short sequences. + | + | + | Call arguments: + | inputs: A 5D tensor. + | mask: Binary tensor of shape `(samples, timesteps)` indicating whether a + | given timestep should be masked. + | training: Python boolean indicating whether the layer should behave in + | training mode or in inference mode. + | This is only relevant if `dropout` or `recurrent_dropout` are set. + | initial_state: List of initial state tensors to be passed to the first + | call of the cell. + | + | Input shape: + | + | - If `data_format='channels_first'`: + | 5D tensor with shape: `(samples, time, channels, rows, cols)` + | - If `data_format='channels_last'`: + | 5D tensor with shape: `(samples, time, rows, cols, channels)` + | + | Output shape: + | + | - If `return_state`: a list of tensors. The first tensor is the output.
+ | The remaining tensors are the last states, + | each 4D tensor with shape: `(samples, filters, new_rows, new_cols)` if + | `data_format='channels_first'` + | or shape: `(samples, new_rows, new_cols, filters)` if + | `data_format='channels_last'`. `rows` and `cols` values might have + | changed due to padding. + | - If `return_sequences`: 5D tensor with shape: `(samples, timesteps, + | filters, new_rows, new_cols)` if data_format='channels_first' + | or shape: `(samples, timesteps, new_rows, new_cols, filters)` if + | `data_format='channels_last'`. + | - Else, 4D tensor with shape: `(samples, filters, new_rows, new_cols)` if + | `data_format='channels_first'` + | or shape: `(samples, new_rows, new_cols, filters)` if + | `data_format='channels_last'`. + | + | References: + | + | - [Shi et al., 2015](http://arxiv.org/abs/1506.04214v1) + | (the current implementation does not include the feedback loop on the + | cells output). + | + | Method resolution order: + | ConvLSTM2D + | keras.src.layers.rnn.conv_lstm.ConvLSTM + | keras.src.layers.rnn.rnn.RNN + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | filters, + | kernel_size, + | strides=1, + | padding='valid', + | data_format=None, + | dilation_rate=1, + | activation='tanh', + | recurrent_activation='sigmoid', + | use_bias=True, + | kernel_initializer='glorot_uniform', + | recurrent_initializer='orthogonal', + | bias_initializer='zeros', + | unit_forget_bias=True, + | kernel_regularizer=None, + | recurrent_regularizer=None, + | bias_regularizer=None, + | activity_regularizer=None, + | kernel_constraint=None, + | recurrent_constraint=None, + | bias_constraint=None, + | dropout=0.0, + | recurrent_dropout=0.0, + | seed=None, + | return_sequences=False, + | return_state=False, + | go_backwards=False, + | stateful=False, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + diff --git a/.tether/man/layer_conv_lstm_3d.txt b/.tether/man/layer_conv_lstm_3d.txt new file mode 100644 index 0000000000..397aca9c7e --- /dev/null +++ b/.tether/man/layer_conv_lstm_3d.txt @@ -0,0 +1,167 @@ +Help on class ConvLSTM3D in module keras.src.layers.rnn.conv_lstm3d: + +class ConvLSTM3D(keras.src.layers.rnn.conv_lstm.ConvLSTM) + | ConvLSTM3D(filters, kernel_size, strides=1, padding='valid', data_format=None, dilation_rate=1, activation='tanh', recurrent_activation='sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', unit_forget_bias=True, kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0.0, recurrent_dropout=0.0, seed=None, return_sequences=False, return_state=False, go_backwards=False, stateful=False, **kwargs) + | + | 3D Convolutional LSTM. + | + | Similar to an LSTM layer, but the input transformations + | and recurrent transformations are both convolutional. + | + | Args: + | filters: int, the dimension of the output space (the number of filters + | in the convolution). + | kernel_size: int or tuple/list of 3 integers, specifying the size of the + | convolution window. 
+ | strides: int or tuple/list of 3 integers, specifying the stride length + | of the convolution. `strides > 1` is incompatible with + | `dilation_rate > 1`. + | padding: string, `"valid"` or `"same"` (case-insensitive). + | `"valid"` means no padding. `"same"` results in padding evenly to + | the left/right or up/down of the input such that output has the same + | height/width dimension as the input. + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape `(batch, time, *spatial_dims, channels)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch, time, channels, *spatial_dims)`. It defaults to the `image_data_format` + | value found in your Keras config file at `~/.keras/keras.json`. + | If you never set it, then it will be `"channels_last"`. + | dilation_rate: int or tuple/list of 3 integers, specifying the dilation + | rate to use for dilated convolution. + | activation: Activation function to use. By default hyperbolic tangent + | activation function is applied (`tanh(x)`). + | recurrent_activation: Activation function to use for the recurrent step. + | use_bias: Boolean, whether the layer uses a bias vector. + | kernel_initializer: Initializer for the `kernel` weights matrix, + | used for the linear transformation of the inputs. + | recurrent_initializer: Initializer for the `recurrent_kernel` weights + | matrix, used for the linear transformation of the recurrent state. + | bias_initializer: Initializer for the bias vector. + | unit_forget_bias: Boolean. If `True`, add 1 to the bias of the forget + | gate at initialization. + | Use in combination with `bias_initializer="zeros"`. + | This is recommended in [Jozefowicz et al., 2015]( + | http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf) + | kernel_regularizer: Regularizer function applied to the `kernel` weights + | matrix. + | recurrent_regularizer: Regularizer function applied to the + | `recurrent_kernel` weights matrix. + | bias_regularizer: Regularizer function applied to the bias vector. + | activity_regularizer: Regularizer function applied to the output of the + | layer (its "activation"). + | kernel_constraint: Constraint function applied to the `kernel` weights + | matrix. + | recurrent_constraint: Constraint function applied to the + | `recurrent_kernel` weights matrix. + | bias_constraint: Constraint function applied to the bias vector. + | dropout: Float between 0 and 1. Fraction of the units to drop for the + | linear transformation of the inputs. + | recurrent_dropout: Float between 0 and 1. Fraction of the units to drop + | for the linear transformation of the recurrent state. + | seed: Random seed for dropout. + | return_sequences: Boolean. Whether to return the last output + | in the output sequence, or the full sequence. Default: `False`. + | return_state: Boolean. Whether to return the last state in addition + | to the output. Default: `False`. + | go_backwards: Boolean (default: `False`). + | If `True`, process the input sequence backwards and return the + | reversed sequence. + | stateful: Boolean (default False). If `True`, the last state + | for each sample at index i in a batch will be used as initial + | state for the sample of index i in the following batch. + | unroll: Boolean (default: `False`). + | If `True`, the network will be unrolled, + | else a symbolic loop will be used. + | Unrolling can speed-up a RNN, + | although it tends to be more memory-intensive. + | Unrolling is only suitable for short sequences.
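The same call pattern applies across `layer_conv_lstm_1d()`, `layer_conv_lstm_2d()`, and `layer_conv_lstm_3d()`; only the rank of the input changes. A hedged 3D sketch, keeping the default `return_sequences = FALSE` so only the last output is returned:

```r
library(keras3)

# (samples, time, spatial_dim1, spatial_dim2, spatial_dim3, channels)
x <- random_normal(c(8, 5, 16, 16, 16, 3))

y <- x |> layer_conv_lstm_3d(filters = 8, kernel_size = 3, padding = "same")
shape(y)  # (8, 16, 16, 16, 8): the time axis is dropped
```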
+ | + | + | Call arguments: + | inputs: A 6D tensor. + | mask: Binary tensor of shape `(samples, timesteps)` indicating whether a + | given timestep should be masked. + | training: Python boolean indicating whether the layer should behave in + | training mode or in inference mode. + | This is only relevant if `dropout` or `recurrent_dropout` are set. + | initial_state: List of initial state tensors to be passed to the first + | call of the cell. + | + | Input shape: + | + | - If `data_format='channels_first'`: + | 6D tensor with shape: `(samples, time, channels, *spatial_dims)` + | - If `data_format='channels_last'`: + | 6D tensor with shape: `(samples, time, *spatial_dims, channels)` + | + | Output shape: + | + | - If `return_state`: a list of tensors. The first tensor is the output. + | The remaining tensors are the last states, + | each 5D tensor with shape: `(samples, filters, *spatial_dims)` if + | `data_format='channels_first'` + | or shape: `(samples, *spatial_dims, filters)` if + | `data_format='channels_last'`. + | - If `return_sequences`: 6D tensor with shape: `(samples, timesteps, + | filters, *spatial_dims)` if data_format='channels_first' + | or shape: `(samples, timesteps, *spatial_dims, filters)` if + | `data_format='channels_last'`. + | - Else, 5D tensor with shape: `(samples, filters, *spatial_dims)` if + | `data_format='channels_first'` + | or shape: `(samples, *spatial_dims, filters)` if + | `data_format='channels_last'`. + | + | References: + | + | - [Shi et al., 2015](http://arxiv.org/abs/1506.04214v1) + | (the current implementation does not include the feedback loop on the + | cells output). + | + | Method resolution order: + | ConvLSTM3D + | keras.src.layers.rnn.conv_lstm.ConvLSTM + | keras.src.layers.rnn.rnn.RNN + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | filters, + | kernel_size, + | strides=1, + | padding='valid', + | data_format=None, + | dilation_rate=1, + | activation='tanh', + | recurrent_activation='sigmoid', + | use_bias=True, + | kernel_initializer='glorot_uniform', + | recurrent_initializer='orthogonal', + | bias_initializer='zeros', + | unit_forget_bias=True, + | kernel_regularizer=None, + | recurrent_regularizer=None, + | bias_regularizer=None, + | activity_regularizer=None, + | kernel_constraint=None, + | recurrent_constraint=None, + | bias_constraint=None, + | dropout=0.0, + | recurrent_dropout=0.0, + | seed=None, + | return_sequences=False, + | return_state=False, + | go_backwards=False, + | stateful=False, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + diff --git a/.tether/man/layer_cropping_1d.txt b/.tether/man/layer_cropping_1d.txt new file mode 100644 index 0000000000..e99d41b886 --- /dev/null +++ b/.tether/man/layer_cropping_1d.txt @@ -0,0 +1,69 @@ +Help on class Cropping1D in module keras.src.layers.reshaping.cropping1d: + +class Cropping1D(keras.src.layers.layer.Layer) + | Cropping1D(cropping=(1, 1), **kwargs) + | + | Cropping layer for 1D input (e.g. temporal sequence). + | + | It crops along the time dimension (axis 1).
+ | + | Example: + | + | >>> input_shape = (2, 3, 2) + | >>> x = np.arange(np.prod(input_shape)).reshape(input_shape) + | >>> x + | [[[ 0 1] + | [ 2 3] + | [ 4 5]] + | [[ 6 7] + | [ 8 9] + | [10 11]]] + | >>> y = keras.layers.Cropping1D(cropping=1)(x) + | >>> y + | [[[2 3]] + | [[8 9]]] + | + | Args: + | cropping: Int, or tuple of int (length 2), or dictionary. + | - If int: how many units should be trimmed off at the beginning and + | end of the cropping dimension (axis 1). + | - If tuple of 2 ints: how many units should be trimmed off at the + | beginning and end of the cropping dimension + | (`(left_crop, right_crop)`). + | + | Input shape: + | 3D tensor with shape `(batch_size, axis_to_crop, features)` + | + | Output shape: + | 3D tensor with shape `(batch_size, cropped_axis, features)` + | + | Method resolution order: + | Cropping1D + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | cropping=(1, 1), + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_cropping_2d.txt b/.tether/man/layer_cropping_2d.txt new file mode 100644 index 0000000000..01e5487b65 --- /dev/null +++ b/.tether/man/layer_cropping_2d.txt @@ -0,0 +1,81 @@ +Help on class Cropping2D in module keras.src.layers.reshaping.cropping2d: + +class Cropping2D(keras.src.layers.layer.Layer) + | Cropping2D(cropping=((0, 0), (0, 0)), data_format=None, **kwargs) + | + | Cropping layer for 2D input (e.g. picture). + | + | It crops along spatial dimensions, i.e. height and width. + | + | Example: + | + | >>> input_shape = (2, 28, 28, 3) + | >>> x = np.arange(np.prod(input_shape)).reshape(input_shape) + | >>> y = keras.layers.Cropping2D(cropping=((2, 2), (4, 4)))(x) + | >>> y.shape + | (2, 24, 20, 3) + | + | Args: + | cropping: Int, or tuple of 2 ints, or tuple of 2 tuples of 2 ints. + | - If int: the same symmetric cropping is applied to height and + | width. + | - If tuple of 2 ints: interpreted as two different symmetric + | cropping values for height and width: + | `(symmetric_height_crop, symmetric_width_crop)`. + | - If tuple of 2 tuples of 2 ints: interpreted as + | `((top_crop, bottom_crop), (left_crop, right_crop))`. + | data_format: A string, one of `"channels_last"` (default) or + | `"channels_first"`. The ordering of the dimensions in the inputs. + | `"channels_last"` corresponds to inputs with shape + | `(batch_size, height, width, channels)` while `"channels_first"` + | corresponds to inputs with shape + | `(batch_size, channels, height, width)`. + | When unspecified, uses `image_data_format` value found in your Keras + | config file at `~/.keras/keras.json` (if exists). Defaults to + | `"channels_last"`. 
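For the cropping family, the R wrappers take the same `cropping` forms as the Python layers. A sketch of `layer_cropping_2d()` mirroring the doctest above, assuming the `((top, bottom), (left, right))` form is passed as an R list of pairs:

```r
library(keras3)

x <- random_normal(c(2, 28, 28, 3))

# Crop 2 rows top/bottom and 4 columns left/right
y <- x |> layer_cropping_2d(cropping = list(c(2, 2), c(4, 4)))
shape(y)  # (2, 24, 20, 3)
```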
+ | + | Input shape: + | 4D tensor with shape: + | - If `data_format` is `"channels_last"`: + | `(batch_size, height, width, channels)` + | - If `data_format` is `"channels_first"`: + | `(batch_size, channels, height, width)` + | + | Output shape: + | 4D tensor with shape: + | - If `data_format` is `"channels_last"`: + | `(batch_size, cropped_height, cropped_width, channels)` + | - If `data_format` is `"channels_first"`: + | `(batch_size, channels, cropped_height, cropped_width)` + | + | Method resolution order: + | Cropping2D + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | cropping=((0, 0), (0, 0)), + | data_format=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_cropping_3d.txt b/.tether/man/layer_cropping_3d.txt new file mode 100644 index 0000000000..82eb9d71da --- /dev/null +++ b/.tether/man/layer_cropping_3d.txt @@ -0,0 +1,84 @@ +Help on class Cropping3D in module keras.src.layers.reshaping.cropping3d: + +class Cropping3D(keras.src.layers.layer.Layer) + | Cropping3D(cropping=((1, 1), (1, 1), (1, 1)), data_format=None, **kwargs) + | + | Cropping layer for 3D data (e.g. spatial or spatio-temporal). + | + | Example: + | + | >>> input_shape = (2, 28, 28, 10, 3) + | >>> x = np.arange(np.prod(input_shape)).reshape(input_shape) + | >>> y = keras.layers.Cropping3D(cropping=(2, 4, 2))(x) + | >>> y.shape + | (2, 24, 20, 6, 3) + | + | Args: + | cropping: Int, or tuple of 3 ints, or tuple of 3 tuples of 2 ints. + | - If int: the same symmetric cropping is applied to depth, height, + | and width. + | - If tuple of 3 ints: interpreted as three different symmetric + | cropping values for depth, height, and width: + | `(symmetric_dim1_crop, symmetric_dim2_crop, symmetric_dim3_crop)`. + | - If tuple of 3 tuples of 2 ints: interpreted as + | `((left_dim1_crop, right_dim1_crop), (left_dim2_crop, + | right_dim2_crop), (left_dim3_crop, right_dim3_crop))`. + | data_format: A string, one of `"channels_last"` (default) or + | `"channels_first"`. The ordering of the dimensions in the inputs. + | `"channels_last"` corresponds to inputs with shape + | `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. + | When unspecified, uses `image_data_format` value found in your Keras + | config file at `~/.keras/keras.json` (if exists). Defaults to + | `"channels_last"`. 
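And the 3D counterpart, `layer_cropping_3d()`, with one symmetric crop value per spatial axis:

```r
library(keras3)

x <- random_normal(c(2, 28, 28, 10, 3))

# Crop 2, 4 and 2 units from each side of the three spatial axes
y <- x |> layer_cropping_3d(cropping = c(2, 4, 2))
shape(y)  # (2, 24, 20, 6, 3)
```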
+ | + | Input shape: + | 5D tensor with shape: + | - If `data_format` is `"channels_last"`: + | `(batch_size, first_axis_to_crop, second_axis_to_crop, + | third_axis_to_crop, channels)` + | - If `data_format` is `"channels_first"`: + | `(batch_size, channels, first_axis_to_crop, second_axis_to_crop, + | third_axis_to_crop)` + | + | Output shape: + | 5D tensor with shape: + | - If `data_format` is `"channels_last"`: + | `(batch_size, first_cropped_axis, second_cropped_axis, + | third_cropped_axis, channels)` + | - If `data_format` is `"channels_first"`: + | `(batch_size, channels, first_cropped_axis, second_cropped_axis, + | third_cropped_axis)` + | + | Method resolution order: + | Cropping3D + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | cropping=((1, 1), (1, 1), (1, 1)), + | data_format=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_dense.txt b/.tether/man/layer_dense.txt new file mode 100644 index 0000000000..8018b89d20 --- /dev/null +++ b/.tether/man/layer_dense.txt @@ -0,0 +1,146 @@ +Help on class Dense in module keras.src.layers.core.dense: + +class Dense(keras.src.layers.layer.Layer) + | Dense(units, activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, lora_rank=None, **kwargs) + | + | Just your regular densely-connected NN layer. + | + | `Dense` implements the operation: + | `output = activation(dot(input, kernel) + bias)` + | where `activation` is the element-wise activation function + | passed as the `activation` argument, `kernel` is a weights matrix + | created by the layer, and `bias` is a bias vector created by the layer + | (only applicable if `use_bias` is `True`). + | + | Note: If the input to the layer has a rank greater than 2, `Dense` + | computes the dot product between the `inputs` and the `kernel` along the + | last axis of the `inputs` and axis 0 of the `kernel` (using `tf.tensordot`). + | For example, if input has dimensions `(batch_size, d0, d1)`, then we create + | a `kernel` with shape `(d1, units)`, and the `kernel` operates along axis 2 + | of the `input`, on every sub-tensor of shape `(1, 1, d1)` (there are + | `batch_size * d0` such sub-tensors). The output in this case will have + | shape `(batch_size, d0, units)`. + | + | Args: + | units: Positive integer, dimensionality of the output space. + | activation: Activation function to use. + | If you don't specify anything, no activation is applied + | (ie. "linear" activation: `a(x) = x`). + | use_bias: Boolean, whether the layer uses a bias vector. + | kernel_initializer: Initializer for the `kernel` weights matrix. + | bias_initializer: Initializer for the bias vector. + | kernel_regularizer: Regularizer function applied to + | the `kernel` weights matrix. 
+ | bias_regularizer: Regularizer function applied to the bias vector. + | activity_regularizer: Regularizer function applied to + | the output of the layer (its "activation"). + | kernel_constraint: Constraint function applied to + | the `kernel` weights matrix. + | bias_constraint: Constraint function applied to the bias vector. + | lora_rank: Optional integer. If set, the layer's forward pass + | will implement LoRA (Low-Rank Adaptation) + | with the provided rank. LoRA sets the layer's kernel + | to non-trainable and replaces it with a delta over the + | original kernel, obtained via multiplying two lower-rank + | trainable matrices. This can be useful to reduce the + | computation cost of fine-tuning large dense layers. + | You can also enable LoRA on an existing + | `Dense` layer by calling `layer.enable_lora(rank)`. + | + | Input shape: + | N-D tensor with shape: `(batch_size, ..., input_dim)`. + | The most common situation would be + | a 2D input with shape `(batch_size, input_dim)`. + | + | Output shape: + | N-D tensor with shape: `(batch_size, ..., units)`. + | For instance, for a 2D input with shape `(batch_size, input_dim)`, + | the output would have shape `(batch_size, units)`. + | + | Method resolution order: + | Dense + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | units, + | activation=None, + | use_bias=True, + | kernel_initializer='glorot_uniform', + | bias_initializer='zeros', + | kernel_regularizer=None, + | bias_regularizer=None, + | activity_regularizer=None, + | kernel_constraint=None, + | bias_constraint=None, + | lora_rank=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, input_shape) + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | enable_lora( + | self, + | rank, + | a_initializer='he_uniform', + | b_initializer='zeros' + | ) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + | load_own_variables(self, store) + | Loads the state of the layer. + | + | You can override this method to take full control of how the state of + | the layer is loaded upon calling `keras.models.load_model()`. + | + | Args: + | store: Dict from which the state of the model will be loaded. + | + | quantize(self, mode) + | + | quantized_build( + | self, + | input_shape, + | mode + | ) + | + | quantized_call(self, inputs) + | + | save_own_variables(self, store) + | Saves the state of the layer. + | + | You can override this method to take full control of how the state of + | the layer is saved upon calling `model.save()`. + | + | Args: + | store: Dict where the state of the model will be saved. + | + | ---------------------------------------------------------------------- + | Readonly properties defined here: + | + | kernel + | + | ---------------------------------------------------------------------- + | Data and other attributes defined here: + | + | QUANTIZATION_MODE_ERROR_TEMPLATE = "Invalid quantization mode. Expecte... 
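The note above about inputs of rank greater than 2 is easy to miss: the kernel is applied along the last axis only. A minimal sketch with `layer_dense()`:

```r
library(keras3)

# Rank-3 input: the (64, 8) kernel acts on the last axis of every
# (batch, step) slice, so the leading axes pass through unchanged
x <- random_normal(c(32, 10, 64))
y <- x |> layer_dense(units = 8)
shape(y)  # (32, 10, 8)
```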
+ | + diff --git a/.tether/man/layer_depthwise_conv_1d.txt b/.tether/man/layer_depthwise_conv_1d.txt new file mode 100644 index 0000000000..b648ea5a31 --- /dev/null +++ b/.tether/man/layer_depthwise_conv_1d.txt @@ -0,0 +1,130 @@ +Help on class DepthwiseConv1D in module keras.src.layers.convolutional.depthwise_conv1d: + +class DepthwiseConv1D(keras.src.layers.convolutional.base_depthwise_conv.BaseDepthwiseConv) + | DepthwiseConv1D(kernel_size, strides=1, padding='valid', depth_multiplier=1, data_format=None, dilation_rate=1, activation=None, use_bias=True, depthwise_initializer='glorot_uniform', bias_initializer='zeros', depthwise_regularizer=None, bias_regularizer=None, activity_regularizer=None, depthwise_constraint=None, bias_constraint=None, **kwargs) + | + | 1D depthwise convolution layer. + | + | Depthwise convolution is a type of convolution in which each input channel + | is convolved with a different kernel (called a depthwise kernel). You can + | understand depthwise convolution as the first step in a depthwise separable + | convolution. + | + | It is implemented via the following steps: + | + | - Split the input into individual channels. + | - Convolve each channel with an individual depthwise kernel with + | `depth_multiplier` output channels. + | - Concatenate the convolved outputs along the channels axis. + | + | Unlike a regular 1D convolution, depthwise convolution does not mix + | information across different input channels. + | + | The `depth_multiplier` argument determines how many filters are applied to + | one input channel. As such, it controls the amount of output channels that + | are generated per input channel in the depthwise step. + | + | Args: + | kernel_size: int or tuple/list of 1 integer, specifying the size of the + | depthwise convolution window. + | strides: int or tuple/list of 1 integer, specifying the stride length + | of the convolution. `strides > 1` is incompatible with + | `dilation_rate > 1`. + | padding: string, either `"valid"` or `"same"` (case-insensitive). + | `"valid"` means no padding. `"same"` results in padding evenly to + | the left/right or up/down of the input. When `padding="same"` and + | `strides=1`, the output has the same size as the input. + | depth_multiplier: The number of depthwise convolution output channels + | for each input channel. The total number of depthwise convolution + | output channels will be equal to `input_channel * depth_multiplier`. + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape `(batch, steps, features)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch, features, steps)`. It defaults to the `image_data_format` + | value found in your Keras config file at `~/.keras/keras.json`. + | If you never set it, then it will be `"channels_last"`. + | dilation_rate: int or tuple/list of 1 integers, specifying the dilation + | rate to use for dilated convolution. + | activation: Activation function. If `None`, no activation is applied. + | use_bias: bool, if `True`, bias will be added to the output. + | depthwise_initializer: Initializer for the convolution kernel. + | If `None`, the default initializer (`"glorot_uniform"`) + | will be used. + | bias_initializer: Initializer for the bias vector. If `None`, the + | default initializer (`"zeros"`) will be used. + | depthwise_regularizer: Optional regularizer for the convolution kernel. 
+ | bias_regularizer: Optional regularizer for the bias vector. + | activity_regularizer: Optional regularizer function for the output. + | depthwise_constraint: Optional projection function to be applied to the + | kernel after being updated by an `Optimizer` (e.g. used to implement + | norm constraints or value constraints for layer weights). The + | function must take as input the unprojected variable and must return + | the projected variable (which must have the same shape). Constraints + | are not safe to use when doing asynchronous distributed training. + | bias_constraint: Optional projection function to be applied to the + | bias after being updated by an `Optimizer`. + | + | Input shape: + | - If `data_format="channels_last"`: + | A 3D tensor with shape: `(batch_shape, steps, channels)` + | - If `data_format="channels_first"`: + | A 3D tensor with shape: `(batch_shape, channels, steps)` + | + | Output shape: + | - If `data_format="channels_last"`: + | A 3D tensor with shape: + | `(batch_shape, new_steps, channels * depth_multiplier)` + | - If `data_format="channels_first"`: + | A 3D tensor with shape: + | `(batch_shape, channels * depth_multiplier, new_steps)` + | + | Returns: + | A 3D tensor representing + | `activation(depthwise_conv1d(inputs, kernel) + bias)`. + | + | Raises: + | ValueError: when both `strides > 1` and `dilation_rate > 1`. + | + | Example: + | + | >>> x = np.random.rand(4, 10, 12) + | >>> y = keras.layers.DepthwiseConv1D(3, 2, depth_multiplier=3, activation='relu')(x) + | >>> print(y.shape) + | (4, 4, 36) + | + | Method resolution order: + | DepthwiseConv1D + | keras.src.layers.convolutional.base_depthwise_conv.BaseDepthwiseConv + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | kernel_size, + | strides=1, + | padding='valid', + | depth_multiplier=1, + | data_format=None, + | dilation_rate=1, + | activation=None, + | use_bias=True, + | depthwise_initializer='glorot_uniform', + | bias_initializer='zeros', + | depthwise_regularizer=None, + | bias_regularizer=None, + | activity_regularizer=None, + | depthwise_constraint=None, + | bias_constraint=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + diff --git a/.tether/man/layer_depthwise_conv_2d.txt b/.tether/man/layer_depthwise_conv_2d.txt new file mode 100644 index 0000000000..e3e5b2ba8e --- /dev/null +++ b/.tether/man/layer_depthwise_conv_2d.txt @@ -0,0 +1,131 @@ +Help on class DepthwiseConv2D in module keras.src.layers.convolutional.depthwise_conv2d: + +class DepthwiseConv2D(keras.src.layers.convolutional.base_depthwise_conv.BaseDepthwiseConv) + | DepthwiseConv2D(kernel_size, strides=(1, 1), padding='valid', depth_multiplier=1, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=True, depthwise_initializer='glorot_uniform', bias_initializer='zeros', depthwise_regularizer=None, bias_regularizer=None, activity_regularizer=None, depthwise_constraint=None, bias_constraint=None, **kwargs) + | + | 2D depthwise convolution layer. + | + | Depthwise convolution is a type of convolution in which each input channel + | is convolved with a different kernel (called a depthwise kernel).
You can + | understand depthwise convolution as the first step in a depthwise separable + | convolution. + | + | It is implemented via the following steps: + | + | - Split the input into individual channels. + | - Convolve each channel with an individual depthwise kernel with + | `depth_multiplier` output channels. + | - Concatenate the convolved outputs along the channels axis. + | + | Unlike a regular 2D convolution, depthwise convolution does not mix + | information across different input channels. + | + | The `depth_multiplier` argument determines how many filters are applied to + | one input channel. As such, it controls the amount of output channels that + | are generated per input channel in the depthwise step. + | + | Args: + | kernel_size: int or tuple/list of 2 integer, specifying the size of the + | depthwise convolution window. + | strides: int or tuple/list of 2 integer, specifying the stride length + | of the depthwise convolution. `strides > 1` is incompatible with + | `dilation_rate > 1`. + | padding: string, either `"valid"` or `"same"` (case-insensitive). + | `"valid"` means no padding. `"same"` results in padding evenly to + | the left/right or up/down of the input. When `padding="same"` and + | `strides=1`, the output has the same size as the input. + | depth_multiplier: The number of depthwise convolution output channels + | for each input channel. The total number of depthwise convolution + | output channels will be equal to `input_channel * depth_multiplier`. + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape `(batch, height, width, channels)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch, channels, height, width)`. It defaults to the + | `image_data_format` value found in your Keras config file + | at `~/.keras/keras.json`. + | If you never set it, then it will be `"channels_last"`. + | dilation_rate: int or tuple/list of 2 integers, specifying the dilation + | rate to use for dilated convolution. + | activation: Activation function. If `None`, no activation is applied. + | use_bias: bool, if `True`, bias will be added to the output. + | depthwise_initializer: Initializer for the convolution kernel. + | If `None`, the default initializer (`"glorot_uniform"`) + | will be used. + | bias_initializer: Initializer for the bias vector. If `None`, the + | default initializer (`"zeros"`) will be used. + | depthwise_regularizer: Optional regularizer for the convolution kernel. + | bias_regularizer: Optional regularizer for the bias vector. + | activity_regularizer: Optional regularizer function for the output. + | depthwise_constraint: Optional projection function to be applied to the + | kernel after being updated by an `Optimizer` (e.g. used to implement + | norm constraints or value constraints for layer weights). The + | function must take as input the unprojected variable and must return + | the projected variable (which must have the same shape). Constraints + | are not safe to use when doing asynchronous distributed training. + | bias_constraint: Optional projection function to be applied to the + | bias after being updated by an `Optimizer`. 
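To make the `depth_multiplier` arithmetic above concrete, a sketch with `layer_depthwise_conv_2d()`: each input channel gets its own `depth_multiplier` filters, so output channels = input channels × `depth_multiplier`:

```r
library(keras3)

x <- random_normal(c(4, 10, 10, 12))

# 12 input channels * depth_multiplier 3 = 36 output channels;
# a 3x3 valid convolution shrinks 10x10 to 8x8
y <- x |> layer_depthwise_conv_2d(kernel_size = 3, depth_multiplier = 3,
                                  activation = "relu")
shape(y)  # (4, 8, 8, 36)
```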
+ |
+ | Input shape:
+ | - If `data_format="channels_last"`:
+ | A 4D tensor with shape: `(batch_size, height, width, channels)`
+ | - If `data_format="channels_first"`:
+ | A 4D tensor with shape: `(batch_size, channels, height, width)`
+ |
+ | Output shape:
+ | - If `data_format="channels_last"`:
+ | A 4D tensor with shape:
+ | `(batch_size, new_height, new_width, channels * depth_multiplier)`
+ | - If `data_format="channels_first"`:
+ | A 4D tensor with shape:
+ | `(batch_size, channels * depth_multiplier, new_height, new_width)`
+ |
+ | Returns:
+ | A 4D tensor representing
+ | `activation(depthwise_conv2d(inputs, kernel) + bias)`.
+ |
+ | Raises:
+ | ValueError: when both `strides > 1` and `dilation_rate > 1`.
+ |
+ | Example:
+ |
+ | >>> x = np.random.rand(4, 10, 10, 12)
+ | >>> y = keras.layers.DepthwiseConv2D(3, depth_multiplier=3, activation='relu')(x)
+ | >>> print(y.shape)
+ | (4, 8, 8, 36)
+ |
+ | Method resolution order:
+ | DepthwiseConv2D
+ | keras.src.layers.convolutional.base_depthwise_conv.BaseDepthwiseConv
+ | keras.src.layers.layer.Layer
+ | keras.src.backend.tensorflow.layer.TFLayer
+ | keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ | tensorflow.python.trackable.autotrackable.AutoTrackable
+ | tensorflow.python.trackable.base.Trackable
+ | keras.src.ops.operation.Operation
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | kernel_size,
+ | strides=(1, 1),
+ | padding='valid',
+ | depth_multiplier=1,
+ | data_format=None,
+ | dilation_rate=(1, 1),
+ | activation=None,
+ | use_bias=True,
+ | depthwise_initializer='glorot_uniform',
+ | bias_initializer='zeros',
+ | depthwise_regularizer=None,
+ | bias_regularizer=None,
+ | activity_regularizer=None,
+ | depthwise_constraint=None,
+ | bias_constraint=None,
+ | **kwargs
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
diff --git a/.tether/man/layer_discretization.txt b/.tether/man/layer_discretization.txt
new file mode 100644
index 0000000000..9d13f06f57
--- /dev/null
+++ b/.tether/man/layer_discretization.txt
@@ -0,0 +1,167 @@
+Help on class Discretization in module keras.src.layers.preprocessing.discretization:
+
+class Discretization(keras.src.layers.preprocessing.tf_data_layer.TFDataLayer)
+ | Discretization(bin_boundaries=None, num_bins=None, epsilon=0.01, output_mode='int', sparse=False, dtype=None, name=None)
+ |
+ | A preprocessing layer which buckets continuous features by ranges.
+ |
+ | This layer will place each element of its input data into one of several
+ | contiguous ranges and output an integer index indicating which range each
+ | element was placed in.
+ |
+ | **Note:** This layer is safe to use inside a `tf.data` pipeline
+ | (independently of which backend you're using).
+ |
+ | Input shape:
+ | Any array of dimension 2 or higher.
+ |
+ | Output shape:
+ | Same as input shape.
+ |
+ | Arguments:
+ | bin_boundaries: A list of bin boundaries.
+ | The leftmost and rightmost bins
+ | will always extend to `-inf` and `inf`,
+ | so `bin_boundaries=[0., 1., 2.]`
+ | generates bins `(-inf, 0.)`, `[0., 1.)`, `[1., 2.)`,
+ | and `[2., +inf)`.
+ | If this option is set, `adapt()` should not be called.
+ | num_bins: The integer number of bins to compute.
+ | If this option is set,
+ | `adapt()` should be called to learn the bin boundaries.
+ | epsilon: Error tolerance, typically a small fraction
+ | close to zero (e.g. 0.01). Higher values of epsilon increase
+ | the error of the quantile approximation, and hence result in more
+ | unequal buckets, but can improve performance
+ | and reduce resource consumption.
+ | output_mode: Specification for the output of the layer.
+ | Values can be `"int"`, `"one_hot"`, `"multi_hot"`, or
+ | `"count"` configuring the layer as follows:
+ | - `"int"`: Return the discretized bin indices directly.
+ | - `"one_hot"`: Encodes each individual element in the
+ | input into an array the same size as `num_bins`,
+ | containing a 1 at the input's bin
+ | index. If the last dimension is size 1, will encode on that
+ | dimension. If the last dimension is not size 1,
+ | will append a new dimension for the encoded output.
+ | - `"multi_hot"`: Encodes each sample in the input into a
+ | single array the same size as `num_bins`,
+ | containing a 1 for each bin index
+ | present in the sample.
+ | Treats the last dimension as the sample
+ | dimension: if input shape is `(..., sample_length)`,
+ | output shape will be `(..., num_tokens)`.
+ | - `"count"`: As `"multi_hot"`, but the int array contains
+ | a count of the number of times the bin index appeared
+ | in the sample.
+ | Defaults to `"int"`.
+ | sparse: Boolean. Only applicable to `"one_hot"`, `"multi_hot"`,
+ | and `"count"` output modes. Only supported with TensorFlow
+ | backend. If `True`, returns a `SparseTensor` instead of
+ | a dense `Tensor`. Defaults to `False`.
+ |
+ | Examples:
+ |
+ | Discretize float values based on provided buckets.
+ | >>> input = np.array([[-1.5, 1.0, 3.4, .5], [0.0, 3.0, 1.3, 0.0]])
+ | >>> layer = Discretization(bin_boundaries=[0., 1., 2.])
+ | >>> layer(input)
+ | array([[0, 2, 3, 1],
+ | [1, 3, 2, 1]])
+ |
+ | Discretize float values based on a number of buckets to compute.
+ | >>> input = np.array([[-1.5, 1.0, 3.4, .5], [0.0, 3.0, 1.3, 0.0]])
+ | >>> layer = Discretization(num_bins=4, epsilon=0.01)
+ | >>> layer.adapt(input)
+ | >>> layer(input)
+ | array([[0, 2, 3, 2],
+ | [1, 3, 3, 1]])
+ |
+ | Method resolution order:
+ | Discretization
+ | keras.src.layers.preprocessing.tf_data_layer.TFDataLayer
+ | keras.src.layers.layer.Layer
+ | keras.src.backend.tensorflow.layer.TFLayer
+ | keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ | tensorflow.python.trackable.autotrackable.AutoTrackable
+ | tensorflow.python.trackable.base.Trackable
+ | keras.src.ops.operation.Operation
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | bin_boundaries=None,
+ | num_bins=None,
+ | epsilon=0.01,
+ | output_mode='int',
+ | sparse=False,
+ | dtype=None,
+ | name=None
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | adapt(
+ | self,
+ | data,
+ | steps=None
+ | )
+ | Computes bin boundaries from quantiles in an input dataset.
+ |
+ | Calling `adapt()` on a `Discretization` layer is an alternative to
+ | passing in a `bin_boundaries` argument during construction. A
+ | `Discretization` layer should always be either adapted over a dataset or
+ | passed `bin_boundaries`.
+ |
+ | During `adapt()`, the layer will estimate the quantile boundaries of the
+ | input dataset. The number of quantiles can be controlled via the
+ | `num_bins` argument, and the error tolerance for quantile boundaries can
+ | be controlled via the `epsilon` argument.
+ |
+ | Arguments:
+ | data: The data to train on. It can be passed either as a
+ | batched `tf.data.Dataset`,
+ | or as a NumPy array.
+ | steps: Integer or `None`.
+ | Total number of steps (batches of samples) to process.
+ | If `data` is a `tf.data.Dataset`, and `steps` is `None`,
+ | `adapt()` will run until the input dataset is exhausted.
+ | When passing an infinitely
+ | repeating dataset, you must specify the `steps` argument. This
+ | argument is not supported with array inputs or list inputs.
+ |
+ | build(self, input_shape=None)
+ |
+ | call(self, inputs)
+ |
+ | compute_output_spec(self, inputs)
+ |
+ | finalize_state(self)
+ |
+ | get_config(self)
+ | Returns the config of the object.
+ |
+ | An object config is a Python dictionary (serializable)
+ | containing the information needed to re-instantiate it.
+ |
+ | load_own_variables(self, store)
+ | Loads the state of the layer.
+ |
+ | You can override this method to take full control of how the state of
+ | the layer is loaded upon calling `keras.models.load_model()`.
+ |
+ | Args:
+ | store: Dict from which the state of the model will be loaded.
+ |
+ | reset_state(self)
+ |
+ | update_state(self, data)
+ |
+ | ----------------------------------------------------------------------
+ | Readonly properties defined here:
+ |
+ | input_dtype
+ | The dtype layer inputs should be converted to.
+ |
diff --git a/.tether/man/layer_dot.txt b/.tether/man/layer_dot.txt
new file mode 100644
index 0000000000..ac74680725
--- /dev/null
+++ b/.tether/man/layer_dot.txt
@@ -0,0 +1,83 @@
+Help on class Dot in module keras.src.layers.merging.dot:
+
+class Dot(keras.src.layers.merging.base_merge.Merge)
+ | Dot(axes, normalize=False, **kwargs)
+ |
+ | Computes the dot product between samples in two tensors.
+ |
+ | It takes a list of exactly two inputs, along with the axes
+ | of each input along which the dot product
+ | is to be performed.
+ |
+ | Let's say `x` and `y` are the two input tensors with shapes
+ | `(2, 3, 5)` and `(2, 10, 3)`. The batch dimension should be
+ | of the same size for both inputs, and `axes` should correspond
+ | to the dimensions that have the same size in the corresponding
+ | inputs. For example, with `axes=(1, 2)`, the dot product of `x` and `y`
+ | will result in a tensor with shape `(2, 5, 10)`.
+ |
+ | Example:
+ |
+ | >>> x = np.arange(10).reshape(1, 5, 2)
+ | >>> y = np.arange(10, 20).reshape(1, 2, 5)
+ | >>> keras.layers.Dot(axes=(1, 2))([x, y])
+ |
+ | Usage in a Keras model:
+ |
+ | >>> x1 = keras.layers.Dense(8)(np.arange(10).reshape(5, 2))
+ | >>> x2 = keras.layers.Dense(8)(np.arange(10, 20).reshape(5, 2))
+ | >>> y = keras.layers.Dot(axes=1)([x1, x2])
+ |
+ | Args:
+ | axes: Integer or tuple of integers, axis or axes along which to
+ | take the dot product. If a tuple, should be two integers
+ | corresponding to the desired axis from the first input and the
+ | desired axis from the second input, respectively. Note that the
+ | size of the two selected axes must match.
+ | normalize: Whether to L2-normalize samples along the dot product axis
+ | before taking the dot product. If set to `True`, then
+ | the output of the dot product is the cosine proximity
+ | between the two samples.
+ | **kwargs: Standard layer keyword arguments.
+ |
+ | Returns:
+ | A tensor, the dot product of the samples from the inputs.
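Note (editor): the `normalize` argument documented above turns the layer into a cosine-similarity computation. A minimal sketch, not part of the upstream help text:

```python
# Editor's sketch: with normalize=True, both samples are L2-normalized
# along the dot-product axis first, so the result is cosine similarity.
import numpy as np
import keras

a = np.array([[1.0, 0.0]])
b = np.array([[1.0, 1.0]])
y = keras.layers.Dot(axes=1, normalize=True)([a, b])
print(y)  # ~[[0.7071]], i.e. cos(45 degrees) between the two vectors
```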
+ | + | Method resolution order: + | Dot + | keras.src.layers.merging.base_merge.Merge + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | axes, + | normalize=False, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, input_shape) + | + | compute_mask( + | self, + | inputs, + | mask=None + | ) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_dropout.txt b/.tether/man/layer_dropout.txt new file mode 100644 index 0000000000..695a7ef86c --- /dev/null +++ b/.tether/man/layer_dropout.txt @@ -0,0 +1,73 @@ +Help on class Dropout in module keras.src.layers.regularization.dropout: + +class Dropout(keras.src.layers.layer.Layer) + | Dropout(rate, noise_shape=None, seed=None, **kwargs) + | + | Applies dropout to the input. + | + | The `Dropout` layer randomly sets input units to 0 with a frequency of + | `rate` at each step during training time, which helps prevent overfitting. + | Inputs not set to 0 are scaled up by `1 / (1 - rate)` such that the sum over + | all inputs is unchanged. + | + | Note that the `Dropout` layer only applies when `training` is set to `True` + | in `call()`, such that no values are dropped during inference. + | When using `model.fit`, `training` will be appropriately set to `True` + | automatically. In other contexts, you can set the argument explicitly + | to `True` when calling the layer. + | + | (This is in contrast to setting `trainable=False` for a `Dropout` layer. + | `trainable` does not affect the layer's behavior, as `Dropout` does + | not have any variables/weights that can be frozen during training.) + | + | Args: + | rate: Float between 0 and 1. Fraction of the input units to drop. + | noise_shape: 1D integer tensor representing the shape of the + | binary dropout mask that will be multiplied with the input. + | For instance, if your inputs have shape + | `(batch_size, timesteps, features)` and + | you want the dropout mask to be the same for all timesteps, + | you can use `noise_shape=(batch_size, 1, features)`. + | seed: A Python integer to use as random seed. + | + | Call arguments: + | inputs: Input tensor (of any rank). + | training: Python boolean indicating whether the layer should behave in + | training mode (adding dropout) or in inference mode (doing nothing). + | + | Method resolution order: + | Dropout + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | rate, + | noise_shape=None, + | seed=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call( + | self, + | inputs, + | training=False + | ) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. 
+ | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_einsum_dense.txt b/.tether/man/layer_einsum_dense.txt new file mode 100644 index 0000000000..cc8386f3c8 --- /dev/null +++ b/.tether/man/layer_einsum_dense.txt @@ -0,0 +1,185 @@ +Help on class EinsumDense in module keras.src.layers.core.einsum_dense: + +class EinsumDense(keras.src.layers.layer.Layer) + | EinsumDense(equation, output_shape, activation=None, bias_axes=None, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, kernel_constraint=None, bias_constraint=None, lora_rank=None, **kwargs) + | + | A layer that uses `einsum` as the backing computation. + | + | This layer can perform einsum calculations of arbitrary dimensionality. + | + | Args: + | equation: An equation describing the einsum to perform. + | This equation must be a valid einsum string of the form + | `ab,bc->ac`, `...ab,bc->...ac`, or + | `ab...,bc->ac...` where 'ab', 'bc', and 'ac' can be any valid einsum + | axis expression sequence. + | output_shape: The expected shape of the output tensor + | (excluding the batch dimension and any dimensions + | represented by ellipses). You can specify `None` for any dimension + | that is unknown or can be inferred from the input shape. + | activation: Activation function to use. If you don't specify anything, + | no activation is applied + | (that is, a "linear" activation: `a(x) = x`). + | bias_axes: A string containing the output dimension(s) + | to apply a bias to. Each character in the `bias_axes` string + | should correspond to a character in the output portion + | of the `equation` string. + | kernel_initializer: Initializer for the `kernel` weights matrix. + | bias_initializer: Initializer for the bias vector. + | kernel_regularizer: Regularizer function applied to the `kernel` weights + | matrix. + | bias_regularizer: Regularizer function applied to the bias vector. + | kernel_constraint: Constraint function applied to the `kernel` weights + | matrix. + | bias_constraint: Constraint function applied to the bias vector. + | lora_rank: Optional integer. If set, the layer's forward pass + | will implement LoRA (Low-Rank Adaptation) + | with the provided rank. LoRA sets the layer's kernel + | to non-trainable and replaces it with a delta over the + | original kernel, obtained via multiplying two lower-rank + | trainable matrices + | (the factorization happens on the last dimension). + | This can be useful to reduce the + | computation cost of fine-tuning large dense layers. + | You can also enable LoRA on an existing + | `EinsumDense` layer by calling `layer.enable_lora(rank)`. + | **kwargs: Base layer keyword arguments, such as `name` and `dtype`. + | + | Examples: + | + | **Biased dense layer with einsums** + | + | This example shows how to instantiate a standard Keras dense layer using + | einsum operations. This example is equivalent to + | `keras.layers.Dense(64, use_bias=True)`. + | + | >>> layer = keras.layers.EinsumDense("ab,bc->ac", + | ... output_shape=64, + | ... bias_axes="c") + | >>> input_tensor = keras.Input(shape=[32]) + | >>> output_tensor = layer(input_tensor) + | >>> output_tensor.shape + | (None, 64) + | + | **Applying a dense layer to a sequence** + | + | This example shows how to instantiate a layer that applies the same dense + | operation to every element in a sequence. 
Here, the `output_shape` has two + | values (since there are two non-batch dimensions in the output); the first + | dimension in the `output_shape` is `None`, because the sequence dimension + | `b` has an unknown shape. + | + | >>> layer = keras.layers.EinsumDense("abc,cd->abd", + | ... output_shape=(None, 64), + | ... bias_axes="d") + | >>> input_tensor = keras.Input(shape=[32, 128]) + | >>> output_tensor = layer(input_tensor) + | >>> output_tensor.shape + | (None, 32, 64) + | + | **Applying a dense layer to a sequence using ellipses** + | + | This example shows how to instantiate a layer that applies the same dense + | operation to every element in a sequence, but uses the ellipsis notation + | instead of specifying the batch and sequence dimensions. + | + | Because we are using ellipsis notation and have specified only one axis, the + | `output_shape` arg is a single value. When instantiated in this way, the + | layer can handle any number of sequence dimensions - including the case + | where no sequence dimension exists. + | + | >>> layer = keras.layers.EinsumDense("...x,xy->...y", + | ... output_shape=64, + | ... bias_axes="y") + | >>> input_tensor = keras.Input(shape=[32, 128]) + | >>> output_tensor = layer(input_tensor) + | >>> output_tensor.shape + | (None, 32, 64) + | + | Method resolution order: + | EinsumDense + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | equation, + | output_shape, + | activation=None, + | bias_axes=None, + | kernel_initializer='glorot_uniform', + | bias_initializer='zeros', + | kernel_regularizer=None, + | bias_regularizer=None, + | kernel_constraint=None, + | bias_constraint=None, + | lora_rank=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, input_shape) + | + | call(self, inputs) + | + | compute_output_shape(self, _) + | + | enable_lora( + | self, + | rank, + | a_initializer='he_uniform', + | b_initializer='zeros' + | ) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + | load_own_variables(self, store) + | Loads the state of the layer. + | + | You can override this method to take full control of how the state of + | the layer is loaded upon calling `keras.models.load_model()`. + | + | Args: + | store: Dict from which the state of the model will be loaded. + | + | quantize(self, mode) + | + | quantized_build( + | self, + | input_shape, + | mode + | ) + | + | quantized_call(self, inputs) + | + | save_own_variables(self, store) + | Saves the state of the layer. + | + | You can override this method to take full control of how the state of + | the layer is saved upon calling `model.save()`. + | + | Args: + | store: Dict where the state of the model will be saved. + | + | ---------------------------------------------------------------------- + | Readonly properties defined here: + | + | kernel + | + | ---------------------------------------------------------------------- + | Data and other attributes defined here: + | + | QUANTIZATION_MODE_ERROR_TEMPLATE = "Invalid quantization mode. Expecte... 
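Note (editor): `lora_rank` and `enable_lora()` are documented above but not demonstrated. A minimal sketch, not part of the upstream help text; the exact composition of the trainable weights after enabling LoRA is an implementation detail:

```python
# Editor's sketch: freeze the full kernel and train a low-rank delta instead.
import keras

layer = keras.layers.EinsumDense("ab,bc->ac", output_shape=64, bias_axes="c")
layer.build((None, 32))  # the kernel must exist before LoRA can wrap it
layer.enable_lora(2)     # kernel becomes non-trainable; the bias and two
                         # low-rank factor matrices remain trainable
print([w.shape for w in layer.trainable_weights])
```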
+ | + diff --git a/.tether/man/layer_embedding.txt b/.tether/man/layer_embedding.txt new file mode 100644 index 0000000000..966e9b03be --- /dev/null +++ b/.tether/man/layer_embedding.txt @@ -0,0 +1,154 @@ +Help on class Embedding in module keras.src.layers.core.embedding: + +class Embedding(keras.src.layers.layer.Layer) + | Embedding(input_dim, output_dim, embeddings_initializer='uniform', embeddings_regularizer=None, embeddings_constraint=None, mask_zero=False, weights=None, lora_rank=None, **kwargs) + | + | Turns positive integers (indexes) into dense vectors of fixed size. + | + | e.g. `[[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]` + | + | This layer can only be used on positive integer inputs of a fixed range. + | + | Example: + | + | >>> model = keras.Sequential() + | >>> model.add(keras.layers.Embedding(1000, 64)) + | >>> # The model will take as input an integer matrix of size (batch, + | >>> # input_length), and the largest integer (i.e. word index) in the input + | >>> # should be no larger than 999 (vocabulary size). + | >>> # Now model.output_shape is (None, 10, 64), where `None` is the batch + | >>> # dimension. + | >>> input_array = np.random.randint(1000, size=(32, 10)) + | >>> model.compile('rmsprop', 'mse') + | >>> output_array = model.predict(input_array) + | >>> print(output_array.shape) + | (32, 10, 64) + | + | Args: + | input_dim: Integer. Size of the vocabulary, + | i.e. maximum integer index + 1. + | output_dim: Integer. Dimension of the dense embedding. + | embeddings_initializer: Initializer for the `embeddings` + | matrix (see `keras.initializers`). + | embeddings_regularizer: Regularizer function applied to + | the `embeddings` matrix (see `keras.regularizers`). + | embeddings_constraint: Constraint function applied to + | the `embeddings` matrix (see `keras.constraints`). + | mask_zero: Boolean, whether or not the input value 0 is a special + | "padding" value that should be masked out. + | This is useful when using recurrent layers which + | may take variable length input. If this is `True`, + | then all subsequent layers in the model need + | to support masking or an exception will be raised. + | If `mask_zero` is set to `True`, as a consequence, + | index 0 cannot be used in the vocabulary (`input_dim` should + | equal size of vocabulary + 1). + | weights: Optional floating-point matrix of size + | `(input_dim, output_dim)`. The initial embeddings values + | to use. + | lora_rank: Optional integer. If set, the layer's forward pass + | will implement LoRA (Low-Rank Adaptation) + | with the provided rank. LoRA sets the layer's embeddings + | matrix to non-trainable and replaces it with a delta over the + | original matrix, obtained via multiplying two lower-rank + | trainable matrices. This can be useful to reduce the + | computation cost of fine-tuning large embedding layers. + | You can also enable LoRA on an existing + | `Embedding` layer by calling `layer.enable_lora(rank)`. + | + | Input shape: + | 2D tensor with shape: `(batch_size, input_length)`. + | + | Output shape: + | 3D tensor with shape: `(batch_size, input_length, output_dim)`. 
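Note (editor): the `mask_zero` behavior described above is easiest to see via `compute_mask()`, listed among the methods below. A minimal sketch, not part of the upstream help text:

```python
# Editor's sketch: with mask_zero=True, index 0 means "padding" and the
# layer propagates a mask that downstream layers can consume.
import numpy as np
import keras

layer = keras.layers.Embedding(input_dim=100, output_dim=4, mask_zero=True)
ids = np.array([[7, 2, 0, 0]])   # trailing zeros are padding
print(layer(ids).shape)          # (1, 4, 4)
print(layer.compute_mask(ids))   # [[True, True, False, False]]
```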
+ | + | Method resolution order: + | Embedding + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | input_dim, + | output_dim, + | embeddings_initializer='uniform', + | embeddings_regularizer=None, + | embeddings_constraint=None, + | mask_zero=False, + | weights=None, + | lora_rank=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, input_shape=None) + | + | call(self, inputs) + | + | compute_mask( + | self, + | inputs, + | mask=None + | ) + | + | compute_output_shape(self, input_shape) + | + | enable_lora( + | self, + | rank, + | a_initializer='he_uniform', + | b_initializer='zeros' + | ) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + | load_own_variables(self, store) + | Loads the state of the layer. + | + | You can override this method to take full control of how the state of + | the layer is loaded upon calling `keras.models.load_model()`. + | + | Args: + | store: Dict from which the state of the model will be loaded. + | + | quantize(self, mode) + | + | quantized_build( + | self, + | input_shape, + | mode + | ) + | + | quantized_call(self, inputs) + | + | save_own_variables(self, store) + | Saves the state of the layer. + | + | You can override this method to take full control of how the state of + | the layer is saved upon calling `model.save()`. + | + | Args: + | store: Dict where the state of the model will be saved. + | + | ---------------------------------------------------------------------- + | Readonly properties defined here: + | + | embeddings + | + | ---------------------------------------------------------------------- + | Data and other attributes defined here: + | + | QUANTIZATION_MODE_ERROR_TEMPLATE = "Invalid quantization mode. Expecte... + | + diff --git a/.tether/man/layer_feature_space.txt b/.tether/man/layer_feature_space.txt new file mode 100644 index 0000000000..20893c37f7 --- /dev/null +++ b/.tether/man/layer_feature_space.txt @@ -0,0 +1,334 @@ +Help on class FeatureSpace in module keras.src.layers.preprocessing.feature_space: + +class FeatureSpace(keras.src.layers.layer.Layer) + | FeatureSpace(features, output_mode='concat', crosses=None, crossing_dim=32, hashing_dim=32, num_discretization_bins=32, name=None) + | + | One-stop utility for preprocessing and encoding structured data. + | + | Arguments: + | feature_names: Dict mapping the names of your features to their + | type specification, e.g. `{"my_feature": "integer_categorical"}` + | or `{"my_feature": FeatureSpace.integer_categorical()}`. + | For a complete list of all supported types, see + | "Available feature types" paragraph below. + | output_mode: One of `"concat"` or `"dict"`. In concat mode, all + | features get concatenated together into a single vector. + | In dict mode, the FeatureSpace returns a dict of individually + | encoded features (with the same keys as the input dict keys). + | crosses: List of features to be crossed together, e.g. + | `crosses=[("feature_1", "feature_2")]`. The features will be + | "crossed" by hashing their combined value into + | a fixed-length vector. 
+ | crossing_dim: Default vector size for hashing crossed features. + | Defaults to `32`. + | hashing_dim: Default vector size for hashing features of type + | `"integer_hashed"` and `"string_hashed"`. Defaults to `32`. + | num_discretization_bins: Default number of bins to be used for + | discretizing features of type `"float_discretized"`. + | Defaults to `32`. + | + | **Available feature types:** + | + | Note that all features can be referred to by their string name, + | e.g. `"integer_categorical"`. When using the string name, the default + | argument values are used. + | + | ```python + | # Plain float values. + | FeatureSpace.float(name=None) + | + | # Float values to be preprocessed via featurewise standardization + | # (i.e. via a `keras.layers.Normalization` layer). + | FeatureSpace.float_normalized(name=None) + | + | # Float values to be preprocessed via linear rescaling + | # (i.e. via a `keras.layers.Rescaling` layer). + | FeatureSpace.float_rescaled(scale=1., offset=0., name=None) + | + | # Float values to be discretized. By default, the discrete + | # representation will then be one-hot encoded. + | FeatureSpace.float_discretized( + | num_bins, bin_boundaries=None, output_mode="one_hot", name=None) + | + | # Integer values to be indexed. By default, the discrete + | # representation will then be one-hot encoded. + | FeatureSpace.integer_categorical( + | max_tokens=None, num_oov_indices=1, output_mode="one_hot", name=None) + | + | # String values to be indexed. By default, the discrete + | # representation will then be one-hot encoded. + | FeatureSpace.string_categorical( + | max_tokens=None, num_oov_indices=1, output_mode="one_hot", name=None) + | + | # Integer values to be hashed into a fixed number of bins. + | # By default, the discrete representation will then be one-hot encoded. + | FeatureSpace.integer_hashed(num_bins, output_mode="one_hot", name=None) + | + | # String values to be hashed into a fixed number of bins. + | # By default, the discrete representation will then be one-hot encoded. + | FeatureSpace.string_hashed(num_bins, output_mode="one_hot", name=None) + | ``` + | + | Examples: + | + | **Basic usage with a dict of input data:** + | + | ```python + | raw_data = { + | "float_values": [0.0, 0.1, 0.2, 0.3], + | "string_values": ["zero", "one", "two", "three"], + | "int_values": [0, 1, 2, 3], + | } + | dataset = tf.data.Dataset.from_tensor_slices(raw_data) + | + | feature_space = FeatureSpace( + | features={ + | "float_values": "float_normalized", + | "string_values": "string_categorical", + | "int_values": "integer_categorical", + | }, + | crosses=[("string_values", "int_values")], + | output_mode="concat", + | ) + | # Before you start using the FeatureSpace, + | # you must `adapt()` it on some data. + | feature_space.adapt(dataset) + | + | # You can call the FeatureSpace on a dict of data (batched or unbatched). 
+ | output_vector = feature_space(raw_data) + | ``` + | + | **Basic usage with `tf.data`:** + | + | ```python + | # Unlabeled data + | preprocessed_ds = unlabeled_dataset.map(feature_space) + | + | # Labeled data + | preprocessed_ds = labeled_dataset.map(lambda x, y: (feature_space(x), y)) + | ``` + | + | **Basic usage with the Keras Functional API:** + | + | ```python + | # Retrieve a dict Keras Input objects + | inputs = feature_space.get_inputs() + | # Retrieve the corresponding encoded Keras tensors + | encoded_features = feature_space.get_encoded_features() + | # Build a Functional model + | outputs = keras.layers.Dense(1, activation="sigmoid")(encoded_features) + | model = keras.Model(inputs, outputs) + | ``` + | + | **Customizing each feature or feature cross:** + | + | ```python + | feature_space = FeatureSpace( + | features={ + | "float_values": FeatureSpace.float_normalized(), + | "string_values": FeatureSpace.string_categorical(max_tokens=10), + | "int_values": FeatureSpace.integer_categorical(max_tokens=10), + | }, + | crosses=[ + | FeatureSpace.cross(("string_values", "int_values"), crossing_dim=32) + | ], + | output_mode="concat", + | ) + | ``` + | + | **Returning a dict of integer-encoded features:** + | + | ```python + | feature_space = FeatureSpace( + | features={ + | "string_values": FeatureSpace.string_categorical(output_mode="int"), + | "int_values": FeatureSpace.integer_categorical(output_mode="int"), + | }, + | crosses=[ + | FeatureSpace.cross( + | feature_names=("string_values", "int_values"), + | crossing_dim=32, + | output_mode="int", + | ) + | ], + | output_mode="dict", + | ) + | ``` + | + | **Specifying your own Keras preprocessing layer:** + | + | ```python + | # Let's say that one of the features is a short text paragraph that + | # we want to encode as a vector (one vector per paragraph) via TF-IDF. + | data = { + | "text": ["1st string", "2nd string", "3rd string"], + | } + | + | # There's a Keras layer for this: TextVectorization. + | custom_layer = layers.TextVectorization(output_mode="tf_idf") + | + | # We can use FeatureSpace.feature to create a custom feature + | # that will use our preprocessing layer. + | feature_space = FeatureSpace( + | features={ + | "text": FeatureSpace.feature( + | preprocessor=custom_layer, dtype="string", output_mode="float" + | ), + | }, + | output_mode="concat", + | ) + | feature_space.adapt(tf.data.Dataset.from_tensor_slices(data)) + | output_vector = feature_space(data) + | ``` + | + | **Retrieving the underlying Keras preprocessing layers:** + | + | ```python + | # The preprocessing layer of each feature is available in `.preprocessors`. + | preprocessing_layer = feature_space.preprocessors["feature1"] + | + | # The crossing layer of each feature cross is available in `.crossers`. + | # It's an instance of keras.layers.HashedCrossing. 
+ | crossing_layer = feature_space.crossers["feature1_X_feature2"] + | ``` + | + | **Saving and reloading a FeatureSpace:** + | + | ```python + | feature_space.save("featurespace.keras") + | reloaded_feature_space = keras.models.load_model("featurespace.keras") + | ``` + | + | Method resolution order: + | FeatureSpace + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __call__(self, data) + | Call self as a function. + | + | __init__( + | self, + | features, + | output_mode='concat', + | crosses=None, + | crossing_dim=32, + | hashing_dim=32, + | num_discretization_bins=32, + | name=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | adapt(self, dataset) + | + | build_from_config(self, config) + | Builds the layer's states with the supplied config dict. + | + | By default, this method calls the `build(config["input_shape"])` method, + | which creates weights based on the layer's input shape in the supplied + | config. If your config contains other information needed to load the + | layer's state, you should override this method. + | + | Args: + | config: Dict containing the input shape associated with this layer. + | + | get_build_config(self) + | Returns a dictionary with the layer's input shape. + | + | This method returns a config dict that can be used by + | `build_from_config(config)` to create all states (e.g. Variables and + | Lookup tables) needed by the layer. + | + | By default, the config only contains the input shape that the layer + | was built with. If you're writing a custom layer that creates state in + | an unusual way, you should override this method to make sure this state + | is already created when Keras attempts to load its value upon model + | loading. + | + | Returns: + | A dict containing the input shape associated with the layer. + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + | get_encoded_features(self) + | + | get_inputs(self) + | + | load_own_variables(self, store) + | Loads the state of the layer. + | + | You can override this method to take full control of how the state of + | the layer is loaded upon calling `keras.models.load_model()`. + | + | Args: + | store: Dict from which the state of the model will be loaded. + | + | save(self, filepath) + | Save the `FeatureSpace` instance to a `.keras` file. + | + | You can reload it via `keras.models.load_model()`: + | + | ```python + | feature_space.save("featurespace.keras") + | reloaded_fs = keras.models.load_model("featurespace.keras") + | ``` + | + | save_own_variables(self, store) + | Saves the state of the layer. + | + | You can override this method to take full control of how the state of + | the layer is saved upon calling `model.save()`. + | + | Args: + | store: Dict where the state of the model will be saved. 
+ | + | ---------------------------------------------------------------------- + | Class methods defined here: + | + | cross(feature_names, crossing_dim, output_mode='one_hot') from builtins.type + | + | feature(dtype, preprocessor, output_mode) from builtins.type + | + | float(name=None) from builtins.type + | + | float_discretized(num_bins, bin_boundaries=None, output_mode='one_hot', name=None) from builtins.type + | + | float_normalized(name=None) from builtins.type + | + | float_rescaled(scale=1.0, offset=0.0, name=None) from builtins.type + | + | from_config(config) from builtins.type + | Creates a layer from its config. + | + | This method is the reverse of `get_config`, + | capable of instantiating the same layer from the config + | dictionary. It does not handle layer connectivity + | (handled by Network), nor weights (handled by `set_weights`). + | + | Args: + | config: A Python dictionary, typically the + | output of get_config. + | + | Returns: + | A layer instance. + | + | integer_categorical(max_tokens=None, num_oov_indices=1, output_mode='one_hot', name=None) from builtins.type + | + | integer_hashed(num_bins, output_mode='one_hot', name=None) from builtins.type + | + | string_categorical(max_tokens=None, num_oov_indices=1, output_mode='one_hot', name=None) from builtins.type + | + | string_hashed(num_bins, output_mode='one_hot', name=None) from builtins.type + | + diff --git a/.tether/man/layer_flatten.txt b/.tether/man/layer_flatten.txt new file mode 100644 index 0000000000..e83699b1e4 --- /dev/null +++ b/.tether/man/layer_flatten.txt @@ -0,0 +1,59 @@ +Help on class Flatten in module keras.src.layers.reshaping.flatten: + +class Flatten(keras.src.layers.layer.Layer) + | Flatten(data_format=None, **kwargs) + | + | Flattens the input. Does not affect the batch size. + | + | Note: If inputs are shaped `(batch,)` without a feature axis, then + | flattening adds an extra channel dimension and output shape is `(batch, 1)`. + | + | Args: + | data_format: A string, one of `"channels_last"` (default) or + | `"channels_first"`. The ordering of the dimensions in the inputs. + | `"channels_last"` corresponds to inputs with shape + | `(batch, ..., channels)` while `"channels_first"` corresponds to + | inputs with shape `(batch, channels, ...)`. + | When unspecified, uses `image_data_format` value found in your Keras + | config file at `~/.keras/keras.json` (if exists). Defaults to + | `"channels_last"`. + | + | Example: + | + | >>> x = keras.Input(shape=(10, 64)) + | >>> y = keras.layers.Flatten()(x) + | >>> y.shape + | (None, 640) + | + | Method resolution order: + | Flatten + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | data_format=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | compute_output_spec(self, inputs) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. 
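Note (editor): a quick numeric check of the flattening arithmetic described above, not part of the upstream help text:

```python
# Editor's sketch: all non-batch dimensions collapse into one.
import numpy as np
import keras

y = keras.layers.Flatten()(np.zeros((2, 3, 4)))
print(y.shape)  # (2, 12): 3 * 4 features per sample, batch untouched
```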
+ | + diff --git a/.tether/man/layer_flax_module_wrapper.txt b/.tether/man/layer_flax_module_wrapper.txt new file mode 100644 index 0000000000..9139f201cd --- /dev/null +++ b/.tether/man/layer_flax_module_wrapper.txt @@ -0,0 +1,140 @@ +Help on class FlaxLayer in module keras.src.utils.jax_layer: + +class FlaxLayer(JaxLayer) + | FlaxLayer(module, method=None, variables=None, **kwargs) + | + | Keras Layer that wraps a [Flax](https://flax.readthedocs.io) module. + | + | This layer enables the use of Flax components in the form of + | [`flax.linen.Module`]( + | https://flax.readthedocs.io/en/latest/api_reference/flax.linen/module.html) + | instances within Keras when using JAX as the backend for Keras. + | + | The module method to use for the forward pass can be specified via the + | `method` argument and is `__call__` by default. This method must take the + | following arguments with these exact names: + | + | - `self` if the method is bound to the module, which is the case for the + | default of `__call__`, and `module` otherwise to pass the module. + | - `inputs`: the inputs to the model, a JAX array or a `PyTree` of arrays. + | - `training` *(optional)*: an argument specifying if we're in training mode + | or inference mode, `True` is passed in training mode. + | + | `FlaxLayer` handles the non-trainable state of your model and required RNGs + | automatically. Note that the `mutable` parameter of + | [`flax.linen.Module.apply()`]( + | https://flax.readthedocs.io/en/latest/api_reference/flax.linen/module.html#flax.linen.apply) + | is set to `DenyList(["params"])`, therefore making the assumption that all + | the variables outside of the "params" collection are non-trainable weights. + | + | This example shows how to create a `FlaxLayer` from a Flax `Module` with + | the default `__call__` method and no training argument: + | + | ```python + | class MyFlaxModule(flax.linen.Module): + | @flax.linen.compact + | def __call__(self, inputs): + | x = inputs + | x = flax.linen.Conv(features=32, kernel_size=(3, 3))(x) + | x = flax.linen.relu(x) + | x = flax.linen.avg_pool(x, window_shape=(2, 2), strides=(2, 2)) + | x = x.reshape((x.shape[0], -1)) # flatten + | x = flax.linen.Dense(features=200)(x) + | x = flax.linen.relu(x) + | x = flax.linen.Dense(features=10)(x) + | x = flax.linen.softmax(x) + | return x + | + | flax_module = MyFlaxModule() + | keras_layer = FlaxLayer(flax_module) + | ``` + | + | This example shows how to wrap the module method to conform to the required + | signature. This allows having multiple input arguments and a training + | argument that has a different name and values. This additionally shows how + | to use a function that is not bound to the module. + | + | ```python + | class MyFlaxModule(flax.linen.Module): + | @flax.linen.compact + | def forward(self, input1, input2, deterministic): + | ... + | return outputs + | + | def my_flax_module_wrapper(module, inputs, training): + | input1, input2 = inputs + | return module.forward(input1, input2, not training) + | + | flax_module = MyFlaxModule() + | keras_layer = FlaxLayer( + | module=flax_module, + | method=my_flax_module_wrapper, + | ) + | ``` + | + | Args: + | module: An instance of `flax.linen.Module` or subclass. + | method: The method to call the model. This is generally a method in the + | `Module`. If not provided, the `__call__` method is used. `method` + | can also be a function not defined in the `Module`, in which case it + | must take the `Module` as the first argument. 
It is used for both + | `Module.init` and `Module.apply`. Details are documented in the + | `method` argument of [`flax.linen.Module.apply()`]( + | https://flax.readthedocs.io/en/latest/api_reference/flax.linen/module.html#flax.linen.apply). + | variables: A `dict` containing all the variables of the module in the + | same format as what is returned by [`flax.linen.Module.init()`]( + | https://flax.readthedocs.io/en/latest/api_reference/flax.linen/module.html#flax.linen.init). + | It should contain a "params" key and, if applicable, other keys for + | collections of variables for non-trainable state. This allows + | passing trained parameters and learned non-trainable state or + | controlling the initialization. If `None` is passed, the module's + | `init` function is called at build time to initialize the variables + | of the model. + | + | Method resolution order: + | FlaxLayer + | JaxLayer + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | module, + | method=None, + | variables=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + | ---------------------------------------------------------------------- + | Class methods defined here: + | + | from_config(config) from builtins.type + | Creates a layer from its config. + | + | This method is the reverse of `get_config`, + | capable of instantiating the same layer from the config + | dictionary. It does not handle layer connectivity + | (handled by Network), nor weights (handled by `set_weights`). + | + | Args: + | config: A Python dictionary, typically the + | output of get_config. + | + | Returns: + | A layer instance. + | + diff --git a/.tether/man/layer_gaussian_dropout.txt b/.tether/man/layer_gaussian_dropout.txt new file mode 100644 index 0000000000..83da97b373 --- /dev/null +++ b/.tether/man/layer_gaussian_dropout.txt @@ -0,0 +1,55 @@ +Help on class GaussianDropout in module keras.src.layers.regularization.gaussian_dropout: + +class GaussianDropout(keras.src.layers.layer.Layer) + | GaussianDropout(rate, seed=None, **kwargs) + | + | Apply multiplicative 1-centered Gaussian noise. + | + | As it is a regularization layer, it is only active at training time. + | + | Args: + | rate: Float, drop probability (as with `Dropout`). + | The multiplicative noise will have + | standard deviation `sqrt(rate / (1 - rate))`. + | seed: Integer, optional random seed to enable deterministic behavior. + | + | Call arguments: + | inputs: Input tensor (of any rank). + | training: Python boolean indicating whether the layer should behave in + | training mode (adding dropout) or in inference mode (doing nothing). 
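Note (editor): the `GaussianDropout` help above ships no usage example. A minimal sketch of the training-only behavior, not part of the upstream help text:

```python
# Editor's sketch: GaussianDropout is the identity at inference time and
# multiplies by 1-centered Gaussian noise (stddev sqrt(rate / (1 - rate)))
# when training=True.
import numpy as np
import keras

x = np.ones((2, 4))
layer = keras.layers.GaussianDropout(rate=0.3, seed=1)
print(layer(x, training=False))  # unchanged: all ones
print(layer(x, training=True))   # noisy values centered around 1.0
```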
+ |
+ | Method resolution order:
+ | GaussianDropout
+ | keras.src.layers.layer.Layer
+ | keras.src.backend.tensorflow.layer.TFLayer
+ | keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ | tensorflow.python.trackable.autotrackable.AutoTrackable
+ | tensorflow.python.trackable.base.Trackable
+ | keras.src.ops.operation.Operation
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | rate,
+ | seed=None,
+ | **kwargs
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | call(
+ | self,
+ | inputs,
+ | training=False
+ | )
+ |
+ | compute_output_shape(self, input_shape)
+ |
+ | get_config(self)
+ | Returns the config of the object.
+ |
+ | An object config is a Python dictionary (serializable)
+ | containing the information needed to re-instantiate it.
+ |
diff --git a/.tether/man/layer_gaussian_noise.txt b/.tether/man/layer_gaussian_noise.txt
new file mode 100644
index 0000000000..cc0446ac97
--- /dev/null
+++ b/.tether/man/layer_gaussian_noise.txt
@@ -0,0 +1,58 @@
+Help on class GaussianNoise in module keras.src.layers.regularization.gaussian_noise:
+
+class GaussianNoise(keras.src.layers.layer.Layer)
+ | GaussianNoise(stddev, seed=None, **kwargs)
+ |
+ | Apply additive zero-centered Gaussian noise.
+ |
+ | This is useful to mitigate overfitting
+ | (you could see it as a form of random data augmentation).
+ | Gaussian noise (GN) is a natural choice as corruption process
+ | for real-valued inputs.
+ |
+ | As it is a regularization layer, it is only active at training time.
+ |
+ | Args:
+ | stddev: Float, standard deviation of the noise distribution.
+ | seed: Integer, optional random seed to enable deterministic behavior.
+ |
+ | Call arguments:
+ | inputs: Input tensor (of any rank).
+ | training: Python boolean indicating whether the layer should behave in
+ | training mode (adding noise) or in inference mode (doing nothing).
+ |
+ | Method resolution order:
+ | GaussianNoise
+ | keras.src.layers.layer.Layer
+ | keras.src.backend.tensorflow.layer.TFLayer
+ | keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ | tensorflow.python.trackable.autotrackable.AutoTrackable
+ | tensorflow.python.trackable.base.Trackable
+ | keras.src.ops.operation.Operation
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | stddev,
+ | seed=None,
+ | **kwargs
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | call(
+ | self,
+ | inputs,
+ | training=False
+ | )
+ |
+ | compute_output_shape(self, input_shape)
+ |
+ | get_config(self)
+ | Returns the config of the object.
+ |
+ | An object config is a Python dictionary (serializable)
+ | containing the information needed to re-instantiate it.
+ |
diff --git a/.tether/man/layer_global_average_pooling_1d.txt b/.tether/man/layer_global_average_pooling_1d.txt
new file mode 100644
index 0000000000..a6f6746875
--- /dev/null
+++ b/.tether/man/layer_global_average_pooling_1d.txt
@@ -0,0 +1,86 @@
+Help on class GlobalAveragePooling1D in module keras.src.layers.pooling.global_average_pooling1d:
+
+class GlobalAveragePooling1D(keras.src.layers.pooling.base_global_pooling.BaseGlobalPooling)
+ | GlobalAveragePooling1D(data_format=None, keepdims=False, **kwargs)
+ |
+ | Global average pooling operation for temporal data.
+ |
+ | Args:
+ | data_format: string, either `"channels_last"` or `"channels_first"`.
+ | The ordering of the dimensions in the inputs. `"channels_last"`
`"channels_last"` + | corresponds to inputs with shape `(batch, steps, features)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch, features, steps)`. It defaults to the `image_data_format` + | value found in your Keras config file at `~/.keras/keras.json`. + | If you never set it, then it will be `"channels_last"`. + | keepdims: A boolean, whether to keep the temporal dimension or not. + | If `keepdims` is `False` (default), the rank of the tensor is + | reduced for spatial dimensions. If `keepdims` is `True`, the + | temporal dimension are retained with length 1. + | The behavior is the same as for `tf.reduce_mean` or `np.mean`. + | + | Call arguments: + | inputs: A 3D tensor. + | mask: Binary tensor of shape `(batch_size, steps)` indicating whether + | a given step should be masked (excluded from the average). + | + | Input shape: + | + | - If `data_format='channels_last'`: + | 3D tensor with shape: + | `(batch_size, steps, features)` + | - If `data_format='channels_first'`: + | 3D tensor with shape: + | `(batch_size, features, steps)` + | + | Output shape: + | + | - If `keepdims=False`: + | 2D tensor with shape `(batch_size, features)`. + | - If `keepdims=True`: + | - If `data_format="channels_last"`: + | 3D tensor with shape `(batch_size, 1, features)` + | - If `data_format="channels_first"`: + | 3D tensor with shape `(batch_size, features, 1)` + | + | Example: + | + | >>> x = np.random.rand(2, 3, 4) + | >>> y = keras.layers.GlobalAveragePooling1D()(x) + | >>> y.shape + | (2, 4) + | + | Method resolution order: + | GlobalAveragePooling1D + | keras.src.layers.pooling.base_global_pooling.BaseGlobalPooling + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | data_format=None, + | keepdims=False, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call( + | self, + | inputs, + | mask=None + | ) + | + | compute_mask( + | self, + | inputs, + | mask=None + | ) + | + diff --git a/.tether/man/layer_global_average_pooling_2d.txt b/.tether/man/layer_global_average_pooling_2d.txt new file mode 100644 index 0000000000..d842da5731 --- /dev/null +++ b/.tether/man/layer_global_average_pooling_2d.txt @@ -0,0 +1,72 @@ +Help on class GlobalAveragePooling2D in module keras.src.layers.pooling.global_average_pooling2d: + +class GlobalAveragePooling2D(keras.src.layers.pooling.base_global_pooling.BaseGlobalPooling) + | GlobalAveragePooling2D(data_format=None, keepdims=False, **kwargs) + | + | Global average pooling operation for 2D data. + | + | Args: + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape `(batch, height, width, channels)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch, features, height, weight)`. It defaults to the + | `image_data_format` value found in your Keras config file at + | `~/.keras/keras.json`. If you never set it, then it will be + | `"channels_last"`. + | keepdims: A boolean, whether to keep the temporal dimension or not. + | If `keepdims` is `False` (default), the rank of the tensor is + | reduced for spatial dimensions. 
If `keepdims` is `True`, the + | spatial dimension are retained with length 1. + | The behavior is the same as for `tf.reduce_mean` or `np.mean`. + | + | Input shape: + | + | - If `data_format='channels_last'`: + | 4D tensor with shape: + | `(batch_size, height, width, channels)` + | - If `data_format='channels_first'`: + | 4D tensor with shape: + | `(batch_size, channels, height, width)` + | + | Output shape: + | + | - If `keepdims=False`: + | 2D tensor with shape `(batch_size, channels)`. + | - If `keepdims=True`: + | - If `data_format="channels_last"`: + | 4D tensor with shape `(batch_size, 1, 1, channels)` + | - If `data_format="channels_first"`: + | 4D tensor with shape `(batch_size, channels, 1, 1)` + | + | Example: + | + | >>> x = np.random.rand(2, 4, 5, 3) + | >>> y = keras.layers.GlobalAveragePooling2D()(x) + | >>> y.shape + | (2, 3) + | + | Method resolution order: + | GlobalAveragePooling2D + | keras.src.layers.pooling.base_global_pooling.BaseGlobalPooling + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | data_format=None, + | keepdims=False, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + diff --git a/.tether/man/layer_global_average_pooling_3d.txt b/.tether/man/layer_global_average_pooling_3d.txt new file mode 100644 index 0000000000..3d10576e2f --- /dev/null +++ b/.tether/man/layer_global_average_pooling_3d.txt @@ -0,0 +1,73 @@ +Help on class GlobalAveragePooling3D in module keras.src.layers.pooling.global_average_pooling3d: + +class GlobalAveragePooling3D(keras.src.layers.pooling.base_global_pooling.BaseGlobalPooling) + | GlobalAveragePooling3D(data_format=None, keepdims=False, **kwargs) + | + | Global average pooling operation for 3D data. + | + | Args: + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape + | `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. + | It defaults to the `image_data_format` value found in your Keras + | config file at `~/.keras/keras.json`. If you never set it, then it + | will be `"channels_last"`. + | keepdims: A boolean, whether to keep the temporal dimension or not. + | If `keepdims` is `False` (default), the rank of the tensor is + | reduced for spatial dimensions. If `keepdims` is `True`, the + | spatial dimension are retained with length 1. + | The behavior is the same as for `tf.reduce_mean` or `np.mean`. + | + | Input shape: + | + | - If `data_format='channels_last'`: + | 5D tensor with shape: + | `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + | - If `data_format='channels_first'`: + | 5D tensor with shape: + | `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` + | + | Output shape: + | + | - If `keepdims=False`: + | 2D tensor with shape `(batch_size, channels)`. 
+ | - If `keepdims=True`: + | - If `data_format="channels_last"`: + | 5D tensor with shape `(batch_size, 1, 1, 1, channels)` + | - If `data_format="channels_first"`: + | 5D tensor with shape `(batch_size, channels, 1, 1, 1)` + | + | Example: + | + | >>> x = np.random.rand(2, 4, 5, 4, 3) + | >>> y = keras.layers.GlobalAveragePooling3D()(x) + | >>> y.shape + | (2, 3) + | + | Method resolution order: + | GlobalAveragePooling3D + | keras.src.layers.pooling.base_global_pooling.BaseGlobalPooling + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | data_format=None, + | keepdims=False, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + diff --git a/.tether/man/layer_global_max_pooling_1d.txt b/.tether/man/layer_global_max_pooling_1d.txt new file mode 100644 index 0000000000..6b594532e7 --- /dev/null +++ b/.tether/man/layer_global_max_pooling_1d.txt @@ -0,0 +1,71 @@ +Help on class GlobalMaxPooling1D in module keras.src.layers.pooling.global_max_pooling1d: + +class GlobalMaxPooling1D(keras.src.layers.pooling.base_global_pooling.BaseGlobalPooling) + | GlobalMaxPooling1D(data_format=None, keepdims=False, **kwargs) + | + | Global max pooling operation for temporal data. + | + | Args: + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape `(batch, steps, features)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch, features, steps)`. It defaults to the `image_data_format` + | value found in your Keras config file at `~/.keras/keras.json`. + | If you never set it, then it will be `"channels_last"`. + | keepdims: A boolean, whether to keep the temporal dimension or not. + | If `keepdims` is `False` (default), the rank of the tensor is + | reduced for spatial dimensions. If `keepdims` is `True`, the + | temporal dimension are retained with length 1. + | The behavior is the same as for `tf.reduce_mean` or `np.mean`. + | + | Input shape: + | + | - If `data_format='channels_last'`: + | 3D tensor with shape: + | `(batch_size, steps, features)` + | - If `data_format='channels_first'`: + | 3D tensor with shape: + | `(batch_size, features, steps)` + | + | Output shape: + | + | - If `keepdims=False`: + | 2D tensor with shape `(batch_size, features)`. 
+ | - If `keepdims=True`: + | - If `data_format="channels_last"`: + | 3D tensor with shape `(batch_size, 1, features)` + | - If `data_format="channels_first"`: + | 3D tensor with shape `(batch_size, features, 1)` + | + | Example: + | + | >>> x = np.random.rand(2, 3, 4) + | >>> y = keras.layers.GlobalMaxPooling1D()(x) + | >>> y.shape + | (2, 4) + | + | Method resolution order: + | GlobalMaxPooling1D + | keras.src.layers.pooling.base_global_pooling.BaseGlobalPooling + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | data_format=None, + | keepdims=False, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + diff --git a/.tether/man/layer_global_max_pooling_2d.txt b/.tether/man/layer_global_max_pooling_2d.txt new file mode 100644 index 0000000000..f5d31f93aa --- /dev/null +++ b/.tether/man/layer_global_max_pooling_2d.txt @@ -0,0 +1,72 @@ +Help on class GlobalMaxPooling2D in module keras.src.layers.pooling.global_max_pooling2d: + +class GlobalMaxPooling2D(keras.src.layers.pooling.base_global_pooling.BaseGlobalPooling) + | GlobalMaxPooling2D(data_format=None, keepdims=False, **kwargs) + | + | Global max pooling operation for 2D data. + | + | Args: + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape `(batch, height, width, channels)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch, channels, height, width)`. It defaults to the + | `image_data_format` value found in your Keras config file at + | `~/.keras/keras.json`. If you never set it, then it will be + | `"channels_last"`. + | keepdims: A boolean, whether to keep the spatial dimensions or not. + | If `keepdims` is `False` (default), the rank of the tensor is + | reduced for spatial dimensions. If `keepdims` is `True`, the + | spatial dimensions are retained with length 1. + | The behavior is the same as for `tf.reduce_max` or `np.max`. + | + | Input shape: + | + | - If `data_format='channels_last'`: + | 4D tensor with shape: + | `(batch_size, height, width, channels)` + | - If `data_format='channels_first'`: + | 4D tensor with shape: + | `(batch_size, channels, height, width)` + | + | Output shape: + | + | - If `keepdims=False`: + | 2D tensor with shape `(batch_size, channels)`.
+ | - If `keepdims=True`: + | - If `data_format="channels_last"`: + | 4D tensor with shape `(batch_size, 1, 1, channels)` + | - If `data_format="channels_first"`: + | 4D tensor with shape `(batch_size, channels, 1, 1)` + | + | Example: + | + | >>> x = np.random.rand(2, 4, 5, 3) + | >>> y = keras.layers.GlobalMaxPooling2D()(x) + | >>> y.shape + | (2, 3) + | + | Method resolution order: + | GlobalMaxPooling2D + | keras.src.layers.pooling.base_global_pooling.BaseGlobalPooling + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | data_format=None, + | keepdims=False, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + diff --git a/.tether/man/layer_global_max_pooling_3d.txt b/.tether/man/layer_global_max_pooling_3d.txt new file mode 100644 index 0000000000..e758422689 --- /dev/null +++ b/.tether/man/layer_global_max_pooling_3d.txt @@ -0,0 +1,73 @@ +Help on class GlobalMaxPooling3D in module keras.src.layers.pooling.global_max_pooling3d: + +class GlobalMaxPooling3D(keras.src.layers.pooling.base_global_pooling.BaseGlobalPooling) + | GlobalMaxPooling3D(data_format=None, keepdims=False, **kwargs) + | + | Global max pooling operation for 3D data. + | + | Args: + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape + | `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. + | It defaults to the `image_data_format` value found in your Keras + | config file at `~/.keras/keras.json`. If you never set it, then it + | will be `"channels_last"`. + | keepdims: A boolean, whether to keep the spatial dimensions or not. + | If `keepdims` is `False` (default), the rank of the tensor is + | reduced for spatial dimensions. If `keepdims` is `True`, the + | spatial dimensions are retained with length 1. + | The behavior is the same as for `tf.reduce_max` or `np.max`. + | + | Input shape: + | + | - If `data_format='channels_last'`: + | 5D tensor with shape: + | `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + | - If `data_format='channels_first'`: + | 5D tensor with shape: + | `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` + | + | Output shape: + | + | - If `keepdims=False`: + | 2D tensor with shape `(batch_size, channels)`.
+ | - If `keepdims=True`: + | - If `data_format="channels_last"`: + | 5D tensor with shape `(batch_size, 1, 1, 1, channels)` + | - If `data_format="channels_first"`: + | 5D tensor with shape `(batch_size, channels, 1, 1, 1)` + | + | Example: + | + | >>> x = np.random.rand(2, 4, 5, 4, 3) + | >>> y = keras.layers.GlobalMaxPooling3D()(x) + | >>> y.shape + | (2, 3) + | + | Method resolution order: + | GlobalMaxPooling3D + | keras.src.layers.pooling.base_global_pooling.BaseGlobalPooling + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | data_format=None, + | keepdims=False, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + diff --git a/.tether/man/layer_group_normalization.txt b/.tether/man/layer_group_normalization.txt new file mode 100644 index 0000000000..6c1a85fe98 --- /dev/null +++ b/.tether/man/layer_group_normalization.txt @@ -0,0 +1,100 @@ +Help on class GroupNormalization in module keras.src.layers.normalization.group_normalization: + +class GroupNormalization(keras.src.layers.layer.Layer) + | GroupNormalization(groups=32, axis=-1, epsilon=0.001, center=True, scale=True, beta_initializer='zeros', gamma_initializer='ones', beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None, **kwargs) + | + | Group normalization layer. + | + | Group Normalization divides the channels into groups and computes + | within each group the mean and variance for normalization. + | Empirically, its accuracy is more stable than batch norm in a wide + | range of small batch sizes, if learning rate is adjusted linearly + | with batch sizes. + | + | Relation to Layer Normalization: + | If the number of groups is set to 1, then this operation becomes nearly + | identical to Layer Normalization (see Layer Normalization docs for details). + | + | Relation to Instance Normalization: + | If the number of groups is set to the input dimension (number of groups is + | equal to number of channels), then this operation becomes identical to + | Instance Normalization. You can achieve this via `groups=-1`. + | + | Args: + | groups: Integer, the number of groups for Group Normalization. Can be in + | the range `[1, N]` where N is the input dimension. The input + | dimension must be divisible by the number of groups. + | Defaults to 32. + | axis: Integer or List/Tuple. The axis or axes to normalize across. + | Typically, this is the features axis/axes. The left-out axes are + | typically the batch axis/axes. -1 is the last dimension in the + | input. Defaults to `-1`. + | epsilon: Small float added to variance to avoid dividing by zero. + | Defaults to 1e-3. + | center: If `True`, add offset of `beta` to normalized tensor. + | If `False`, `beta` is ignored. Defaults to `True`. + | scale: If `True`, multiply by `gamma`. If `False`, `gamma` is not used. + | When the next layer is linear (also e.g. `relu`), this can be + | disabled since the scaling will be done by the next layer. + | Defaults to `True`. + | beta_initializer: Initializer for the beta weight. Defaults to zeros. + | gamma_initializer: Initializer for the gamma weight. Defaults to ones. + | beta_regularizer: Optional regularizer for the beta weight. 
None by + | default. + | gamma_regularizer: Optional regularizer for the gamma weight. None by + | default. + | beta_constraint: Optional constraint for the beta weight. + | None by default. + | gamma_constraint: Optional constraint for the gamma weight. None by + | default. + | **kwargs: Base layer keyword arguments (e.g. `name` and `dtype`). + | + | Input shape: + | Arbitrary. Use the keyword argument `input_shape` (tuple of + | integers, does not include the samples axis) when using this + | layer as the first layer in a model. + | + | Output shape: + | Same shape as input. + | + | Reference: + | + | - [Yuxin Wu & Kaiming He, 2018](https://arxiv.org/abs/1803.08494) + | + | Method resolution order: + | GroupNormalization + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | groups=32, + | axis=-1, + | epsilon=0.001, + | center=True, + | scale=True, + | beta_initializer='zeros', + | gamma_initializer='ones', + | beta_regularizer=None, + | gamma_regularizer=None, + | beta_constraint=None, + | gamma_constraint=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, input_shape) + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_group_query_attention.txt b/.tether/man/layer_group_query_attention.txt new file mode 100644 index 0000000000..e2ef758ce4 --- /dev/null +++ b/.tether/man/layer_group_query_attention.txt @@ -0,0 +1,136 @@ +Help on class GroupedQueryAttention in module keras.src.layers.attention.grouped_query_attention: + +class GroupedQueryAttention(keras.src.layers.layer.Layer) + | GroupedQueryAttention(head_dim, num_query_heads, num_key_value_heads, dropout=0.0, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs) + | + | Grouped Query Attention layer. + | + | This is an implementation of grouped-query attention introduced by + | [Ainslie et al., 2023](https://arxiv.org/abs/2305.13245). Here + | `num_key_value_heads` denotes the number of groups; setting + | `num_key_value_heads` to 1 is equivalent to multi-query attention, and + | when `num_key_value_heads` is equal to `num_query_heads` it is equivalent + | to multi-head attention. + | + | This layer first projects `query`, `key`, and `value` tensors. Then, `key` + | and `value` are repeated to match the number of heads of `query`. + | + | Then, the `query` is scaled and dot-producted with `key` tensors. These are + | softmaxed to obtain attention probabilities. The value tensors are then + | interpolated by these probabilities and concatenated back to a single + | tensor. + | + | Args: + | head_dim: Size of each attention head. + | num_query_heads: Number of query attention heads. + | num_key_value_heads: Number of key and value attention heads. + | dropout: Dropout probability. + | use_bias: Boolean, whether the dense layers use bias vectors/matrices.
+ | kernel_initializer: Initializer for dense layer kernels. + | bias_initializer: Initializer for dense layer biases. + | kernel_regularizer: Regularizer for dense layer kernels. + | bias_regularizer: Regularizer for dense layer biases. + | activity_regularizer: Regularizer for dense layer activity. + | kernel_constraint: Constraint for dense layer kernels. + | bias_constraint: Constraint for dense layer biases. + | + | Call arguments: + | query: Query tensor of shape `(batch_dim, target_seq_len, feature_dim)`, + | where `batch_dim` is batch size, `target_seq_len` is the length of + | the target sequence, and `feature_dim` is the feature dimension. + | value: Value tensor of shape `(batch_dim, source_seq_len, feature_dim)`, + | where `batch_dim` is batch size, `source_seq_len` is the length of + | the source sequence, and `feature_dim` is the feature dimension. + | key: Optional key tensor of shape + | `(batch_dim, source_seq_len, feature_dim)`. If not given, will use + | `value` for both `key` and `value`, which is the most common case. + | attention_mask: A boolean mask of shape + | `(batch_dim, target_seq_len, source_seq_len)`, that prevents + | attention to certain positions. The boolean mask specifies which + | query elements can attend to which key elements, where 1 indicates + | attention and 0 indicates no attention. Broadcasting can happen for + | the missing batch dimensions and the head dimension. + | return_attention_scores: A boolean to indicate whether the output + | should be `(attention_output, attention_scores)` if `True`, or + | `attention_output` if `False`. Defaults to `False`. + | training: Python boolean indicating whether the layer should behave in + | training mode (adding dropout) or in inference mode (no dropout). + | Falls back to the training mode of the parent + | layer/model, or `False` (inference) if there is no parent layer. + | use_causal_mask: A boolean to indicate whether to apply a causal mask to + | prevent tokens from attending to future tokens (e.g., used in a + | decoder Transformer). + | + | Returns: + | attention_output: Result of the computation, of shape + | `(batch_dim, target_seq_len, feature_dim)`, where `target_seq_len` + | is the target sequence length and `feature_dim` is the last + | dimension of the query input. + | attention_scores: (Optional) attention coefficients of shape + | `(batch_dim, num_query_heads, target_seq_len, source_seq_len)`. + | + | Method resolution order: + | GroupedQueryAttention + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | head_dim, + | num_query_heads, + | num_key_value_heads, + | dropout=0.0, + | use_bias=True, + | kernel_initializer='glorot_uniform', + | bias_initializer='zeros', + | kernel_regularizer=None, + | bias_regularizer=None, + | activity_regularizer=None, + | kernel_constraint=None, + | bias_constraint=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature.
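Neither `GroupNormalization` (documented above) nor `GroupedQueryAttention` ships with a doctest in these help dumps; a minimal usage sketch of each, with shapes and hyperparameters chosen arbitrarily for illustration:

```python
import numpy as np
import keras

# GroupNormalization: 16 channels split into 4 groups (16 must be
# divisible by `groups`); output shape equals input shape.
x = np.random.rand(2, 8, 16)
gn = keras.layers.GroupNormalization(groups=4)
print(gn(x).shape)  # (2, 8, 16)

# GroupedQueryAttention: 8 query heads sharing 2 key/value heads.
# The attention output keeps the query's shape
# (batch_dim, target_seq_len, feature_dim).
query = np.random.rand(2, 8, 16)   # (batch_dim, target_seq_len, feature_dim)
value = np.random.rand(2, 10, 16)  # (batch_dim, source_seq_len, feature_dim)
gqa = keras.layers.GroupedQueryAttention(
    head_dim=4, num_query_heads=8, num_key_value_heads=2)
print(gqa(query, value).shape)     # (2, 8, 16) -- key defaults to value
```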
+ | + | build( + | self, + | query_shape, + | value_shape, + | key_shape=None + | ) + | + | call( + | self, + | query, + | value, + | key=None, + | query_mask=None, + | value_mask=None, + | key_mask=None, + | attention_mask=None, + | return_attention_scores=False, + | training=None, + | use_causal_mask=False + | ) + | + | compute_output_shape( + | self, + | query_shape, + | value_shape, + | key_shape=None + | ) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_gru.txt b/.tether/man/layer_gru.txt new file mode 100644 index 0000000000..69b97cf4b6 --- /dev/null +++ b/.tether/man/layer_gru.txt @@ -0,0 +1,245 @@ +Help on class GRU in module keras.src.layers.rnn.gru: + +class GRU(keras.src.layers.rnn.rnn.RNN) + | GRU(units, activation='tanh', recurrent_activation='sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0.0, recurrent_dropout=0.0, seed=None, return_sequences=False, return_state=False, go_backwards=False, stateful=False, unroll=False, reset_after=True, use_cudnn='auto', **kwargs) + | + | Gated Recurrent Unit - Cho et al. 2014. + | + | Based on available runtime hardware and constraints, this layer + | will choose different implementations (cuDNN-based or backend-native) + | to maximize the performance. If a GPU is available and all + | the arguments to the layer meet the requirement of the cuDNN kernel + | (see below for details), the layer will use a fast cuDNN implementation + | when using the TensorFlow backend. + | + | The requirements to use the cuDNN implementation are: + | + | 1. `activation` == `tanh` + | 2. `recurrent_activation` == `sigmoid` + | 3. `dropout` == 0 and `recurrent_dropout` == 0 + | 4. `unroll` is `False` + | 5. `use_bias` is `True` + | 6. `reset_after` is `True` + | 7. Inputs, if masking is used, are strictly right-padded. + | 8. Eager execution is enabled in the outermost context. + | + | There are two variants of the GRU implementation. The default one is based + | on [v3](https://arxiv.org/abs/1406.1078v3) and has reset gate applied to + | hidden state before matrix multiplication. The other one is based on + | [original](https://arxiv.org/abs/1406.1078v1) and has the order reversed. + | + | The second variant is compatible with CuDNNGRU (GPU-only) and allows + | inference on CPU. Thus it has separate biases for `kernel` and + | `recurrent_kernel`. To use this variant, set `reset_after=True` and + | `recurrent_activation='sigmoid'`. + | + | For example: + | + | >>> inputs = np.random.random((32, 10, 8)) + | >>> gru = keras.layers.GRU(4) + | >>> output = gru(inputs) + | >>> output.shape + | (32, 4) + | >>> gru = keras.layers.GRU(4, return_sequences=True, return_state=True) + | >>> whole_sequence_output, final_state = gru(inputs) + | >>> whole_sequence_output.shape + | (32, 10, 4) + | >>> final_state.shape + | (32, 4) + | + | Args: + | units: Positive integer, dimensionality of the output space. + | activation: Activation function to use. + | Default: hyperbolic tangent (`tanh`). + | If you pass `None`, no activation is applied + | (ie. "linear" activation: `a(x) = x`).
+ | recurrent_activation: Activation function to use + | for the recurrent step. + | Default: sigmoid (`sigmoid`). + | If you pass `None`, no activation is applied + | (ie. "linear" activation: `a(x) = x`). + | use_bias: Boolean (default `True`), whether the layer + | should use a bias vector. + | kernel_initializer: Initializer for the `kernel` weights matrix, + | used for the linear transformation of the inputs. Default: + | `"glorot_uniform"`. + | recurrent_initializer: Initializer for the `recurrent_kernel` + | weights matrix, used for the linear transformation of the recurrent + | state. Default: `"orthogonal"`. + | bias_initializer: Initializer for the bias vector. Default: `"zeros"`. + | kernel_regularizer: Regularizer function applied to the `kernel` weights + | matrix. Default: `None`. + | recurrent_regularizer: Regularizer function applied to the + | `recurrent_kernel` weights matrix. Default: `None`. + | bias_regularizer: Regularizer function applied to the bias vector. + | Default: `None`. + | activity_regularizer: Regularizer function applied to the output of the + | layer (its "activation"). Default: `None`. + | kernel_constraint: Constraint function applied to the `kernel` weights + | matrix. Default: `None`. + | recurrent_constraint: Constraint function applied to the + | `recurrent_kernel` weights matrix. Default: `None`. + | bias_constraint: Constraint function applied to the bias vector. + | Default: `None`. + | dropout: Float between 0 and 1. Fraction of the units to drop for the + | linear transformation of the inputs. Default: 0. + | recurrent_dropout: Float between 0 and 1. Fraction of the units to drop + | for the linear transformation of the recurrent state. Default: 0. + | seed: Random seed for dropout. + | return_sequences: Boolean. Whether to return the last output + | in the output sequence, or the full sequence. Default: `False`. + | return_state: Boolean. Whether to return the last state in addition + | to the output. Default: `False`. + | go_backwards: Boolean (default `False`). + | If `True`, process the input sequence backwards and return the + | reversed sequence. + | stateful: Boolean (default: `False`). If `True`, the last state + | for each sample at index i in a batch will be used as initial + | state for the sample of index i in the following batch. + | unroll: Boolean (default: `False`). + | If `True`, the network will be unrolled, + | else a symbolic loop will be used. + | Unrolling can speed up an RNN, + | although it tends to be more memory-intensive. + | Unrolling is only suitable for short sequences. + | reset_after: GRU convention (whether to apply reset gate after or + | before matrix multiplication). `False` is `"before"`, + | `True` is `"after"` (default and cuDNN compatible). + | use_cudnn: Whether to use a cuDNN-backed implementation. `"auto"` will + | attempt to use cuDNN when feasible, and will fall back to the + | default implementation if not. + | + | Call arguments: + | inputs: A 3D tensor, with shape `(batch, timesteps, feature)`. + | mask: Binary tensor of shape `(samples, timesteps)` indicating whether + | a given timestep should be masked (optional). + | An individual `True` entry indicates that the corresponding timestep + | should be utilized, while a `False` entry indicates that the + | corresponding timestep should be ignored. Defaults to `None`. + | training: Python boolean indicating whether the layer should behave in + | training mode or in inference mode. This argument is passed to the + | cell when calling it.
This is only relevant if `dropout` or + | `recurrent_dropout` is used (optional). Defaults to `None`. + | initial_state: List of initial state tensors to be passed to the first + | call of the cell (optional, `None` causes creation + | of zero-filled initial state tensors). Defaults to `None`. + | + | Method resolution order: + | GRU + | keras.src.layers.rnn.rnn.RNN + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | units, + | activation='tanh', + | recurrent_activation='sigmoid', + | use_bias=True, + | kernel_initializer='glorot_uniform', + | recurrent_initializer='orthogonal', + | bias_initializer='zeros', + | kernel_regularizer=None, + | recurrent_regularizer=None, + | bias_regularizer=None, + | activity_regularizer=None, + | kernel_constraint=None, + | recurrent_constraint=None, + | bias_constraint=None, + | dropout=0.0, + | recurrent_dropout=0.0, + | seed=None, + | return_sequences=False, + | return_state=False, + | go_backwards=False, + | stateful=False, + | unroll=False, + | reset_after=True, + | use_cudnn='auto', + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call( + | self, + | sequences, + | initial_state=None, + | mask=None, + | training=False + | ) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + | inner_loop( + | self, + | sequences, + | initial_state, + | mask, + | training=False + | ) + | + | ---------------------------------------------------------------------- + | Class methods defined here: + | + | from_config(config) from builtins.type + | Creates a layer from its config. + | + | This method is the reverse of `get_config`, + | capable of instantiating the same layer from the config + | dictionary. It does not handle layer connectivity + | (handled by Network), nor weights (handled by `set_weights`). + | + | Args: + | config: A Python dictionary, typically the + | output of get_config. + | + | Returns: + | A layer instance. 
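The `get_config()`/`from_config()` pair documented above round-trips a layer's constructor arguments; a minimal sketch (the reconstructed layer has the same hyperparameters but freshly initialized weights):

```python
import keras

gru = keras.layers.GRU(4, return_sequences=True, dropout=0.1)
config = gru.get_config()                     # plain serializable dict
clone = keras.layers.GRU.from_config(config)  # same hyperparameters,
                                              # fresh (uninitialized) weights
assert clone.units == 4
```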
+ | + | ---------------------------------------------------------------------- + | Readonly properties defined here: + | + | activation + | + | bias_constraint + | + | bias_initializer + | + | bias_regularizer + | + | dropout + | + | kernel_constraint + | + | kernel_initializer + | + | kernel_regularizer + | + | recurrent_activation + | + | recurrent_constraint + | + | recurrent_dropout + | + | recurrent_initializer + | + | recurrent_regularizer + | + | reset_after + | + | units + | + | use_bias + | + diff --git a/.tether/man/layer_gru_cell.txt b/.tether/man/layer_gru_cell.txt new file mode 100644 index 0000000000..6e79a049a0 --- /dev/null +++ b/.tether/man/layer_gru_cell.txt @@ -0,0 +1,126 @@ +Help on class GRUCell in module keras.src.layers.rnn.gru: + +class GRUCell(keras.src.layers.layer.Layer, keras.src.layers.rnn.dropout_rnn_cell.DropoutRNNCell) + | GRUCell(units, activation='tanh', recurrent_activation='sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0.0, recurrent_dropout=0.0, reset_after=True, seed=None, **kwargs) + | + | Cell class for the GRU layer. + | + | This class processes one step within the whole time sequence input, whereas + | `keras.layer.GRU` processes the whole sequence. + | + | Args: + | units: Positive integer, dimensionality of the output space. + | activation: Activation function to use. Default: hyperbolic tangent + | (`tanh`). If you pass None, no activation is applied + | (ie. "linear" activation: `a(x) = x`). + | recurrent_activation: Activation function to use for the recurrent step. + | Default: sigmoid (`sigmoid`). If you pass `None`, no activation is + | applied (ie. "linear" activation: `a(x) = x`). + | use_bias: Boolean, (default `True`), whether the layer + | should use a bias vector. + | kernel_initializer: Initializer for the `kernel` weights matrix, + | used for the linear transformation of the inputs. Default: + | `"glorot_uniform"`. + | recurrent_initializer: Initializer for the `recurrent_kernel` + | weights matrix, used for the linear transformation + | of the recurrent state. Default: `"orthogonal"`. + | bias_initializer: Initializer for the bias vector. Default: `"zeros"`. + | kernel_regularizer: Regularizer function applied to the `kernel` weights + | matrix. Default: `None`. + | recurrent_regularizer: Regularizer function applied to the + | `recurrent_kernel` weights matrix. Default: `None`. + | bias_regularizer: Regularizer function applied to the bias vector. + | Default: `None`. + | kernel_constraint: Constraint function applied to the `kernel` weights + | matrix. Default: `None`. + | recurrent_constraint: Constraint function applied to the + | `recurrent_kernel` weights matrix. Default: `None`. + | bias_constraint: Constraint function applied to the bias vector. + | Default: `None`. + | dropout: Float between 0 and 1. Fraction of the units to drop for the + | linear transformation of the inputs. Default: 0. + | recurrent_dropout: Float between 0 and 1. Fraction of the units to drop + | for the linear transformation of the recurrent state. Default: 0. + | reset_after: GRU convention (whether to apply reset gate after or + | before matrix multiplication). False = "before", + | True = "after" (default and cuDNN compatible). + | seed: Random seed for dropout. 
+ | + | Call arguments: + | inputs: A 2D tensor, with shape `(batch, features)`. + | states: A 2D tensor with shape `(batch, units)`, which is the state + | from the previous time step. + | training: Python boolean indicating whether the layer should behave in + | training mode or in inference mode. Only relevant when `dropout` or + | `recurrent_dropout` is used. + | + | Example: + | + | >>> inputs = np.random.random((32, 10, 8)) + | >>> rnn = keras.layers.RNN(keras.layers.GRUCell(4)) + | >>> output = rnn(inputs) + | >>> output.shape + | (32, 4) + | >>> rnn = keras.layers.RNN( + | ... keras.layers.GRUCell(4), + | ... return_sequences=True, + | ... return_state=True) + | >>> whole_sequence_output, final_state = rnn(inputs) + | >>> whole_sequence_output.shape + | (32, 10, 4) + | >>> final_state.shape + | (32, 4) + | + | Method resolution order: + | GRUCell + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | keras.src.layers.rnn.dropout_rnn_cell.DropoutRNNCell + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | units, + | activation='tanh', + | recurrent_activation='sigmoid', + | use_bias=True, + | kernel_initializer='glorot_uniform', + | recurrent_initializer='orthogonal', + | bias_initializer='zeros', + | kernel_regularizer=None, + | recurrent_regularizer=None, + | bias_regularizer=None, + | kernel_constraint=None, + | recurrent_constraint=None, + | bias_constraint=None, + | dropout=0.0, + | recurrent_dropout=0.0, + | reset_after=True, + | seed=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, input_shape) + | + | call( + | self, + | inputs, + | states, + | training=False + | ) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + | get_initial_state(self, batch_size=None) + | diff --git a/.tether/man/layer_hashed_crossing.txt b/.tether/man/layer_hashed_crossing.txt new file mode 100644 index 0000000000..7a09d03516 --- /dev/null +++ b/.tether/man/layer_hashed_crossing.txt @@ -0,0 +1,97 @@ +Help on class HashedCrossing in module keras.src.layers.preprocessing.hashed_crossing: + +class HashedCrossing(keras.src.layers.layer.Layer) + | HashedCrossing(num_bins, output_mode='int', sparse=False, name=None, dtype=None, **kwargs) + | + | A preprocessing layer which crosses features using the "hashing trick". + | + | This layer performs crosses of categorical features using the "hashing + | trick". Conceptually, the transformation can be thought of as: + | `hash(concatenate(features)) % num_bins`. + | + | This layer currently only performs crosses of scalar inputs and batches of + | scalar inputs. Valid input shapes are `(batch_size, 1)`, `(batch_size,)` and + | `()`. + | + | **Note:** This layer wraps `tf.keras.layers.HashedCrossing`. It cannot + | be used as part of the compiled computation graph of a model with + | any backend other than TensorFlow. + | It can however be used with any backend when running eagerly. + | It can also always be used as part of an input preprocessing pipeline + | with any backend (outside the model itself), which is how we recommend + | using this layer.
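The "hashing trick" described above can be pictured in plain Python; this is a conceptual sketch only, since the layer itself relies on TensorFlow's stable FarmHash64 op while Python's built-in `hash` is salted per process:

```python
def hashed_cross(features, num_bins):
    # Conceptually: hash(concatenate(features)) % num_bins
    return hash("_X_".join(str(f) for f in features)) % num_bins

print(hashed_cross(("A", 101), num_bins=5))  # some bin index in [0, 5)
```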
+ | + | **Note:** This layer is safe to use inside a `tf.data` pipeline + | (independently of which backend you're using). + | + | Args: + | num_bins: Number of hash bins. + | output_mode: Specification for the output of the layer. Values can be + | `"int"`, or `"one_hot"` configuring the layer as follows: + | - `"int"`: Return the integer bin indices directly. + | - `"one_hot"`: Encodes each individual element in the input into an + | array the same size as `num_bins`, containing a 1 at the input's + | bin index. Defaults to `"int"`. + | sparse: Boolean. Only applicable to `"one_hot"` mode and only valid + | when using the TensorFlow backend. If `True`, returns + | a `SparseTensor` instead of a dense `Tensor`. Defaults to `False`. + | **kwargs: Keyword arguments to construct a layer. + | + | Examples: + | + | **Crossing two scalar features.** + | + | >>> layer = keras.layers.HashedCrossing( + | ... num_bins=5) + | >>> feat1 = np.array(['A', 'B', 'A', 'B', 'A']) + | >>> feat2 = np.array([101, 101, 101, 102, 102]) + | >>> layer((feat1, feat2)) + | array([1, 4, 1, 1, 3]) + | + | **Crossing and one-hotting two scalar features.** + | + | >>> layer = keras.layers.HashedCrossing( + | ... num_bins=5, output_mode='one_hot') + | >>> feat1 = np.array(['A', 'B', 'A', 'B', 'A']) + | >>> feat2 = np.array([101, 101, 101, 102, 102]) + | >>> layer((feat1, feat2)) + | array([[0., 1., 0., 0., 0.], + | [0., 0., 0., 0., 1.], + | [0., 1., 0., 0., 0.], + | [0., 1., 0., 0., 0.], + | [0., 0., 0., 1., 0.]], dtype=float32) + | + | Method resolution order: + | HashedCrossing + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | num_bins, + | output_mode='int', + | sparse=False, + | name=None, + | dtype=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_hashing.txt b/.tether/man/layer_hashing.txt new file mode 100644 index 0000000000..88a220ee10 --- /dev/null +++ b/.tether/man/layer_hashing.txt @@ -0,0 +1,163 @@ +Help on class Hashing in module keras.src.layers.preprocessing.hashing: + +class Hashing(keras.src.layers.layer.Layer) + | Hashing(num_bins, mask_value=None, salt=None, output_mode='int', sparse=False, **kwargs) + | + | A preprocessing layer which hashes and bins categorical features. + | + | This layer transforms categorical inputs to hashed output. It element-wise + | converts ints or strings to ints in a fixed range. The stable hash + | function uses `tensorflow::ops::Fingerprint` to produce the same output + | consistently across all platforms. + | + | This layer uses [FarmHash64](https://github.com/google/farmhash) by default, + | which provides a consistent hashed output across different platforms and is + | stable across invocations, regardless of device and context, by mixing the + | input bits thoroughly. + | + | If you want to obfuscate the hashed output, you can also pass a random + | `salt` argument in the constructor.
In that case, the layer will use the + | [SipHash64](https://github.com/google/highwayhash) hash function, with + | the `salt` value serving as additional input to the hash function. + | + | **Note:** This layer internally uses TensorFlow. It cannot + | be used as part of the compiled computation graph of a model with + | any backend other than TensorFlow. + | It can however be used with any backend when running eagerly. + | It can also always be used as part of an input preprocessing pipeline + | with any backend (outside the model itself), which is how we recommend + | using this layer. + | + | **Note:** This layer is safe to use inside a `tf.data` pipeline + | (independently of which backend you're using). + | + | **Example (FarmHash64)** + | + | >>> layer = keras.layers.Hashing(num_bins=3) + | >>> inp = [['A'], ['B'], ['C'], ['D'], ['E']] + | >>> layer(inp) + | array([[1], + | [0], + | [1], + | [1], + | [2]]) + | + | **Example (FarmHash64) with a mask value** + | + | >>> layer = keras.layers.Hashing(num_bins=3, mask_value='') + | >>> inp = [['A'], ['B'], [''], ['C'], ['D']] + | >>> layer(inp) + | array([[1], + | [1], + | [0], + | [2], + | [2]]) + | + | **Example (SipHash64)** + | + | >>> layer = keras.layers.Hashing(num_bins=3, salt=[133, 137]) + | >>> inp = [['A'], ['B'], ['C'], ['D'], ['E']] + | >>> layer(inp) + | array([[1], + | [2], + | [1], + | [0], + | [2]]) + | + | **Example (SipHash64 with a single integer, same as `salt=[133, 133]`)** + | + | >>> layer = keras.layers.Hashing(num_bins=3, salt=133) + | >>> inp = [['A'], ['B'], ['C'], ['D'], ['E']] + | >>> layer(inp) + | array([[0], + | [0], + | [2], + | [1], + | [0]]) + | + | Args: + | num_bins: Number of hash bins. Note that this includes the `mask_value` + | bin, so the effective number of bins is `(num_bins - 1)` + | if `mask_value` is set. + | mask_value: A value that represents masked inputs, which are mapped to + | index 0. `None` means no mask term will be added and the + | hashing will start at index 0. Defaults to `None`. + | salt: A single unsigned integer or None. + | If passed, the hash function used will be SipHash64, + | with these values used as an additional input + | (known as a "salt" in cryptography). + | These should be non-zero. If `None`, uses the FarmHash64 hash + | function. It also supports a tuple/list of 2 unsigned + | integers; see the reference paper for details. + | Defaults to `None`. + | output_mode: Specification for the output of the layer. Values can be + | `"int"`, `"one_hot"`, `"multi_hot"`, or + | `"count"` configuring the layer as follows: + | - `"int"`: Return the integer bin indices directly. + | - `"one_hot"`: Encodes each individual element in the input into an + | array the same size as `num_bins`, containing a 1 + | at the input's bin index. If the last dimension is size 1, + | will encode on that dimension. + | If the last dimension is not size 1, will append a new + | dimension for the encoded output. + | - `"multi_hot"`: Encodes each sample in the input into a + | single array the same size as `num_bins`, + | containing a 1 for each bin index + | present in the sample. Treats the last dimension + | as the sample dimension, if input shape is + | `(..., sample_length)`, output shape will be + | `(..., num_tokens)`. + | - `"count"`: As `"multi_hot"`, but the int array contains a count of + | the number of times the bin index appeared in the sample. + | Defaults to `"int"`. + | sparse: Boolean. Only applicable to `"one_hot"`, `"multi_hot"`, + | and `"count"` output modes.
Only supported with TensorFlow + | backend. If `True`, returns a `SparseTensor` instead of + | a dense `Tensor`. Defaults to `False`. + | **kwargs: Keyword arguments to construct a layer. + | + | Input shape: + | A single string, a list of strings, or an `int32` or `int64` tensor + | of shape `(batch_size, ...,)`. + | + | Output shape: + | An `int32` tensor of shape `(batch_size, ...)`. + | + | Reference: + | + | - [SipHash with salt](https://www.131002.net/siphash/siphash.pdf) + | + | Method resolution order: + | Hashing + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | num_bins, + | mask_value=None, + | salt=None, + | output_mode='int', + | sparse=False, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + | compute_output_spec(self, inputs) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_identity.txt b/.tether/man/layer_identity.txt new file mode 100644 index 0000000000..754fedd0a9 --- /dev/null +++ b/.tether/man/layer_identity.txt @@ -0,0 +1,32 @@ +Help on class Identity in module keras.src.layers.core.identity: + +class Identity(keras.src.layers.layer.Layer) + | Identity(**kwargs) + | + | Identity layer. + | + | This layer should be used as a placeholder when no operation is to be + | performed. The layer just returns its `inputs` argument as output. + | + | Method resolution order: + | Identity + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__(self, **kwargs) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | compute_output_spec(self, inputs) + | + diff --git a/.tether/man/layer_input.txt b/.tether/man/layer_input.txt new file mode 100644 index 0000000000..3348a3bcbc --- /dev/null +++ b/.tether/man/layer_input.txt @@ -0,0 +1,54 @@ +__signature__ +keras.layers.Input( + shape=None, + batch_size=None, + dtype=None, + sparse=None, + batch_shape=None, + name=None, + tensor=None +) +__doc__ +Used to instantiate a Keras tensor. + +A Keras tensor is a symbolic tensor-like object, which we augment with +certain attributes that allow us to build a Keras model just by knowing the +inputs and outputs of the model. + +For instance, if `a`, `b` and `c` are Keras tensors, +it becomes possible to do: +`model = Model(input=[a, b], output=c)` + +Args: + shape: A shape tuple (tuple of integers or `None` objects), + not including the batch size. + For instance, `shape=(32,)` indicates that the expected input + will be batches of 32-dimensional vectors. Elements of this tuple + can be `None`; `None` elements represent dimensions where the shape + is not known and may vary (e.g. sequence length). + batch_size: Optional static batch size (integer). 
+ dtype: The data type expected by the input, as a string + (e.g. `"float32"`, `"int32"`...) + sparse: A boolean specifying whether the expected input will be sparse + tensors. Note that, if `sparse` is `False`, sparse tensors can still + be passed into the input - they will be densified with a default + value of 0. This feature is only supported with the TensorFlow + backend. Defaults to `False`. + name: Optional name string for the layer. + Should be unique in a model (do not reuse the same name twice). + It will be autogenerated if it isn't provided. + tensor: Optional existing tensor to wrap into the `Input` layer. + If set, the layer will use this tensor rather + than creating a new placeholder tensor. + +Returns: + A Keras tensor. + +Example: + +```python +# This is a logistic regression in Keras +x = Input(shape=(32,)) +y = Dense(16, activation='softmax')(x) +model = Model(x, y) +``` diff --git a/.tether/man/layer_integer_lookup.txt b/.tether/man/layer_integer_lookup.txt new file mode 100644 index 0000000000..bb623bf54d --- /dev/null +++ b/.tether/man/layer_integer_lookup.txt @@ -0,0 +1,362 @@ +Help on class IntegerLookup in module keras.src.layers.preprocessing.integer_lookup: + +class IntegerLookup(keras.src.layers.preprocessing.index_lookup.IndexLookup) + | IntegerLookup(max_tokens=None, num_oov_indices=1, mask_token=None, oov_token=-1, vocabulary=None, vocabulary_dtype='int64', idf_weights=None, invert=False, output_mode='int', sparse=False, pad_to_max_tokens=False, name=None, **kwargs) + | + | A preprocessing layer that maps integers to (possibly encoded) indices. + | + | This layer maps a set of arbitrary integer input tokens into indexed integer + | output via a table-based vocabulary lookup. The layer's output indices will + | be contiguously arranged up to the maximum vocab size, even if the input + | tokens are non-contiguous or unbounded. The layer supports multiple options + | for encoding the output via `output_mode`, and has optional support for + | out-of-vocabulary (OOV) tokens and masking. + | + | The vocabulary for the layer must be either supplied on construction or + | learned via `adapt()`. During `adapt()`, the layer will analyze a data set, + | determine the frequency of individual integer tokens, and create a + | vocabulary from them. If the vocabulary is capped in size, the most frequent + | tokens will be used to create the vocabulary and all others will be treated + | as OOV. + | + | There are two possible output modes for the layer. When `output_mode` is + | `"int"`, input integers are converted to their index in the vocabulary (an + | integer). When `output_mode` is `"multi_hot"`, `"count"`, or `"tf_idf"`, + | input integers are encoded into an array where each dimension corresponds to + | an element in the vocabulary. + | + | The vocabulary can optionally contain a mask token as well as an OOV token + | (which can optionally occupy multiple indices in the vocabulary, as set + | by `num_oov_indices`). + | The position of these tokens in the vocabulary is fixed. When `output_mode` + | is `"int"`, the vocabulary will begin with the mask token at index 0, + | followed by OOV indices, followed by the rest of the vocabulary. When + | `output_mode` is `"multi_hot"`, `"count"`, or `"tf_idf"` the vocabulary will + | begin with OOV indices and instances of the mask token will be dropped. + | + | **Note:** This layer uses TensorFlow internally. It cannot + | be used as part of the compiled computation graph of a model with + | any backend other than TensorFlow.
+ | It can however be used with any backend when running eagerly. + | It can also always be used as part of an input preprocessing pipeline + | with any backend (outside the model itself), which is how we recommend + | using this layer. + | + | **Note:** This layer is safe to use inside a `tf.data` pipeline + | (independently of which backend you're using). + | + | Args: + | max_tokens: Maximum size of the vocabulary for this layer. This should + | only be specified when adapting the vocabulary or when setting + | `pad_to_max_tokens=True`. If None, there is no cap on the size of + | the vocabulary. Note that this size includes the OOV + | and mask tokens. Defaults to `None`. + | num_oov_indices: The number of out-of-vocabulary tokens to use. + | If this value is more than 1, OOV inputs are modulated to + | determine their OOV value. + | If this value is 0, OOV inputs will cause an error when calling + | the layer. Defaults to `1`. + | mask_token: An integer token that represents masked inputs. When + | `output_mode` is `"int"`, the token is included in vocabulary + | and mapped to index 0. In other output modes, + | the token will not appear in the vocabulary and instances + | of the mask token in the input will be dropped. + | If set to None, no mask term will be added. Defaults to `None`. + | oov_token: Only used when `invert` is `True`. The token to return + | for OOV indices. Defaults to `-1`. + | vocabulary: Optional. Either an array of integers or a string path to a + | text file. If passing an array, can pass a tuple, list, + | 1D NumPy array, or 1D tensor containing the integer vocabulary terms. + | If passing a file path, the file should contain one line per term + | in the vocabulary. If this argument is set, + | there is no need to `adapt()` the layer. + | vocabulary_dtype: The dtype of the vocabulary terms, for example + | `"int64"` or `"int32"`. Defaults to `"int64"`. + | idf_weights: Only valid when `output_mode` is `"tf_idf"`. + | A tuple, list, 1D NumPy array, or 1D tensor of the same length + | as the vocabulary, containing the floating point inverse document + | frequency weights, which will be multiplied by per sample term + | counts for the final TF-IDF weight. + | If the `vocabulary` argument is set, and `output_mode` is + | `"tf_idf"`, this argument must be supplied. + | invert: Only valid when `output_mode` is `"int"`. + | If `True`, this layer will map indices to vocabulary items + | instead of mapping vocabulary items to indices. + | Defaults to `False`. + | output_mode: Specification for the output of the layer. Values can be + | `"int"`, `"one_hot"`, `"multi_hot"`, `"count"`, or `"tf_idf"` + | configuring the layer as follows: + | - `"int"`: Return the vocabulary indices of the input tokens. + | - `"one_hot"`: Encodes each individual element in the input into an + | array the same size as the vocabulary, + | containing a 1 at the element index. If the last dimension + | is size 1, will encode on that dimension. + | If the last dimension is not size 1, will append a new + | dimension for the encoded output. + | - `"multi_hot"`: Encodes each sample in the input into a single + | array the same size as the vocabulary, + | containing a 1 for each vocabulary term present in the sample. + | Treats the last dimension as the sample dimension, + | if input shape is `(..., sample_length)`, + | output shape will be `(..., num_tokens)`.
+ | - `"count"`: As `"multi_hot"`, but the int array contains + | a count of the number of times the token at that index + | appeared in the sample. + | - `"tf_idf"`: As `"multi_hot"`, but the TF-IDF algorithm is + | applied to find the value in each token slot. + | For `"int"` output, any shape of input and output is supported. + | For all other output modes, currently only output up to rank 2 + | is supported. Defaults to `"int"`. + | pad_to_max_tokens: Only applicable when `output_mode` is `"multi_hot"`, + | `"count"`, or `"tf_idf"`. If `True`, the output will have + | its feature axis padded to `max_tokens` even if the number + | of unique tokens in the vocabulary is less than `max_tokens`, + | resulting in a tensor of shape `(batch_size, max_tokens)` + | regardless of vocabulary size. Defaults to `False`. + | sparse: Boolean. Only applicable to `"multi_hot"`, `"count"`, and + | `"tf_idf"` output modes. Only supported with TensorFlow + | backend. If `True`, returns a `SparseTensor` + | instead of a dense `Tensor`. Defaults to `False`. + | + | Examples: + | + | **Creating a lookup layer with a known vocabulary** + | + | This example creates a lookup layer with a pre-existing vocabulary. + | + | >>> vocab = [12, 36, 1138, 42] + | >>> data = np.array([[12, 1138, 42], [42, 1000, 36]]) # Note OOV tokens + | >>> layer = IntegerLookup(vocabulary=vocab) + | >>> layer(data) + | array([[1, 3, 4], + | [4, 0, 2]]) + | + | **Creating a lookup layer with an adapted vocabulary** + | + | This example creates a lookup layer and generates the vocabulary by + | analyzing the dataset. + | + | >>> data = np.array([[12, 1138, 42], [42, 1000, 36]]) + | >>> layer = IntegerLookup() + | >>> layer.adapt(data) + | >>> layer.get_vocabulary() + | [-1, 42, 1138, 1000, 36, 12] + | + | Note that the OOV token -1 have been added to the vocabulary. The remaining + | tokens are sorted by frequency (42, which has 2 occurrences, is first) then + | by inverse sort order. + | + | >>> data = np.array([[12, 1138, 42], [42, 1000, 36]]) + | >>> layer = IntegerLookup() + | >>> layer.adapt(data) + | >>> layer(data) + | array([[5, 2, 1], + | [1, 3, 4]]) + | + | **Lookups with multiple OOV indices** + | + | This example demonstrates how to use a lookup layer with multiple OOV + | indices. When a layer is created with more than one OOV index, any OOV + | tokens are hashed into the number of OOV buckets, distributing OOV tokens in + | a deterministic fashion across the set. + | + | >>> vocab = [12, 36, 1138, 42] + | >>> data = np.array([[12, 1138, 42], [37, 1000, 36]]) + | >>> layer = IntegerLookup(vocabulary=vocab, num_oov_indices=2) + | >>> layer(data) + | array([[2, 4, 5], + | [1, 0, 3]]) + | + | Note that the output for OOV token 37 is 1, while the output for OOV token + | 1000 is 0. The in-vocab terms have their output index increased by 1 from + | earlier examples (12 maps to 2, etc) in order to make space for the extra + | OOV token. + | + | **One-hot output** + | + | Configure the layer with `output_mode='one_hot'`. Note that the first + | `num_oov_indices` dimensions in the ont_hot encoding represent OOV values. 
+ | + | >>> vocab = [12, 36, 1138, 42] + | >>> data = np.array([12, 36, 1138, 42, 7]) # Note OOV tokens + | >>> layer = IntegerLookup(vocabulary=vocab, output_mode='one_hot') + | >>> layer(data) + | array([[0., 1., 0., 0., 0.], + | [0., 0., 1., 0., 0.], + | [0., 0., 0., 1., 0.], + | [0., 0., 0., 0., 1.], + | [1., 0., 0., 0., 0.]], dtype=float32) + | + | **Multi-hot output** + | + | Configure the layer with `output_mode='multi_hot'`. Note that the first + | `num_oov_indices` dimensions in the multi_hot encoding represent OOV tokens. + | + | >>> vocab = [12, 36, 1138, 42] + | >>> data = np.array([[12, 1138, 42, 42], + | ... [42, 7, 36, 7]]) # Note OOV tokens + | >>> layer = IntegerLookup(vocabulary=vocab, output_mode='multi_hot') + | >>> layer(data) + | array([[0., 1., 0., 1., 1.], + | [1., 0., 1., 0., 1.]], dtype=float32) + | + | **Token count output** + | + | Configure the layer with `output_mode='count'`. As with multi_hot output, + | the first `num_oov_indices` dimensions in the output represent OOV tokens. + | + | >>> vocab = [12, 36, 1138, 42] + | >>> data = np.array([[12, 1138, 42, 42], + | ... [42, 7, 36, 7]]) # Note OOV tokens + | >>> layer = IntegerLookup(vocabulary=vocab, output_mode='count') + | >>> layer(data) + | array([[0., 1., 0., 1., 2.], + | [2., 0., 1., 0., 1.]], dtype=float32) + | + | **TF-IDF output** + | + | Configure the layer with `output_mode='tf_idf'`. As with multi_hot output, + | the first `num_oov_indices` dimensions in the output represent OOV tokens. + | + | Each token bin will output `token_count * idf_weight`, where the idf weights + | are the inverse document frequency weights per token. These should be + | provided along with the vocabulary. Note that the `idf_weight` for OOV + | tokens will default to the average of all idf weights passed in. + | + | >>> vocab = [12, 36, 1138, 42] + | >>> idf_weights = [0.25, 0.75, 0.6, 0.4] + | >>> data = np.array([[12, 1138, 42, 42], + | ... [42, 7, 36, 7]]) # Note OOV tokens + | >>> layer = IntegerLookup( + | ... output_mode='tf_idf', vocabulary=vocab, idf_weights=idf_weights) + | >>> layer(data) + | array([[0. , 0.25, 0. , 0.6 , 0.8 ], + | [1.0 , 0. , 0.75, 0. , 0.4 ]], dtype=float32) + | + | To specify the idf weights for OOV tokens, you will need to pass the entire + | vocabulary including the leading OOV token. + | + | >>> vocab = [-1, 12, 36, 1138, 42] + | >>> idf_weights = [0.9, 0.25, 0.75, 0.6, 0.4] + | >>> data = np.array([[12, 1138, 42, 42], + | ... [42, 7, 36, 7]]) # Note OOV tokens + | >>> layer = IntegerLookup( + | ... output_mode='tf_idf', vocabulary=vocab, idf_weights=idf_weights) + | >>> layer(data) + | array([[0. , 0.25, 0. , 0.6 , 0.8 ], + | [1.8 , 0. , 0.75, 0. , 0.4 ]], dtype=float32) + | + | When adapting the layer in `"tf_idf"` mode, each input sample will + | be considered a document, and IDF weight per token will be + | calculated as: + | `log(1 + num_documents / (1 + token_document_count))`. + | + | **Inverse lookup** + | + | This example demonstrates how to map indices to tokens using this layer. + | (You can also use `adapt()` with `invert=True`, but for simplicity we'll + | pass the vocab in this example.) + | + | >>> vocab = [12, 36, 1138, 42] + | >>> data = np.array([[1, 3, 4], [4, 0, 2]]) + | >>> layer = IntegerLookup(vocabulary=vocab, invert=True) + | >>> layer(data) + | array([[ 12, 1138, 42], + | [ 42, -1, 36]]) + | + | Note that the first index corresponds to the OOV token by default.
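A worked instance of the adapt-time IDF formula quoted above, `log(1 + num_documents / (1 + token_document_count))`:

```python
import math

# Two documents; a token that appears in both of them:
num_documents = 2
token_document_count = 2
print(math.log(1 + num_documents / (1 + token_document_count)))  # ~0.5108
```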
+ | + | **Forward and inverse lookup pairs** + | + | This example demonstrates how to use the vocabulary of a standard lookup + | layer to create an inverse lookup layer. + | + | >>> vocab = [12, 36, 1138, 42] + | >>> data = np.array([[12, 1138, 42], [42, 1000, 36]]) + | >>> layer = IntegerLookup(vocabulary=vocab) + | >>> i_layer = IntegerLookup( + | ... vocabulary=layer.get_vocabulary(), invert=True) + | >>> int_data = layer(data) + | >>> i_layer(int_data) + | array([[ 12, 1138, 42], + | [ 42, -1, 36]]) + | + | In this example, the input token 1000 resulted in an output of -1, since + | 1000 was not in the vocabulary - it got represented as an OOV, and all OOV + | tokens are returned as -1 in the inverse layer. Also, note that for the + | inverse to work, you must have already set the forward layer vocabulary + | either directly or via `adapt()` before calling `get_vocabulary()`. + | + | Method resolution order: + | IntegerLookup + | keras.src.layers.preprocessing.index_lookup.IndexLookup + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | max_tokens=None, + | num_oov_indices=1, + | mask_token=None, + | oov_token=-1, + | vocabulary=None, + | vocabulary_dtype='int64', + | idf_weights=None, + | invert=False, + | output_mode='int', + | sparse=False, + | pad_to_max_tokens=False, + | name=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | adapt( + | self, + | data, + | steps=None + | ) + | Computes a vocabulary of integer terms from tokens in a dataset. + | + | Calling `adapt()` on an `IntegerLookup` layer is an alternative to + | passing in a precomputed vocabulary on construction via the + | `vocabulary` argument. An `IntegerLookup` layer should always be either + | adapted over a dataset or supplied with a vocabulary. + | + | During `adapt()`, the layer will build a vocabulary of all integer + | tokens seen in the dataset, sorted by occurrence count, with ties broken + | by sort order of the tokens (high to low). At the end of `adapt()`, if + | `max_tokens` is set, the vocabulary will be truncated to `max_tokens` + | size. For example, adapting a layer with `max_tokens=1000` will compute + | the 1000 most frequent tokens occurring in the input dataset. If + | `output_mode='tf-idf'`, `adapt()` will also learn the document + | frequencies of each token in the input dataset. + | + | Arguments: + | data: The data to train on. It can be passed either as a + | batched `tf.data.Dataset`, as a list of integers, + | or as a NumPy array. + | steps: Integer or `None`. + | Total number of steps (batches of samples) to process. + | If `data` is a `tf.data.Dataset`, and `steps` is `None`, + | `adapt()` will run until the input dataset is exhausted. + | When passing an infinitely + | repeating dataset, you must specify the `steps` argument. This + | argument is not supported with array inputs or list inputs. + | + | call(self, inputs) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. 
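As the `adapt()` documentation above notes, `data` may also be a batched `tf.data.Dataset`; a minimal sketch (token values chosen arbitrarily):

```python
import numpy as np
import tensorflow as tf
import keras

data = tf.data.Dataset.from_tensor_slices(
    np.array([12, 42, 42, 1138, 1000])).batch(2)
layer = keras.layers.IntegerLookup()
layer.adapt(data)              # vocabulary learned from the dataset
print(layer.get_vocabulary())  # OOV token first, then tokens by frequency
```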
+ |
+
diff --git a/.tether/man/layer_jax_model_wrapper.txt b/.tether/man/layer_jax_model_wrapper.txt
new file mode 100644
index 0000000000..f4e341b380
--- /dev/null
+++ b/.tether/man/layer_jax_model_wrapper.txt
@@ -0,0 +1,252 @@
+Help on class JaxLayer in module keras.src.utils.jax_layer:
+
+class JaxLayer(keras.src.layers.layer.Layer)
+ | JaxLayer(call_fn, init_fn=None, params=None, state=None, seed=None, **kwargs)
+ |
+ | Keras Layer that wraps a JAX model.
+ |
+ | This layer enables the use of JAX components within Keras when using JAX as
+ | the backend for Keras.
+ |
+ | ## Model function
+ |
+ | This layer accepts JAX models in the form of a function, `call_fn`, which
+ | must take the following arguments with these exact names:
+ |
+ | - `params`: trainable parameters of the model.
+ | - `state` (*optional*): non-trainable state of the model. Can be omitted if
+ |   the model has no non-trainable state.
+ | - `rng` (*optional*): a `jax.random.PRNGKey` instance. Can be omitted if the
+ |   model does not need RNGs, either during training or during inference.
+ | - `inputs`: inputs to the model, a JAX array or a `PyTree` of arrays.
+ | - `training` (*optional*): an argument specifying if we're in training mode
+ |   or inference mode; `True` is passed in training mode. Can be omitted if
+ |   the model behaves the same in training mode and inference mode.
+ |
+ | The `inputs` argument is mandatory. Inputs to the model must be provided via
+ | a single argument. If the JAX model takes multiple inputs as separate
+ | arguments, they must be combined into a single structure, for instance in a
+ | `tuple` or a `dict`.
+ |
+ | ## Model weights initialization
+ |
+ | The initialization of the `params` and `state` of the model can be handled
+ | by this layer, in which case the `init_fn` argument must be provided. This
+ | allows the model to be initialized dynamically with the right shape.
+ | Alternatively, and if the shape is known, the `params` argument and
+ | optionally the `state` argument can be used to create an already initialized
+ | model.
+ |
+ | The `init_fn` function, if provided, must take the following arguments with
+ | these exact names:
+ |
+ | - `rng`: a `jax.random.PRNGKey` instance.
+ | - `inputs`: a JAX array or a `PyTree` of arrays with placeholder values to
+ |   provide the shape of the inputs.
+ | - `training` (*optional*): an argument specifying if we're in training mode
+ |   or inference mode. `True` is always passed to `init_fn`. Can be omitted
+ |   regardless of whether `call_fn` has a `training` argument.
+ |
+ | ## Models with non-trainable state
+ |
+ | For JAX models that have non-trainable state:
+ |
+ | - `call_fn` must have a `state` argument
+ | - `call_fn` must return a `tuple` containing the outputs of the model and
+ |   the new non-trainable state of the model
+ | - `init_fn` must return a `tuple` containing the initial trainable params of
+ |   the model and the initial non-trainable state of the model.
+ |
+ | This code shows a possible combination of `call_fn` and `init_fn` signatures
+ | for a model with non-trainable state. In this example, the model has a
+ | `training` argument and an `rng` argument in `call_fn`.
+ |
+ | ```python
+ | def stateful_call(params, state, rng, inputs, training):
+ |     outputs = ...
+ |     new_state = ...
+ |     return outputs, new_state
+ |
+ | def stateful_init(rng, inputs):
+ |     initial_params = ...
+ |     initial_state = ...
+ |     return initial_params, initial_state
+ | ```
+ |
+ | ## Models without non-trainable state
+ |
+ | For JAX models with no non-trainable state:
+ |
+ | - `call_fn` must not have a `state` argument
+ | - `call_fn` must return only the outputs of the model
+ | - `init_fn` must return only the initial trainable params of the model.
+ |
+ | This code shows a possible combination of `call_fn` and `init_fn` signatures
+ | for a model without non-trainable state. In this example, the model does not
+ | have a `training` argument and does not have an `rng` argument in `call_fn`.
+ |
+ | ```python
+ | def stateless_call(params, inputs):
+ |     outputs = ...
+ |     return outputs
+ |
+ | def stateless_init(rng, inputs):
+ |     initial_params = ...
+ |     return initial_params
+ | ```
+ |
+ | ## Conforming to the required signature
+ |
+ | If a model has a different signature than the one required by `JaxLayer`,
+ | one can easily write a wrapper method to adapt the arguments. This example
+ | shows a model that has multiple inputs as separate arguments, expects
+ | multiple RNGs in a `dict`, and has a `deterministic` argument with the
+ | opposite meaning of `training`. To conform, the inputs are combined in a
+ | single structure using a `tuple`, the RNG is split and used to populate the
+ | expected `dict`, and the Boolean flag is negated:
+ |
+ | ```python
+ | def my_model_fn(params, rngs, input1, input2, deterministic):
+ |     ...
+ |     if not deterministic:
+ |         dropout_rng = rngs["dropout"]
+ |         keep = jax.random.bernoulli(dropout_rng, dropout_rate, x.shape)
+ |         x = jax.numpy.where(keep, x / dropout_rate, 0)
+ |         ...
+ |     ...
+ |     return outputs
+ |
+ | def my_model_wrapper_fn(params, rng, inputs, training):
+ |     input1, input2 = inputs
+ |     rng1, rng2 = jax.random.split(rng)
+ |     rngs = {"dropout": rng1, "preprocessing": rng2}
+ |     deterministic = not training
+ |     return my_model_fn(params, rngs, input1, input2, deterministic)
+ |
+ | keras_layer = JaxLayer(my_model_wrapper_fn, params=initial_params)
+ | ```
+ |
+ | ## Usage with Haiku modules
+ |
+ | `JaxLayer` enables the use of [Haiku](https://dm-haiku.readthedocs.io)
+ | components in the form of
+ | [`haiku.Module`](https://dm-haiku.readthedocs.io/en/latest/api.html#module).
+ | This is achieved by transforming the module per the Haiku pattern and then
+ | passing `module.apply` in the `call_fn` parameter and `module.init` in the
+ | `init_fn` parameter if needed.
+ |
+ | If the model has non-trainable state, it should be transformed with
+ | [`haiku.transform_with_state`](
+ | https://dm-haiku.readthedocs.io/en/latest/api.html#haiku.transform_with_state).
+ | If the model has no non-trainable state, it should be transformed with
+ | [`haiku.transform`](
+ | https://dm-haiku.readthedocs.io/en/latest/api.html#haiku.transform).
+ | Additionally, and optionally, if the module does not use RNGs in "apply", it
+ | can be transformed with
+ | [`haiku.without_apply_rng`](
+ | https://dm-haiku.readthedocs.io/en/latest/api.html#without-apply-rng).
+ |
+ | The following example shows how to create a `JaxLayer` from a Haiku module
+ | that uses random number generators via `hk.next_rng_key()` and takes a
+ | training positional argument:
+ |
+ | ```python
+ | class MyHaikuModule(hk.Module):
+ |     def __call__(self, x, training):
+ |         x = hk.Conv2D(32, (3, 3))(x)
+ |         x = jax.nn.relu(x)
+ |         x = hk.AvgPool((1, 2, 2, 1), (1, 2, 2, 1), "VALID")(x)
+ |         x = hk.Flatten()(x)
+ |         x = hk.Linear(200)(x)
+ |         if training:
+ |             x = hk.dropout(rng=hk.next_rng_key(), rate=0.3, x=x)
+ |         x = jax.nn.relu(x)
+ |         x = hk.Linear(10)(x)
+ |         x = jax.nn.softmax(x)
+ |         return x
+ |
+ | def my_haiku_module_fn(inputs, training):
+ |     module = MyHaikuModule()
+ |     return module(inputs, training)
+ |
+ | transformed_module = hk.transform(my_haiku_module_fn)
+ |
+ | keras_layer = JaxLayer(
+ |     call_fn=transformed_module.apply,
+ |     init_fn=transformed_module.init,
+ | )
+ | ```
+ |
+ | Args:
+ |     call_fn: The function to call the model. See description above for the
+ |         list of arguments it takes and the outputs it returns.
+ |     init_fn: The function to call to initialize the model. See description
+ |         above for the list of arguments it takes and the outputs it returns.
+ |         If `None`, then `params` and/or `state` must be provided.
+ |     params: A `PyTree` containing all the model trainable parameters. This
+ |         allows passing trained parameters or controlling the initialization.
+ |         If both `params` and `state` are `None`, `init_fn` is called at
+ |         build time to initialize the trainable parameters of the model.
+ |     state: A `PyTree` containing all the model non-trainable state. This
+ |         allows passing learned state or controlling the initialization. If
+ |         both `params` and `state` are `None`, and `call_fn` takes a `state`
+ |         argument, then `init_fn` is called at build time to initialize the
+ |         non-trainable state of the model.
+ |     seed: Seed for random number generator. Optional.
+ |
+ | Method resolution order:
+ |     JaxLayer
+ |     keras.src.layers.layer.Layer
+ |     keras.src.backend.tensorflow.layer.TFLayer
+ |     keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ |     tensorflow.python.trackable.autotrackable.AutoTrackable
+ |     tensorflow.python.trackable.base.Trackable
+ |     keras.src.ops.operation.Operation
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ |     self,
+ |     call_fn,
+ |     init_fn=None,
+ |     params=None,
+ |     state=None,
+ |     seed=None,
+ |     **kwargs
+ | )
+ |     Initialize self. See help(type(self)) for accurate signature.
+ |
+ | build(self, input_shape)
+ |
+ | call(
+ |     self,
+ |     inputs,
+ |     training=False
+ | )
+ |
+ | get_config(self)
+ |     Returns the config of the object.
+ |
+ |     An object config is a Python dictionary (serializable)
+ |     containing the information needed to re-instantiate it.
+ |
+ | ----------------------------------------------------------------------
+ | Class methods defined here:
+ |
+ | from_config(config) from builtins.type
+ |     Creates a layer from its config.
+ |
+ |     This method is the reverse of `get_config`,
+ |     capable of instantiating the same layer from the config
+ |     dictionary. It does not handle layer connectivity
+ |     (handled by Network), nor weights (handled by `set_weights`).
+ |
+ |     Args:
+ |         config: A Python dictionary, typically the
+ |             output of get_config.
+ |
+ |     Returns:
+ |         A layer instance.
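+ |
+ | A minimal stateless sketch tying the pieces together (assumes the JAX
+ | backend is active; the tiny linear model here is illustrative only):
+ |
+ | ```python
+ | import jax
+ | import jax.numpy as jnp
+ | from keras.layers import JaxLayer
+ |
+ | def call_fn(params, inputs):  # no `state`, no `rng`, no `training`
+ |     return jnp.dot(inputs, params["w"]) + params["b"]
+ |
+ | def init_fn(rng, inputs):  # returns the initial trainable params only
+ |     w = jax.random.normal(rng, (inputs.shape[-1], 4))
+ |     return {"w": w, "b": jnp.zeros((4,))}
+ |
+ | layer = JaxLayer(call_fn, init_fn=init_fn)  # params built on first call
+ | ```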
+ | + diff --git a/.tether/man/layer_lambda.txt b/.tether/man/layer_lambda.txt new file mode 100644 index 0000000000..7c063f0e8c --- /dev/null +++ b/.tether/man/layer_lambda.txt @@ -0,0 +1,114 @@ +Help on class Lambda in module keras.src.layers.core.lambda_layer: + +class Lambda(keras.src.layers.layer.Layer) + | Lambda(function, output_shape=None, mask=None, arguments=None, **kwargs) + | + | Wraps arbitrary expressions as a `Layer` object. + | + | The `Lambda` layer exists so that arbitrary expressions can be used + | as a `Layer` when constructing Sequential + | and Functional API models. `Lambda` layers are best suited for simple + | operations or quick experimentation. For more advanced use cases, + | prefer writing new subclasses of `Layer`. + | + | WARNING: `Lambda` layers have (de)serialization limitations! + | + | The main reason to subclass `Layer` instead of using a + | `Lambda` layer is saving and inspecting a model. `Lambda` layers + | are saved by serializing the Python bytecode, which is fundamentally + | non-portable and potentially unsafe. + | They should only be loaded in the same environment where + | they were saved. Subclassed layers can be saved in a more portable way + | by overriding their `get_config()` method. Models that rely on + | subclassed Layers are also often easier to visualize and reason about. + | + | Example: + | + | ```python + | # add a x -> x^2 layer + | model.add(Lambda(lambda x: x ** 2)) + | ``` + | + | Args: + | function: The function to be evaluated. Takes input tensor as first + | argument. + | output_shape: Expected output shape from function. This argument + | can usually be inferred if not explicitly provided. + | Can be a tuple or function. If a tuple, it only specifies + | the first dimension onward; sample dimension is assumed + | either the same as the input: + | `output_shape = (input_shape[0], ) + output_shape` or, + | the input is `None` and the sample dimension is also `None`: + | `output_shape = (None, ) + output_shape`. + | If a function, it specifies the + | entire shape as a function of the input shape: + | `output_shape = f(input_shape)`. + | mask: Either None (indicating no masking) or a callable with the same + | signature as the `compute_mask` layer method, or a tensor + | that will be returned as output mask regardless + | of what the input is. + | arguments: Optional dictionary of keyword arguments to be passed to the + | function. + | + | Method resolution order: + | Lambda + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | function, + | output_shape=None, + | mask=None, + | arguments=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call( + | self, + | inputs, + | mask=None, + | training=None + | ) + | + | compute_mask( + | self, + | inputs, + | mask=None + | ) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. 
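+ |
+ | To make the `output_shape` conventions above concrete, a small sketch
+ | (the shapes and the halving function are illustrative):
+ |
+ | ```python
+ | from keras.layers import Lambda
+ |
+ | # Tuple form: specifies the shape from the second dimension onward,
+ | # so (batch, 10) inputs are declared to produce (batch, 5) outputs.
+ | halve = Lambda(lambda x: x[:, ::2], output_shape=(5,))
+ |
+ | # Function form: maps the full input shape to the full output shape.
+ | halve_fn = Lambda(lambda x: x[:, ::2],
+ |                   output_shape=lambda s: (s[0], s[1] // 2))
+ | ```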
+ |
+ | ----------------------------------------------------------------------
+ | Class methods defined here:
+ |
+ | from_config(config, custom_objects=None, safe_mode=None) from builtins.type
+ |     Creates a layer from its config.
+ |
+ |     This method is the reverse of `get_config`,
+ |     capable of instantiating the same layer from the config
+ |     dictionary. It does not handle layer connectivity
+ |     (handled by Network), nor weights (handled by `set_weights`).
+ |
+ |     Args:
+ |         config: A Python dictionary, typically the
+ |             output of get_config.
+ |
+ |     Returns:
+ |         A layer instance.
+ |
+
diff --git a/.tether/man/layer_layer_normalization.txt b/.tether/man/layer_layer_normalization.txt
new file mode 100644
index 0000000000..c950045a1b
--- /dev/null
+++ b/.tether/man/layer_layer_normalization.txt
@@ -0,0 +1,142 @@
+Help on class LayerNormalization in module keras.src.layers.normalization.layer_normalization:
+
+class LayerNormalization(keras.src.layers.layer.Layer)
+ | LayerNormalization(axis=-1, epsilon=0.001, center=True, scale=True, rms_scaling=False, beta_initializer='zeros', gamma_initializer='ones', beta_regularizer=None, gamma_regularizer=None, beta_constraint=None, gamma_constraint=None, **kwargs)
+ |
+ | Layer normalization layer (Ba et al., 2016).
+ |
+ | Normalize the activations of the previous layer for each given example in a
+ | batch independently, rather than across a batch like Batch Normalization,
+ | i.e., it applies a transformation that maintains the mean activation within
+ | each example close to 0 and the activation standard deviation close to 1.
+ |
+ | If `scale` or `center` are enabled, the layer will scale the normalized
+ | outputs by broadcasting them with a trainable variable `gamma`, and center
+ | the outputs by broadcasting with a trainable variable `beta`. `gamma` will
+ | default to a ones tensor and `beta` will default to a zeros tensor, so that
+ | centering and scaling are no-ops before training has begun.
+ |
+ | So, with scaling and centering enabled, the normalization equations
+ | are as follows:
+ |
+ | Let the intermediate activations for a mini-batch be the `inputs`.
+ |
+ | For each sample `x_i` in `inputs` with `k` features, we compute the mean and
+ | variance of the sample:
+ |
+ | ```python
+ | mean_i = sum(x_i[j] for j in range(k)) / k
+ | var_i = sum((x_i[j] - mean_i) ** 2 for j in range(k)) / k
+ | ```
+ |
+ | and then compute a normalized `x_i_normalized`, including a small factor
+ | `epsilon` for numerical stability.
+ |
+ | ```python
+ | x_i_normalized = (x_i - mean_i) / sqrt(var_i + epsilon)
+ | ```
+ |
+ | And finally `x_i_normalized` is linearly transformed by `gamma` and `beta`,
+ | which are learned parameters:
+ |
+ | ```python
+ | output_i = x_i_normalized * gamma + beta
+ | ```
+ |
+ | `gamma` and `beta` will span the axes of `inputs` specified in `axis`, and
+ | this part of the inputs' shape must be fully defined.
+ |
+ | For example:
+ |
+ | >>> layer = keras.layers.LayerNormalization(axis=[1, 2, 3])
+ | >>> layer.build([5, 20, 30, 40])
+ | >>> print(layer.beta.shape)
+ | (20, 30, 40)
+ | >>> print(layer.gamma.shape)
+ | (20, 30, 40)
+ |
+ | Note that other implementations of layer normalization may choose to define
+ | `gamma` and `beta` over a separate set of axes from the axes being
+ | normalized across. For example, Group Normalization
+ | ([Wu et al.
2018](https://arxiv.org/abs/1803.08494)) with group size of 1
+ | corresponds to a Layer Normalization that normalizes across height, width,
+ | and channel and has `gamma` and `beta` span only the channel dimension.
+ | So, this Layer Normalization implementation will not match a Group
+ | Normalization layer with group size set to 1.
+ |
+ | Args:
+ |     axis: Integer or List/Tuple. The axis or axes to normalize across.
+ |         Typically, this is the features axis/axes. The left-out axes are
+ |         typically the batch axis/axes. `-1` is the last dimension in the
+ |         input. Defaults to `-1`.
+ |     epsilon: Small float added to variance to avoid dividing by zero.
+ |         Defaults to 1e-3.
+ |     center: If True, add offset of `beta` to normalized tensor. If False,
+ |         `beta` is ignored. Defaults to `True`.
+ |     scale: If True, multiply by `gamma`. If False, `gamma` is not used.
+ |         When the next layer is linear (or piecewise-linear, e.g. `nn.relu`),
+ |         this can be disabled since the scaling will be done by the next
+ |         layer. Defaults to `True`.
+ |     rms_scaling: If True, `center` and `scale` are ignored, and the
+ |         inputs are scaled by `gamma` and the reciprocal of the root mean
+ |         square of the inputs. This is an approximate and faster
+ |         approach that avoids ever computing the mean of the input.
+ |     beta_initializer: Initializer for the beta weight. Defaults to zeros.
+ |     gamma_initializer: Initializer for the gamma weight. Defaults to ones.
+ |     beta_regularizer: Optional regularizer for the beta weight.
+ |         None by default.
+ |     gamma_regularizer: Optional regularizer for the gamma weight.
+ |         None by default.
+ |     beta_constraint: Optional constraint for the beta weight.
+ |         None by default.
+ |     gamma_constraint: Optional constraint for the gamma weight.
+ |         None by default.
+ |     **kwargs: Base layer keyword arguments (e.g. `name` and `dtype`).
+ |
+ |
+ | Reference:
+ |
+ | - [Lei Ba et al., 2016](https://arxiv.org/abs/1607.06450).
+ |
+ | Method resolution order:
+ |     LayerNormalization
+ |     keras.src.layers.layer.Layer
+ |     keras.src.backend.tensorflow.layer.TFLayer
+ |     keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ |     tensorflow.python.trackable.autotrackable.AutoTrackable
+ |     tensorflow.python.trackable.base.Trackable
+ |     keras.src.ops.operation.Operation
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ |     self,
+ |     axis=-1,
+ |     epsilon=0.001,
+ |     center=True,
+ |     scale=True,
+ |     rms_scaling=False,
+ |     beta_initializer='zeros',
+ |     gamma_initializer='ones',
+ |     beta_regularizer=None,
+ |     gamma_regularizer=None,
+ |     beta_constraint=None,
+ |     gamma_constraint=None,
+ |     **kwargs
+ | )
+ |     Initialize self. See help(type(self)) for accurate signature.
+ |
+ | build(self, input_shape)
+ |
+ | call(self, inputs)
+ |
+ | compute_output_shape(self, input_shape)
+ |
+ | get_config(self)
+ |     Returns the config of the object.
+ |
+ |     An object config is a Python dictionary (serializable)
+ |     containing the information needed to re-instantiate it.
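+ |
+ | As a quick numerical check of the equations above (a sketch; with fresh
+ | weights `gamma` is all ones and `beta` all zeros, so the output is just
+ | the normalized input):
+ |
+ | ```python
+ | import numpy as np
+ | import keras
+ |
+ | x = np.random.random((2, 5)).astype("float32") * 10.0
+ | layer = keras.layers.LayerNormalization(axis=-1)
+ | y = keras.ops.convert_to_numpy(layer(x))
+ |
+ | print(y.mean(axis=-1))  # close to 0 for each sample
+ | print(y.std(axis=-1))   # close to 1 for each sample
+ | ```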
+ |
+
diff --git a/.tether/man/layer_lstm.txt b/.tether/man/layer_lstm.txt
new file mode 100644
index 0000000000..4e3237bcc2
--- /dev/null
+++ b/.tether/man/layer_lstm.txt
@@ -0,0 +1,238 @@
+Help on class LSTM in module keras.src.layers.rnn.lstm:
+
+class LSTM(keras.src.layers.rnn.rnn.RNN)
+ | LSTM(units, activation='tanh', recurrent_activation='sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', unit_forget_bias=True, kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0.0, recurrent_dropout=0.0, seed=None, return_sequences=False, return_state=False, go_backwards=False, stateful=False, unroll=False, use_cudnn='auto', **kwargs)
+ |
+ | Long Short-Term Memory layer - Hochreiter 1997.
+ |
+ | Based on available runtime hardware and constraints, this layer
+ | will choose different implementations (cuDNN-based or backend-native)
+ | to maximize the performance. If a GPU is available and all
+ | the arguments to the layer meet the requirement of the cuDNN kernel
+ | (see below for details), the layer will use a fast cuDNN implementation
+ | when using the TensorFlow backend.
+ | The requirements to use the cuDNN implementation are:
+ |
+ | 1. `activation` == `tanh`
+ | 2. `recurrent_activation` == `sigmoid`
+ | 3. `dropout` == 0 and `recurrent_dropout` == 0
+ | 4. `unroll` is `False`
+ | 5. `use_bias` is `True`
+ | 6. Inputs, if masking is used, are strictly right-padded.
+ | 7. Eager execution is enabled in the outermost context.
+ |
+ | For example:
+ |
+ | >>> inputs = np.random.random((32, 10, 8))
+ | >>> lstm = keras.layers.LSTM(4)
+ | >>> output = lstm(inputs)
+ | >>> output.shape
+ | (32, 4)
+ | >>> lstm = keras.layers.LSTM(
+ | ...     4, return_sequences=True, return_state=True)
+ | >>> whole_seq_output, final_memory_state, final_carry_state = lstm(inputs)
+ | >>> whole_seq_output.shape
+ | (32, 10, 4)
+ | >>> final_memory_state.shape
+ | (32, 4)
+ | >>> final_carry_state.shape
+ | (32, 4)
+ |
+ | Args:
+ |     units: Positive integer, dimensionality of the output space.
+ |     activation: Activation function to use.
+ |         Default: hyperbolic tangent (`tanh`).
+ |         If you pass `None`, no activation is applied
+ |         (i.e. "linear" activation: `a(x) = x`).
+ |     recurrent_activation: Activation function to use
+ |         for the recurrent step.
+ |         Default: sigmoid (`sigmoid`).
+ |         If you pass `None`, no activation is applied
+ |         (i.e. "linear" activation: `a(x) = x`).
+ |     use_bias: Boolean, (default `True`), whether the layer
+ |         should use a bias vector.
+ |     kernel_initializer: Initializer for the `kernel` weights matrix,
+ |         used for the linear transformation of the inputs. Default:
+ |         `"glorot_uniform"`.
+ |     recurrent_initializer: Initializer for the `recurrent_kernel`
+ |         weights matrix, used for the linear transformation of the recurrent
+ |         state. Default: `"orthogonal"`.
+ |     bias_initializer: Initializer for the bias vector. Default: `"zeros"`.
+ |     unit_forget_bias: Boolean (default `True`). If `True`,
+ |         add 1 to the bias of the forget gate at initialization.
+ |         Setting it to `True` will also force `bias_initializer="zeros"`.
+ |         This is recommended in [Jozefowicz et al.](
+ |         https://github.com/mlresearch/v37/blob/gh-pages/jozefowicz15.pdf)
+ |     kernel_regularizer: Regularizer function applied to the `kernel` weights
+ |         matrix. Default: `None`.
+ |     recurrent_regularizer: Regularizer function applied to the
+ |         `recurrent_kernel` weights matrix. Default: `None`.
+ |     bias_regularizer: Regularizer function applied to the bias vector.
+ |         Default: `None`.
+ |     activity_regularizer: Regularizer function applied to the output of the
+ |         layer (its "activation"). Default: `None`.
+ |     kernel_constraint: Constraint function applied to the `kernel` weights
+ |         matrix. Default: `None`.
+ |     recurrent_constraint: Constraint function applied to the
+ |         `recurrent_kernel` weights matrix. Default: `None`.
+ |     bias_constraint: Constraint function applied to the bias vector.
+ |         Default: `None`.
+ |     dropout: Float between 0 and 1. Fraction of the units to drop for the
+ |         linear transformation of the inputs. Default: 0.
+ |     recurrent_dropout: Float between 0 and 1. Fraction of the units to drop
+ |         for the linear transformation of the recurrent state. Default: 0.
+ |     seed: Random seed for dropout.
+ |     return_sequences: Boolean. Whether to return the last output
+ |         in the output sequence, or the full sequence. Default: `False`.
+ |     return_state: Boolean. Whether to return the last state in addition
+ |         to the output. Default: `False`.
+ |     go_backwards: Boolean (default: `False`).
+ |         If `True`, process the input sequence backwards and return the
+ |         reversed sequence.
+ |     stateful: Boolean (default: `False`). If `True`, the last state
+ |         for each sample at index i in a batch will be used as initial
+ |         state for the sample of index i in the following batch.
+ |     unroll: Boolean (default False).
+ |         If `True`, the network will be unrolled,
+ |         else a symbolic loop will be used.
+ |         Unrolling can speed-up a RNN,
+ |         although it tends to be more memory-intensive.
+ |         Unrolling is only suitable for short sequences.
+ |     use_cudnn: Whether to use a cuDNN-backed implementation. `"auto"` will
+ |         attempt to use cuDNN when feasible, and will fall back to the
+ |         default implementation if not.
+ |
+ | Call arguments:
+ |     inputs: A 3D tensor, with shape `(batch, timesteps, feature)`.
+ |     mask: Binary tensor of shape `(samples, timesteps)` indicating whether
+ |         a given timestep should be masked (optional).
+ |         An individual `True` entry indicates that the corresponding timestep
+ |         should be utilized, while a `False` entry indicates that the
+ |         corresponding timestep should be ignored. Defaults to `None`.
+ |     training: Python boolean indicating whether the layer should behave in
+ |         training mode or in inference mode. This argument is passed to the
+ |         cell when calling it. This is only relevant if `dropout` or
+ |         `recurrent_dropout` is used (optional). Defaults to `None`.
+ |     initial_state: List of initial state tensors to be passed to the first
+ |         call of the cell (optional, `None` causes creation
+ |         of zero-filled initial state tensors). Defaults to `None`.
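+ |
+ | A short sketch of the `initial_state` argument described above (shapes
+ | follow the earlier example; the data is random and illustrative):
+ |
+ | ```python
+ | import numpy as np
+ | import keras
+ |
+ | x = np.random.random((32, 10, 8)).astype("float32")
+ | lstm = keras.layers.LSTM(4, return_state=True)
+ |
+ | out, h, c = lstm(x)                            # final memory/carry states
+ | out2, h2, c2 = lstm(x, initial_state=[h, c])   # resume from those states
+ | ```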
+ | + | Method resolution order: + | LSTM + | keras.src.layers.rnn.rnn.RNN + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | units, + | activation='tanh', + | recurrent_activation='sigmoid', + | use_bias=True, + | kernel_initializer='glorot_uniform', + | recurrent_initializer='orthogonal', + | bias_initializer='zeros', + | unit_forget_bias=True, + | kernel_regularizer=None, + | recurrent_regularizer=None, + | bias_regularizer=None, + | activity_regularizer=None, + | kernel_constraint=None, + | recurrent_constraint=None, + | bias_constraint=None, + | dropout=0.0, + | recurrent_dropout=0.0, + | seed=None, + | return_sequences=False, + | return_state=False, + | go_backwards=False, + | stateful=False, + | unroll=False, + | use_cudnn='auto', + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call( + | self, + | sequences, + | initial_state=None, + | mask=None, + | training=False + | ) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + | inner_loop( + | self, + | sequences, + | initial_state, + | mask, + | training=False + | ) + | + | ---------------------------------------------------------------------- + | Class methods defined here: + | + | from_config(config) from builtins.type + | Creates a layer from its config. + | + | This method is the reverse of `get_config`, + | capable of instantiating the same layer from the config + | dictionary. It does not handle layer connectivity + | (handled by Network), nor weights (handled by `set_weights`). + | + | Args: + | config: A Python dictionary, typically the + | output of get_config. + | + | Returns: + | A layer instance. + | + | ---------------------------------------------------------------------- + | Readonly properties defined here: + | + | activation + | + | bias_constraint + | + | bias_initializer + | + | bias_regularizer + | + | dropout + | + | kernel_constraint + | + | kernel_initializer + | + | kernel_regularizer + | + | recurrent_activation + | + | recurrent_constraint + | + | recurrent_dropout + | + | recurrent_initializer + | + | recurrent_regularizer + | + | unit_forget_bias + | + | units + | + | use_bias + | + diff --git a/.tether/man/layer_lstm_cell.txt b/.tether/man/layer_lstm_cell.txt new file mode 100644 index 0000000000..0fcd08f24e --- /dev/null +++ b/.tether/man/layer_lstm_cell.txt @@ -0,0 +1,128 @@ +Help on class LSTMCell in module keras.src.layers.rnn.lstm: + +class LSTMCell(keras.src.layers.layer.Layer, keras.src.layers.rnn.dropout_rnn_cell.DropoutRNNCell) + | LSTMCell(units, activation='tanh', recurrent_activation='sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', unit_forget_bias=True, kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0.0, recurrent_dropout=0.0, seed=None, **kwargs) + | + | Cell class for the LSTM layer. 
+ |
+ | This class processes one step within the whole time sequence input, whereas
+ | `keras.layers.LSTM` processes the whole sequence.
+ |
+ | Args:
+ |     units: Positive integer, dimensionality of the output space.
+ |     activation: Activation function to use. Default: hyperbolic tangent
+ |         (`tanh`). If you pass `None`, no activation is applied
+ |         (i.e. "linear" activation: `a(x) = x`).
+ |     recurrent_activation: Activation function to use for the recurrent step.
+ |         Default: sigmoid (`sigmoid`). If you pass `None`, no activation is
+ |         applied (i.e. "linear" activation: `a(x) = x`).
+ |     use_bias: Boolean, (default `True`), whether the layer
+ |         should use a bias vector.
+ |     kernel_initializer: Initializer for the `kernel` weights matrix,
+ |         used for the linear transformation of the inputs. Default:
+ |         `"glorot_uniform"`.
+ |     recurrent_initializer: Initializer for the `recurrent_kernel`
+ |         weights matrix, used for the linear transformation
+ |         of the recurrent state. Default: `"orthogonal"`.
+ |     bias_initializer: Initializer for the bias vector. Default: `"zeros"`.
+ |     unit_forget_bias: Boolean (default `True`). If `True`,
+ |         add 1 to the bias of the forget gate at initialization.
+ |         Setting it to `True` will also force `bias_initializer="zeros"`.
+ |         This is recommended in [Jozefowicz et al.](
+ |         https://github.com/mlresearch/v37/blob/gh-pages/jozefowicz15.pdf)
+ |     kernel_regularizer: Regularizer function applied to the `kernel` weights
+ |         matrix. Default: `None`.
+ |     recurrent_regularizer: Regularizer function applied to the
+ |         `recurrent_kernel` weights matrix. Default: `None`.
+ |     bias_regularizer: Regularizer function applied to the bias vector.
+ |         Default: `None`.
+ |     kernel_constraint: Constraint function applied to the `kernel` weights
+ |         matrix. Default: `None`.
+ |     recurrent_constraint: Constraint function applied to the
+ |         `recurrent_kernel` weights matrix. Default: `None`.
+ |     bias_constraint: Constraint function applied to the bias vector.
+ |         Default: `None`.
+ |     dropout: Float between 0 and 1. Fraction of the units to drop for the
+ |         linear transformation of the inputs. Default: 0.
+ |     recurrent_dropout: Float between 0 and 1. Fraction of the units to drop
+ |         for the linear transformation of the recurrent state. Default: 0.
+ |     seed: Random seed for dropout.
+ |
+ | Call arguments:
+ |     inputs: A 2D tensor, with shape `(batch, features)`.
+ |     states: A 2D tensor with shape `(batch, units)`, which is the state
+ |         from the previous time step.
+ |     training: Python boolean indicating whether the layer should behave in
+ |         training mode or in inference mode. Only relevant when `dropout` or
+ |         `recurrent_dropout` is used.
+ |
+ | Example:
+ |
+ | >>> inputs = np.random.random((32, 10, 8))
+ | >>> rnn = keras.layers.RNN(keras.layers.LSTMCell(4))
+ | >>> output = rnn(inputs)
+ | >>> output.shape
+ | (32, 4)
+ | >>> rnn = keras.layers.RNN(
+ | ...     keras.layers.LSTMCell(4),
+ | ...     return_sequences=True,
+ | ...
return_state=True)
+ | >>> whole_sequence_output, final_state = rnn(inputs)
+ | >>> whole_sequence_output.shape
+ | (32, 10, 4)
+ | >>> final_state.shape
+ | (32, 4)
+ |
+ | Method resolution order:
+ |     LSTMCell
+ |     keras.src.layers.layer.Layer
+ |     keras.src.backend.tensorflow.layer.TFLayer
+ |     keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ |     tensorflow.python.trackable.autotrackable.AutoTrackable
+ |     tensorflow.python.trackable.base.Trackable
+ |     keras.src.ops.operation.Operation
+ |     keras.src.layers.rnn.dropout_rnn_cell.DropoutRNNCell
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ |     self,
+ |     units,
+ |     activation='tanh',
+ |     recurrent_activation='sigmoid',
+ |     use_bias=True,
+ |     kernel_initializer='glorot_uniform',
+ |     recurrent_initializer='orthogonal',
+ |     bias_initializer='zeros',
+ |     unit_forget_bias=True,
+ |     kernel_regularizer=None,
+ |     recurrent_regularizer=None,
+ |     bias_regularizer=None,
+ |     kernel_constraint=None,
+ |     recurrent_constraint=None,
+ |     bias_constraint=None,
+ |     dropout=0.0,
+ |     recurrent_dropout=0.0,
+ |     seed=None,
+ |     **kwargs
+ | )
+ |     Initialize self. See help(type(self)) for accurate signature.
+ |
+ | build(self, input_shape)
+ |
+ | call(
+ |     self,
+ |     inputs,
+ |     states,
+ |     training=False
+ | )
+ |
+ | get_config(self)
+ |     Returns the config of the object.
+ |
+ |     An object config is a Python dictionary (serializable)
+ |     containing the information needed to re-instantiate it.
+ |
+ | get_initial_state(self, batch_size=None)
+ |
diff --git a/.tether/man/layer_masking.txt b/.tether/man/layer_masking.txt
new file mode 100644
index 0000000000..90b8f97766
--- /dev/null
+++ b/.tether/man/layer_masking.txt
@@ -0,0 +1,77 @@
+Help on class Masking in module keras.src.layers.core.masking:
+
+class Masking(keras.src.layers.layer.Layer)
+ | Masking(mask_value=0.0, **kwargs)
+ |
+ | Masks a sequence by using a mask value to skip timesteps.
+ |
+ | For each timestep in the input tensor (dimension #1 in the tensor),
+ | if all values in the input tensor at that timestep
+ | are equal to `mask_value`, then the timestep will be masked (skipped)
+ | in all downstream layers (as long as they support masking).
+ |
+ | If any downstream layer does not support masking yet receives such
+ | an input mask, an exception will be raised.
+ |
+ | Example:
+ |
+ | Consider a NumPy data array `x` of shape `(samples, timesteps, features)`,
+ | to be fed to an LSTM layer. You want to mask timesteps #3 and #5 because
+ | you lack data for these timesteps. You can:
+ |
+ | - Set `x[:, 3, :] = 0.` and `x[:, 5, :] = 0.`
+ | - Insert a `Masking` layer with `mask_value=0.` before the LSTM layer:
+ |
+ | ```python
+ | samples, timesteps, features = 32, 10, 8
+ | inputs = np.random.random([samples, timesteps, features]).astype(np.float32)
+ | inputs[:, 3, :] = 0.
+ | inputs[:, 5, :] = 0.
+ |
+ | model = keras.models.Sequential()
+ | model.add(keras.layers.Masking(mask_value=0.))
+ | model.add(keras.layers.LSTM(32))
+ | output = model(inputs)
+ | # Timesteps 3 and 5 will be skipped from the LSTM calculation.
+ | ```
+ |
+ | Note: in the Keras masking convention, a masked timestep is denoted by
+ | a mask value of `False`, while a non-masked (i.e. usable) timestep
+ | is denoted by a mask value of `True`.
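+ |
+ | To see this convention in action, a small sketch using the
+ | `compute_mask()` method listed below (the input values are illustrative):
+ |
+ | ```python
+ | import numpy as np
+ | import keras
+ |
+ | x = np.random.random((2, 4, 3)).astype("float32")
+ | x[:, 1, :] = 0.0                      # timestep 1 matches mask_value
+ |
+ | layer = keras.layers.Masking(mask_value=0.0)
+ | mask = layer.compute_mask(x)          # boolean mask of shape (2, 4)
+ | # mask is False at timestep 1 and True elsewhere
+ | ```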
+ |
+ | Method resolution order:
+ |     Masking
+ |     keras.src.layers.layer.Layer
+ |     keras.src.backend.tensorflow.layer.TFLayer
+ |     keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ |     tensorflow.python.trackable.autotrackable.AutoTrackable
+ |     tensorflow.python.trackable.base.Trackable
+ |     keras.src.ops.operation.Operation
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ |     self,
+ |     mask_value=0.0,
+ |     **kwargs
+ | )
+ |     Initialize self. See help(type(self)) for accurate signature.
+ |
+ | call(self, inputs)
+ |
+ | compute_mask(
+ |     self,
+ |     inputs,
+ |     mask=None
+ | )
+ |
+ | compute_output_shape(self, input_shape)
+ |
+ | get_config(self)
+ |     Returns the config of the object.
+ |
+ |     An object config is a Python dictionary (serializable)
+ |     containing the information needed to re-instantiate it.
+ |
+
diff --git a/.tether/man/layer_max_pooling_1d.txt b/.tether/man/layer_max_pooling_1d.txt
new file mode 100644
index 0000000000..4b9edacca2
--- /dev/null
+++ b/.tether/man/layer_max_pooling_1d.txt
@@ -0,0 +1,95 @@
+Help on class MaxPooling1D in module keras.src.layers.pooling.max_pooling1d:
+
+class MaxPooling1D(keras.src.layers.pooling.base_pooling.BasePooling)
+ | MaxPooling1D(pool_size=2, strides=None, padding='valid', data_format=None, name=None, **kwargs)
+ |
+ | Max pooling operation for 1D temporal data.
+ |
+ | Downsamples the input representation by taking the maximum value over a
+ | spatial window of size `pool_size`. The window is shifted by `strides`.
+ |
+ | The resulting output when using the `"valid"` padding option has a shape of:
+ | `output_shape = (input_shape - pool_size + 1) / strides`.
+ |
+ | The resulting output shape when using the `"same"` padding option is:
+ | `output_shape = input_shape / strides`
+ |
+ | Args:
+ |     pool_size: int, size of the max pooling window.
+ |     strides: int or None. Specifies how much the pooling window moves
+ |         for each pooling step. If None, it will default to `pool_size`.
+ |     padding: string, either `"valid"` or `"same"` (case-insensitive).
+ |         `"valid"` means no padding. `"same"` results in padding evenly to
+ |         the left/right or up/down of the input such that output has the same
+ |         height/width dimension as the input.
+ |     data_format: string, either `"channels_last"` or `"channels_first"`.
+ |         The ordering of the dimensions in the inputs. `"channels_last"`
+ |         corresponds to inputs with shape `(batch, steps, features)`
+ |         while `"channels_first"` corresponds to inputs with shape
+ |         `(batch, features, steps)`. It defaults to the `image_data_format`
+ |         value found in your Keras config file at `~/.keras/keras.json`.
+ |         If you never set it, then it will be `"channels_last"`.
+ |
+ | Input shape:
+ |     - If `data_format="channels_last"`:
+ |         3D tensor with shape `(batch_size, steps, features)`.
+ |     - If `data_format="channels_first"`:
+ |         3D tensor with shape `(batch_size, features, steps)`.
+ |
+ | Output shape:
+ |     - If `data_format="channels_last"`:
+ |         3D tensor with shape `(batch_size, downsampled_steps, features)`.
+ |     - If `data_format="channels_first"`:
+ |         3D tensor with shape `(batch_size, features, downsampled_steps)`.
+ |
+ | Examples:
+ |
+ | `strides=1` and `padding="valid"`:
+ |
+ | >>> x = np.array([1., 2., 3., 4., 5.])
+ | >>> x = np.reshape(x, [1, 5, 1])
+ | >>> max_pool_1d = keras.layers.MaxPooling1D(pool_size=2,
+ | ...
strides=1, padding="valid") + | >>> max_pool_1d(x) + | + | `strides=2` and `padding="valid"`: + | + | >>> x = np.array([1., 2., 3., 4., 5.]) + | >>> x = np.reshape(x, [1, 5, 1]) + | >>> max_pool_1d = keras.layers.MaxPooling1D(pool_size=2, + | ... strides=2, padding="valid") + | >>> max_pool_1d(x) + | + | `strides=1` and `padding="same"`: + | + | >>> x = np.array([1., 2., 3., 4., 5.]) + | >>> x = np.reshape(x, [1, 5, 1]) + | >>> max_pool_1d = keras.layers.MaxPooling1D(pool_size=2, + | ... strides=1, padding="same") + | >>> max_pool_1d(x) + | + | Method resolution order: + | MaxPooling1D + | keras.src.layers.pooling.base_pooling.BasePooling + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | pool_size=2, + | strides=None, + | padding='valid', + | data_format=None, + | name=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + diff --git a/.tether/man/layer_max_pooling_2d.txt b/.tether/man/layer_max_pooling_2d.txt new file mode 100644 index 0000000000..1f460c3b05 --- /dev/null +++ b/.tether/man/layer_max_pooling_2d.txt @@ -0,0 +1,111 @@ +Help on class MaxPooling2D in module keras.src.layers.pooling.max_pooling2d: + +class MaxPooling2D(keras.src.layers.pooling.base_pooling.BasePooling) + | MaxPooling2D(pool_size=(2, 2), strides=None, padding='valid', data_format=None, name=None, **kwargs) + | + | Max pooling operation for 2D spatial data. + | + | Downsamples the input along its spatial dimensions (height and width) + | by taking the maximum value over an input window + | (of size defined by `pool_size`) for each channel of the input. + | The window is shifted by `strides` along each dimension. + | + | The resulting output when using the `"valid"` padding option has a spatial + | shape (number of rows or columns) of: + | `output_shape = math.floor((input_shape - pool_size) / strides) + 1` + | (when `input_shape >= pool_size`) + | + | The resulting output shape when using the `"same"` padding option is: + | `output_shape = math.floor((input_shape - 1) / strides) + 1` + | + | Args: + | pool_size: int or tuple of 2 integers, factors by which to downscale + | (dim1, dim2). If only one integer is specified, the same + | window length will be used for all dimensions. + | strides: int or tuple of 2 integers, or None. Strides values. If None, + | it will default to `pool_size`. If only one int is specified, the + | same stride size will be used for all dimensions. + | padding: string, either `"valid"` or `"same"` (case-insensitive). + | `"valid"` means no padding. `"same"` results in padding evenly to + | the left/right or up/down of the input such that output has the same + | height/width dimension as the input. + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape `(batch, height, width, channels)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch, channels, height, width)`. It defaults to the + | `image_data_format` value found in your Keras config file at + | `~/.keras/keras.json`. If you never set it, then it will be + | `"channels_last"`. 
+ |
+ | Input shape:
+ |     - If `data_format="channels_last"`:
+ |         4D tensor with shape `(batch_size, height, width, channels)`.
+ |     - If `data_format="channels_first"`:
+ |         4D tensor with shape `(batch_size, channels, height, width)`.
+ |
+ | Output shape:
+ |     - If `data_format="channels_last"`:
+ |         4D tensor with shape
+ |         `(batch_size, pooled_height, pooled_width, channels)`.
+ |     - If `data_format="channels_first"`:
+ |         4D tensor with shape
+ |         `(batch_size, channels, pooled_height, pooled_width)`.
+ |
+ | Examples:
+ |
+ | `strides=(1, 1)` and `padding="valid"`:
+ |
+ | >>> x = np.array([[1., 2., 3.],
+ | ...               [4., 5., 6.],
+ | ...               [7., 8., 9.]])
+ | >>> x = np.reshape(x, [1, 3, 3, 1])
+ | >>> max_pool_2d = keras.layers.MaxPooling2D(pool_size=(2, 2),
+ | ...     strides=(1, 1), padding="valid")
+ | >>> max_pool_2d(x)
+ |
+ | `strides=(2, 2)` and `padding="valid"`:
+ |
+ | >>> x = np.array([[1., 2., 3., 4.],
+ | ...               [5., 6., 7., 8.],
+ | ...               [9., 10., 11., 12.]])
+ | >>> x = np.reshape(x, [1, 3, 4, 1])
+ | >>> max_pool_2d = keras.layers.MaxPooling2D(pool_size=(2, 2),
+ | ...     strides=(2, 2), padding="valid")
+ | >>> max_pool_2d(x)
+ |
+ | `strides=(1, 1)` and `padding="same"`:
+ |
+ | >>> x = np.array([[1., 2., 3.],
+ | ...               [4., 5., 6.],
+ | ...               [7., 8., 9.]])
+ | >>> x = np.reshape(x, [1, 3, 3, 1])
+ | >>> max_pool_2d = keras.layers.MaxPooling2D(pool_size=(2, 2),
+ | ...     strides=(1, 1), padding="same")
+ | >>> max_pool_2d(x)
+ |
+ | Method resolution order:
+ |     MaxPooling2D
+ |     keras.src.layers.pooling.base_pooling.BasePooling
+ |     keras.src.layers.layer.Layer
+ |     keras.src.backend.tensorflow.layer.TFLayer
+ |     keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ |     tensorflow.python.trackable.autotrackable.AutoTrackable
+ |     tensorflow.python.trackable.base.Trackable
+ |     keras.src.ops.operation.Operation
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ |     self,
+ |     pool_size=(2, 2),
+ |     strides=None,
+ |     padding='valid',
+ |     data_format=None,
+ |     name=None,
+ |     **kwargs
+ | )
+ |     Initialize self. See help(type(self)) for accurate signature.
+ |
+
diff --git a/.tether/man/layer_max_pooling_3d.txt b/.tether/man/layer_max_pooling_3d.txt
new file mode 100644
index 0000000000..a7d0eb1e24
--- /dev/null
+++ b/.tether/man/layer_max_pooling_3d.txt
@@ -0,0 +1,87 @@
+Help on class MaxPooling3D in module keras.src.layers.pooling.max_pooling3d:
+
+class MaxPooling3D(keras.src.layers.pooling.base_pooling.BasePooling)
+ | MaxPooling3D(pool_size=(2, 2, 2), strides=None, padding='valid', data_format=None, name=None, **kwargs)
+ |
+ | Max pooling operation for 3D data (spatial or spatio-temporal).
+ |
+ | Downsamples the input along its spatial dimensions (depth, height, and
+ | width) by taking the maximum value over an input window (of size defined by
+ | `pool_size`) for each channel of the input. The window is shifted by
+ | `strides` along each dimension.
+ |
+ | Args:
+ |     pool_size: int or tuple of 3 integers, factors by which to downscale
+ |         (dim1, dim2, dim3). If only one integer is specified, the same
+ |         window length will be used for all dimensions.
+ |     strides: int or tuple of 3 integers, or None. Strides values. If None,
+ |         it will default to `pool_size`. If only one int is specified, the
+ |         same stride size will be used for all dimensions.
+ |     padding: string, either `"valid"` or `"same"` (case-insensitive).
+ |         `"valid"` means no padding.
`"same"` results in padding evenly to + | the left/right or up/down of the input such that output has the same + | height/width dimension as the input. + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape + | `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` while + | `"channels_first"` corresponds to inputs with shape + | `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. + | It defaults to the `image_data_format` value found in your Keras + | config file at `~/.keras/keras.json`. If you never set it, then it + | will be `"channels_last"`. + | + | Input shape: + | - If `data_format="channels_last"`: + | 5D tensor with shape: + | `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + | - If `data_format="channels_first"`: + | 5D tensor with shape: + | `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` + | + | Output shape: + | - If `data_format="channels_last"`: + | 5D tensor with shape: + | `(batch_size, pooled_dim1, pooled_dim2, pooled_dim3, channels)` + | - If `data_format="channels_first"`: + | 5D tensor with shape: + | `(batch_size, channels, pooled_dim1, pooled_dim2, pooled_dim3)` + | + | Example: + | + | ```python + | depth = 30 + | height = 30 + | width = 30 + | channels = 3 + | + | inputs = keras.layers.Input(shape=(depth, height, width, channels)) + | layer = keras.layers.MaxPooling3D(pool_size=3) + | outputs = layer(inputs) # Shape: (batch_size, 10, 10, 10, 3) + | ``` + | + | Method resolution order: + | MaxPooling3D + | keras.src.layers.pooling.base_pooling.BasePooling + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | pool_size=(2, 2, 2), + | strides=None, + | padding='valid', + | data_format=None, + | name=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + diff --git a/.tether/man/layer_maximum.txt b/.tether/man/layer_maximum.txt new file mode 100644 index 0000000000..b2d1853e9e --- /dev/null +++ b/.tether/man/layer_maximum.txt @@ -0,0 +1,65 @@ +Help on class Maximum in module keras.src.layers.merging.maximum: + +class Maximum(keras.src.layers.merging.base_merge.Merge) + | Maximum(**kwargs) + | + | Computes element-wise maximum on a list of inputs. + | + | It takes as input a list of tensors, all of the same shape, + | and returns a single tensor (also of the same shape). 
+ |
+ | Examples:
+ |
+ | >>> input_shape = (2, 3, 4)
+ | >>> x1 = np.random.rand(*input_shape)
+ | >>> x2 = np.random.rand(*input_shape)
+ | >>> y = keras.layers.Maximum()([x1, x2])
+ |
+ | Usage in a Keras model:
+ |
+ | >>> input1 = keras.layers.Input(shape=(16,))
+ | >>> x1 = keras.layers.Dense(8, activation='relu')(input1)
+ | >>> input2 = keras.layers.Input(shape=(32,))
+ | >>> x2 = keras.layers.Dense(8, activation='relu')(input2)
+ | >>> # equivalent to `y = keras.layers.maximum([x1, x2])`
+ | >>> y = keras.layers.Maximum()([x1, x2])
+ | >>> out = keras.layers.Dense(4)(y)
+ | >>> model = keras.models.Model(inputs=[input1, input2], outputs=out)
+ |
+ | Method resolution order:
+ |     Maximum
+ |     keras.src.layers.merging.base_merge.Merge
+ |     keras.src.layers.layer.Layer
+ |     keras.src.backend.tensorflow.layer.TFLayer
+ |     keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ |     tensorflow.python.trackable.autotrackable.AutoTrackable
+ |     tensorflow.python.trackable.base.Trackable
+ |     keras.src.ops.operation.Operation
+ |     builtins.object
+ |
+ | Methods inherited from keras.src.layers.merging.base_merge.Merge:
+ |
+ | __init__(self, **kwargs)
+ |     Initialize self. See help(type(self)) for accurate signature.
+ |
+ | build(self, input_shape)
+ |
+ | call(self, inputs)
+ |
+ | compute_mask(
+ |     self,
+ |     inputs,
+ |     mask=None
+ | )
+ |
+ | compute_output_shape(self, input_shape)
+ |
+ | compute_output_spec(self, inputs)
+ |
+ | get_config(self)
+ |     Returns the config of the object.
+ |
+ |     An object config is a Python dictionary (serializable)
+ |     containing the information needed to re-instantiate it.
+ |
+
diff --git a/.tether/man/layer_mel_spectrogram.txt b/.tether/man/layer_mel_spectrogram.txt
new file mode 100644
index 0000000000..89875dad1e
--- /dev/null
+++ b/.tether/man/layer_mel_spectrogram.txt
@@ -0,0 +1,190 @@
+Help on class MelSpectrogram in module keras.src.layers.preprocessing.audio_preprocessing:
+
+class MelSpectrogram(keras.src.layers.preprocessing.tf_data_layer.TFDataLayer)
+ | MelSpectrogram(fft_length=2048, sequence_stride=512, sequence_length=None, window='hann', sampling_rate=16000, num_mel_bins=128, min_freq=20.0, max_freq=None, power_to_db=True, top_db=80.0, mag_exp=2.0, min_power=1e-10, ref_power=1.0, **kwargs)
+ |
+ | A preprocessing layer to convert raw audio signals to Mel spectrograms.
+ |
+ | This layer takes `float32`/`float64` single or batched audio signals as
+ | input and computes the Mel spectrogram using Short-Time Fourier Transform
+ | and Mel scaling. The input should be a 1D (unbatched) or 2D (batched) tensor
+ | representing audio signals. The output will be a 2D or 3D tensor
+ | representing Mel spectrograms.
+ |
+ | A spectrogram is an image-like representation that shows the frequency
+ | spectrum of a signal over time. It uses the x-axis to represent time, the
+ | y-axis to represent frequency, and each pixel to represent intensity.
+ | Mel spectrograms are a special type of spectrogram that use the mel scale,
+ | which approximates how humans perceive sound. They are commonly used in
+ | speech and music processing tasks like speech recognition, speaker
+ | identification, and music genre classification.
+ |
+ | References:
+ | - [Spectrogram](https://en.wikipedia.org/wiki/Spectrogram),
+ | - [Mel scale](https://en.wikipedia.org/wiki/Mel_scale).
+ |
+ | Examples:
+ |
+ | **Unbatched audio signal**
+ |
+ | >>> layer = keras.layers.MelSpectrogram(num_mel_bins=64,
+ | ...     sampling_rate=8000,
+ | ...     sequence_stride=256,
+ | ...
fft_length=2048) + | >>> layer(keras.random.uniform(shape=(16000,))).shape + | (64, 63) + | + | **Batched audio signal** + | + | >>> layer = keras.layers.MelSpectrogram(num_mel_bins=80, + | ... sampling_rate=8000, + | ... sequence_stride=128, + | ... fft_length=2048) + | >>> layer(keras.random.uniform(shape=(2, 16000))).shape + | (2, 80, 125) + | + | Input shape: + | 1D (unbatched) or 2D (batched) tensor with shape:`(..., samples)`. + | + | Output shape: + | 2D (unbatched) or 3D (batched) tensor with + | shape:`(..., num_mel_bins, time)`. + | + | Args: + | fft_length: Integer, size of the FFT window. + | sequence_stride: Integer, number of samples between successive STFT + | columns. + | sequence_length: Integer, size of the window used for applying + | `window` to each audio frame. If `None`, defaults to `fft_length`. + | window: String, name of the window function to use. Available values + | are `"hann"` and `"hamming"`. If `window` is a tensor, it will be + | used directly as the window and its length must be + | `sequence_length`. If `window` is `None`, no windowing is + | used. Defaults to `"hann"`. + | sampling_rate: Integer, sample rate of the input signal. + | num_mel_bins: Integer, number of mel bins to generate. + | min_freq: Float, minimum frequency of the mel bins. + | max_freq: Float, maximum frequency of the mel bins. + | If `None`, defaults to `sampling_rate / 2`. + | power_to_db: If True, convert the power spectrogram to decibels. + | top_db: Float, minimum negative cut-off `max(10 * log10(S)) - top_db`. + | mag_exp: Float, exponent for the magnitude spectrogram. + | 1 for magnitude, 2 for power, etc. Default is 2. + | ref_power: Float, the power is scaled relative to it + | `10 * log10(S / ref_power)`. + | min_power: Float, minimum value for power and `ref_power`. + | + | Method resolution order: + | MelSpectrogram + | keras.src.layers.preprocessing.tf_data_layer.TFDataLayer + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | fft_length=2048, + | sequence_stride=512, + | sequence_length=None, + | window='hann', + | sampling_rate=16000, + | num_mel_bins=128, + | min_freq=20.0, + | max_freq=None, + | power_to_db=True, + | top_db=80.0, + | mag_exp=2.0, + | min_power=1e-10, + | ref_power=1.0, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + | linear_to_mel_weight_matrix( + | self, + | num_mel_bins=20, + | num_spectrogram_bins=129, + | sampling_rate=8000, + | lower_edge_hertz=125.0, + | upper_edge_hertz=3800.0, + | dtype='float32' + | ) + | Returns a matrix to warp linear scale spectrograms to the mel scale. + | + | Returns a weight matrix that can be used to re-weight a tensor + | containing `num_spectrogram_bins` linearly sampled frequency information + | from `[0, sampling_rate / 2]` into `num_mel_bins` frequency information + | from `[lower_edge_hertz, upper_edge_hertz]` on the mel scale. 
+ | + | This function follows the [Hidden Markov Model Toolkit (HTK)]( + | http://htk.eng.cam.ac.uk/) convention, defining the mel scale in + | terms of a frequency in hertz according to the following formula: + | + | ```mel(f) = 2595 * log10( 1 + f/700)``` + | + | In the returned matrix, all the triangles (filterbanks) have a peak + | value of 1.0. + | + | For example, the returned matrix `A` can be used to right-multiply a + | spectrogram `S` of shape `[frames, num_spectrogram_bins]` of linear + | scale spectrum values (e.g. STFT magnitudes) to generate a + | "mel spectrogram" `M` of shape `[frames, num_mel_bins]`. + | + | ``` + | # `S` has shape [frames, num_spectrogram_bins] + | # `M` has shape [frames, num_mel_bins] + | M = keras.ops.matmul(S, A) + | ``` + | + | The matrix can be used with `keras.ops.tensordot` to convert an + | arbitrary rank `Tensor` of linear-scale spectral bins into the + | mel scale. + | + | ``` + | # S has shape [..., num_spectrogram_bins]. + | # M has shape [..., num_mel_bins]. + | M = keras.ops.tensordot(S, A, 1) + | ``` + | + | References: + | - [Mel scale (Wikipedia)](https://en.wikipedia.org/wiki/Mel_scale) + | + | Args: + | num_mel_bins: Python int. How many bands in the resulting + | mel spectrum. + | num_spectrogram_bins: An integer `Tensor`. How many bins there are + | in the source spectrogram data, which is understood to be + | `fft_size // 2 + 1`, i.e. the spectrogram only contains the + | nonredundant FFT bins. + | sampling_rate: An integer or float `Tensor`. Samples per second of + | the input signal used to create the spectrogram. Used to figure + | out the frequencies corresponding to each spectrogram bin, + | which dictates how they are mapped into the mel scale. + | lower_edge_hertz: Python float. Lower bound on the frequencies to be + | included in the mel spectrum. This corresponds to the lower + | edge of the lowest triangular band. + | upper_edge_hertz: Python float. The desired top edge of the highest + | frequency band. + | dtype: The `DType` of the result matrix. Must be a floating point + | type. + | + | Returns: + | A tensor of shape `[num_spectrogram_bins, num_mel_bins]`. + | + diff --git a/.tether/man/layer_minimum.txt b/.tether/man/layer_minimum.txt new file mode 100644 index 0000000000..685831ddb3 --- /dev/null +++ b/.tether/man/layer_minimum.txt @@ -0,0 +1,65 @@ +Help on class Minimum in module keras.src.layers.merging.minimum: + +class Minimum(keras.src.layers.merging.base_merge.Merge) + | Minimum(**kwargs) + | + | Computes elementwise minimum on a list of inputs. + | + | It takes as input a list of tensors, all of the same shape, + | and returns a single tensor (also of the same shape). 
+ |
+ | Examples:
+ |
+ | >>> input_shape = (2, 3, 4)
+ | >>> x1 = np.random.rand(*input_shape)
+ | >>> x2 = np.random.rand(*input_shape)
+ | >>> y = keras.layers.Minimum()([x1, x2])
+ |
+ | Usage in a Keras model:
+ |
+ | >>> input1 = keras.layers.Input(shape=(16,))
+ | >>> x1 = keras.layers.Dense(8, activation='relu')(input1)
+ | >>> input2 = keras.layers.Input(shape=(32,))
+ | >>> x2 = keras.layers.Dense(8, activation='relu')(input2)
+ | >>> # equivalent to `y = keras.layers.minimum([x1, x2])`
+ | >>> y = keras.layers.Minimum()([x1, x2])
+ | >>> out = keras.layers.Dense(4)(y)
+ | >>> model = keras.models.Model(inputs=[input1, input2], outputs=out)
+ |
+ | Method resolution order:
+ | Minimum
+ | keras.src.layers.merging.base_merge.Merge
+ | keras.src.layers.layer.Layer
+ | keras.src.backend.tensorflow.layer.TFLayer
+ | keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ | tensorflow.python.trackable.autotrackable.AutoTrackable
+ | tensorflow.python.trackable.base.Trackable
+ | keras.src.ops.operation.Operation
+ | builtins.object
+ |
+ | Methods inherited from keras.src.layers.merging.base_merge.Merge:
+ |
+ | __init__(self, **kwargs)
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | build(self, input_shape)
+ |
+ | call(self, inputs)
+ |
+ | compute_mask(
+ | self,
+ | inputs,
+ | mask=None
+ | )
+ |
+ | compute_output_shape(self, input_shape)
+ |
+ | compute_output_spec(self, inputs)
+ |
+ | get_config(self)
+ | Returns the config of the object.
+ |
+ | An object config is a Python dictionary (serializable)
+ | containing the information needed to re-instantiate it.
+ |
+
diff --git a/.tether/man/layer_multi_head_attention.txt b/.tether/man/layer_multi_head_attention.txt
new file mode 100644
index 0000000000..c9651aeede
--- /dev/null
+++ b/.tether/man/layer_multi_head_attention.txt
@@ -0,0 +1,192 @@
+Help on class MultiHeadAttention in module keras.src.layers.attention.multi_head_attention:
+
+class MultiHeadAttention(keras.src.layers.layer.Layer)
+ | MultiHeadAttention(num_heads, key_dim, value_dim=None, dropout=0.0, use_bias=True, output_shape=None, attention_axes=None, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None, **kwargs)
+ |
+ | MultiHeadAttention layer.
+ |
+ | This is an implementation of multi-headed attention as described in the
+ | paper "Attention Is All You Need"
+ | [Vaswani et al., 2017](https://arxiv.org/abs/1706.03762).
+ | If `query`, `key`, `value` are the same, then
+ | this is self-attention. Each timestep in `query` attends to the
+ | corresponding sequence in `key`, and returns a fixed-width vector.
+ |
+ | This layer first projects `query`, `key` and `value`. These are
+ | (effectively) a list of tensors of length `num_attention_heads`, where the
+ | corresponding shapes are `(batch_size, <query dimensions>, key_dim)`,
+ | `(batch_size, <key/value dimensions>, key_dim)`,
+ | `(batch_size, <key/value dimensions>, value_dim)`.
+ |
+ | Then, the query and key tensors are dot-producted and scaled. These are
+ | softmaxed to obtain attention probabilities. The value tensors are then
+ | interpolated by these probabilities, then concatenated back to a single
+ | tensor.
+ |
+ | Finally, the result tensor, whose last dimension is `value_dim`, can
+ | optionally take a linear projection before being returned.
+ |
+ | Args:
+ | num_heads: Number of attention heads.
+ | key_dim: Size of each attention head for query and key.
+ | value_dim: Size of each attention head for value.
+ | dropout: Dropout probability.
+ | use_bias: Boolean, whether the dense layers use bias vectors/matrices.
+ | output_shape: The expected shape of an output tensor, besides the batch
+ | and sequence dims. If not specified, projects back to the query
+ | feature dim (the query input's last dimension).
+ | attention_axes: Axes over which the attention is applied. `None` means
+ | attention over all axes except batch, heads, and features.
+ | kernel_initializer: Initializer for dense layer kernels.
+ | bias_initializer: Initializer for dense layer biases.
+ | kernel_regularizer: Regularizer for dense layer kernels.
+ | bias_regularizer: Regularizer for dense layer biases.
+ | activity_regularizer: Regularizer for dense layer activity.
+ | kernel_constraint: Constraint for dense layer kernels.
+ | bias_constraint: Constraint for dense layer biases.
+ |
+ | Call arguments:
+ | query: Query tensor of shape `(B, T, dim)`, where `B` is the batch size,
+ | `T` is the target sequence length, and dim is the feature dimension.
+ | value: Value tensor of shape `(B, S, dim)`, where `B` is the batch size,
+ | `S` is the source sequence length, and dim is the feature dimension.
+ | key: Optional key tensor of shape `(B, S, dim)`. If not given, will
+ | use `value` for both `key` and `value`, which is the most common
+ | case.
+ | attention_mask: a boolean mask of shape `(B, T, S)` that prevents
+ | attention to certain positions. The boolean mask specifies which
+ | query elements can attend to which key elements; 1 indicates
+ | attention and 0 indicates no attention. Broadcasting can happen for
+ | the missing batch dimensions and the head dimension.
+ | return_attention_scores: A boolean to indicate whether the output should
+ | be `(attention_output, attention_scores)` if `True`, or
+ | `attention_output` if `False`. Defaults to `False`.
+ | training: Python boolean indicating whether the layer should behave in
+ | training mode (adding dropout) or in inference mode (no dropout).
+ | Defaults to the training mode of the parent layer/model, or to
+ | `False` (inference) if there is no parent layer.
+ | use_causal_mask: A boolean to indicate whether to apply a causal mask to
+ | prevent tokens from attending to future tokens (e.g., used in a
+ | decoder Transformer).
+ |
+ | Returns:
+ | attention_output: The result of the computation, of shape `(B, T, E)`,
+ | where `T` is the target sequence length and `E` is the query input's
+ | last dimension if `output_shape` is `None`. Otherwise, the
+ | multi-head outputs are projected to the shape specified by
+ | `output_shape`.
+ | attention_scores: (Optional) multi-head attention coefficients over
+ | attention axes.
+ |
+ | Method resolution order:
+ | MultiHeadAttention
+ | keras.src.layers.layer.Layer
+ | keras.src.backend.tensorflow.layer.TFLayer
+ | keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ | tensorflow.python.trackable.autotrackable.AutoTrackable
+ | tensorflow.python.trackable.base.Trackable
+ | keras.src.ops.operation.Operation
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | num_heads,
+ | key_dim,
+ | value_dim=None,
+ | dropout=0.0,
+ | use_bias=True,
+ | output_shape=None,
+ | attention_axes=None,
+ | kernel_initializer='glorot_uniform',
+ | bias_initializer='zeros',
+ | kernel_regularizer=None,
+ | bias_regularizer=None,
+ | activity_regularizer=None,
+ | kernel_constraint=None,
+ | bias_constraint=None,
+ | **kwargs
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
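+ |
+ | A brief usage sketch (illustrative; self-attention over a random
+ | NumPy batch, with shapes as described in the call arguments above):
+ |
+ | ```python
+ | import numpy as np
+ | import keras
+ |
+ | x = np.random.rand(2, 8, 16)  # (B, T, dim)
+ | mha = keras.layers.MultiHeadAttention(num_heads=2, key_dim=4)
+ | out, scores = mha(query=x, value=x, return_attention_scores=True)
+ | print(out.shape)     # (2, 8, 16)
+ | print(scores.shape)  # (2, 2, 8, 8), i.e. (B, num_heads, T, S)
+ | ```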
+ |
+ | build(
+ | self,
+ | query_shape,
+ | value_shape,
+ | key_shape=None
+ | )
+ | Builds layers and variables.
+ |
+ | Args:
+ | query_shape: Shape of the `query` tensor.
+ | value_shape: Shape of the `value` tensor.
+ | key_shape: Optional shape of the `key` tensor.
+ |
+ | call(
+ | self,
+ | query,
+ | value,
+ | key=None,
+ | query_mask=None,
+ | value_mask=None,
+ | key_mask=None,
+ | attention_mask=None,
+ | return_attention_scores=False,
+ | training=None,
+ | use_causal_mask=False
+ | )
+ |
+ | compute_output_shape(
+ | self,
+ | query_shape,
+ | value_shape,
+ | key_shape=None
+ | )
+ |
+ | compute_output_spec(
+ | self,
+ | query,
+ | value,
+ | key=None,
+ | query_mask=None,
+ | value_mask=None,
+ | key_mask=None,
+ | attention_mask=None,
+ | return_attention_scores=False,
+ | training=None,
+ | use_causal_mask=False
+ | )
+ |
+ | get_config(self)
+ | Returns the config of the object.
+ |
+ | An object config is a Python dictionary (serializable)
+ | containing the information needed to re-instantiate it.
+ |
+ | ----------------------------------------------------------------------
+ | Readonly properties defined here:
+ |
+ | attention_axes
+ |
+ | dropout
+ |
+ | key_dense
+ |
+ | key_dim
+ |
+ | num_heads
+ |
+ | output_dense
+ |
+ | output_shape
+ |
+ | query_dense
+ |
+ | use_bias
+ |
+ | value_dense
+ |
+ | value_dim
+ |
+
diff --git a/.tether/man/layer_multiply.txt b/.tether/man/layer_multiply.txt
new file mode 100644
index 0000000000..b0057cb29a
--- /dev/null
+++ b/.tether/man/layer_multiply.txt
@@ -0,0 +1,65 @@
+Help on class Multiply in module keras.src.layers.merging.multiply:
+
+class Multiply(keras.src.layers.merging.base_merge.Merge)
+ | Multiply(**kwargs)
+ |
+ | Performs elementwise multiplication.
+ |
+ | It takes as input a list of tensors, all of the same shape,
+ | and returns a single tensor (also of the same shape).
+ |
+ | Examples:
+ |
+ | >>> input_shape = (2, 3, 4)
+ | >>> x1 = np.random.rand(*input_shape)
+ | >>> x2 = np.random.rand(*input_shape)
+ | >>> y = keras.layers.Multiply()([x1, x2])
+ |
+ | Usage in a Keras model:
+ |
+ | >>> input1 = keras.layers.Input(shape=(16,))
+ | >>> x1 = keras.layers.Dense(8, activation='relu')(input1)
+ | >>> input2 = keras.layers.Input(shape=(32,))
+ | >>> x2 = keras.layers.Dense(8, activation='relu')(input2)
+ | >>> # equivalent to `y = keras.layers.multiply([x1, x2])`
+ | >>> y = keras.layers.Multiply()([x1, x2])
+ | >>> out = keras.layers.Dense(4)(y)
+ | >>> model = keras.models.Model(inputs=[input1, input2], outputs=out)
+ |
+ | Method resolution order:
+ | Multiply
+ | keras.src.layers.merging.base_merge.Merge
+ | keras.src.layers.layer.Layer
+ | keras.src.backend.tensorflow.layer.TFLayer
+ | keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ | tensorflow.python.trackable.autotrackable.AutoTrackable
+ | tensorflow.python.trackable.base.Trackable
+ | keras.src.ops.operation.Operation
+ | builtins.object
+ |
+ | Methods inherited from keras.src.layers.merging.base_merge.Merge:
+ |
+ | __init__(self, **kwargs)
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | build(self, input_shape)
+ |
+ | call(self, inputs)
+ |
+ | compute_mask(
+ | self,
+ | inputs,
+ | mask=None
+ | )
+ |
+ | compute_output_shape(self, input_shape)
+ |
+ | compute_output_spec(self, inputs)
+ |
+ | get_config(self)
+ | Returns the config of the object.
+ | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_normalization.txt b/.tether/man/layer_normalization.txt new file mode 100644 index 0000000000..5ca355bc82 --- /dev/null +++ b/.tether/man/layer_normalization.txt @@ -0,0 +1,180 @@ +Help on class Normalization in module keras.src.layers.preprocessing.normalization: + +class Normalization(keras.src.layers.layer.Layer) + | Normalization(axis=-1, mean=None, variance=None, invert=False, **kwargs) + | + | A preprocessing layer that normalizes continuous features. + | + | This layer will shift and scale inputs into a distribution centered around + | 0 with standard deviation 1. It accomplishes this by precomputing the mean + | and variance of the data, and calling `(input - mean) / sqrt(var)` at + | runtime. + | + | The mean and variance values for the layer must be either supplied on + | construction or learned via `adapt()`. `adapt()` will compute the mean and + | variance of the data and store them as the layer's weights. `adapt()` should + | be called before `fit()`, `evaluate()`, or `predict()`. + | + | Args: + | axis: Integer, tuple of integers, or None. The axis or axes that should + | have a separate mean and variance for each index in the shape. + | For example, if shape is `(None, 5)` and `axis=1`, the layer will + | track 5 separate mean and variance values for the last axis. + | If `axis` is set to `None`, the layer will normalize + | all elements in the input by a scalar mean and variance. + | When `-1`, the last axis of the input is assumed to be a + | feature dimension and is normalized per index. + | Note that in the specific case of batched scalar inputs where + | the only axis is the batch axis, the default will normalize + | each index in the batch separately. + | In this case, consider passing `axis=None`. Defaults to `-1`. + | mean: The mean value(s) to use during normalization. The passed value(s) + | will be broadcast to the shape of the kept axes above; + | if the value(s) cannot be broadcast, an error will be raised when + | this layer's `build()` method is called. + | variance: The variance value(s) to use during normalization. The passed + | value(s) will be broadcast to the shape of the kept axes above; + | if the value(s) cannot be broadcast, an error will be raised when + | this layer's `build()` method is called. + | invert: If `True`, this layer will apply the inverse transformation + | to its inputs: it would turn a normalized input back into its + | original form. + | + | Examples: + | + | Calculate a global mean and variance by analyzing the dataset in `adapt()`. + | + | >>> adapt_data = np.array([1., 2., 3., 4., 5.], dtype='float32') + | >>> input_data = np.array([1., 2., 3.], dtype='float32') + | >>> layer = keras.layers.Normalization(axis=None) + | >>> layer.adapt(adapt_data) + | >>> layer(input_data) + | array([-1.4142135, -0.70710677, 0.], dtype=float32) + | + | Calculate a mean and variance for each index on the last axis. + | + | >>> adapt_data = np.array([[0., 7., 4.], + | ... [2., 9., 6.], + | ... [0., 7., 4.], + | ... [2., 9., 6.]], dtype='float32') + | >>> input_data = np.array([[0., 7., 4.]], dtype='float32') + | >>> layer = keras.layers.Normalization(axis=-1) + | >>> layer.adapt(adapt_data) + | >>> layer(input_data) + | array([-1., -1., -1.], dtype=float32) + | + | Pass the mean and variance directly. 
+ |
+ | >>> input_data = np.array([[1.], [2.], [3.]], dtype='float32')
+ | >>> layer = keras.layers.Normalization(mean=3., variance=2.)
+ | >>> layer(input_data)
+ | array([[-1.4142135 ],
+ | [-0.70710677],
+ | [ 0.        ]], dtype=float32)
+ |
+ | Use the layer to de-normalize inputs (after adapting the layer).
+ |
+ | >>> adapt_data = np.array([[0., 7., 4.],
+ | ...                        [2., 9., 6.],
+ | ...                        [0., 7., 4.],
+ | ...                        [2., 9., 6.]], dtype='float32')
+ | >>> input_data = np.array([[1., 2., 3.]], dtype='float32')
+ | >>> layer = keras.layers.Normalization(axis=-1, invert=True)
+ | >>> layer.adapt(adapt_data)
+ | >>> layer(input_data)
+ | array([2., 10., 8.], dtype=float32)
+ |
+ | Method resolution order:
+ | Normalization
+ | keras.src.layers.layer.Layer
+ | keras.src.backend.tensorflow.layer.TFLayer
+ | keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ | tensorflow.python.trackable.autotrackable.AutoTrackable
+ | tensorflow.python.trackable.base.Trackable
+ | keras.src.ops.operation.Operation
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | axis=-1,
+ | mean=None,
+ | variance=None,
+ | invert=False,
+ | **kwargs
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | adapt(self, data)
+ | Computes the mean and variance of values in a dataset.
+ |
+ | Calling `adapt()` on a `Normalization` layer is an alternative to
+ | passing in `mean` and `variance` arguments during layer construction. A
+ | `Normalization` layer should always either be adapted over a dataset or
+ | passed `mean` and `variance`.
+ |
+ | During `adapt()`, the layer will compute a `mean` and `variance`
+ | separately for each position in each axis specified by the `axis`
+ | argument. To calculate a single `mean` and `variance` over the input
+ | data, simply pass `axis=None` to the layer.
+ |
+ | Args:
+ | data: The data to train on. It can be passed either as a
+ | `tf.data.Dataset`, as a NumPy array, or as a backend-native
+ | eager tensor.
+ | If a dataset, *it must be batched*. Keras will assume that the
+ | data is batched, and if that assumption doesn't hold, the mean
+ | and variance may be incorrectly computed.
+ |
+ | build(self, input_shape)
+ |
+ | build_from_config(self, config)
+ | Builds the layer's states with the supplied config dict.
+ |
+ | By default, this method calls the `build(config["input_shape"])` method,
+ | which creates weights based on the layer's input shape in the supplied
+ | config. If your config contains other information needed to load the
+ | layer's state, you should override this method.
+ |
+ | Args:
+ | config: Dict containing the input shape associated with this layer.
+ |
+ | call(self, inputs)
+ |
+ | compute_output_shape(self, input_shape)
+ |
+ | finalize_state(self)
+ |
+ | get_build_config(self)
+ | Returns a dictionary with the layer's input shape.
+ |
+ | This method returns a config dict that can be used by
+ | `build_from_config(config)` to create all states (e.g. Variables and
+ | Lookup tables) needed by the layer.
+ |
+ | By default, the config only contains the input shape that the layer
+ | was built with. If you're writing a custom layer that creates state in
+ | an unusual way, you should override this method to make sure this state
+ | is already created when Keras attempts to load its value upon model
+ | loading.
+ |
+ | Returns:
+ | A dict containing the input shape associated with the layer.
+ |
+ | get_config(self)
+ | Returns the config of the object.
+ | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + | load_own_variables(self, store) + | Loads the state of the layer. + | + | You can override this method to take full control of how the state of + | the layer is loaded upon calling `keras.models.load_model()`. + | + | Args: + | store: Dict from which the state of the model will be loaded. + | + diff --git a/.tether/man/layer_permute.txt b/.tether/man/layer_permute.txt new file mode 100644 index 0000000000..e9bd5b0c27 --- /dev/null +++ b/.tether/man/layer_permute.txt @@ -0,0 +1,61 @@ +Help on class Permute in module keras.src.layers.reshaping.permute: + +class Permute(keras.src.layers.layer.Layer) + | Permute(dims, **kwargs) + | + | Permutes the dimensions of the input according to a given pattern. + | + | Useful e.g. connecting RNNs and convnets. + | + | Args: + | dims: Tuple of integers. Permutation pattern does not include the + | batch dimension. Indexing starts at 1. + | For instance, `(2, 1)` permutes the first and second dimensions + | of the input. + | + | Input shape: + | Arbitrary. + | + | Output shape: + | Same as the input shape, but with the dimensions re-ordered according + | to the specified pattern. + | + | Example: + | + | >>> x = keras.Input(shape=(10, 64)) + | >>> y = keras.layers.Permute((2, 1))(x) + | >>> y.shape + | (None, 64, 10) + | + | Method resolution order: + | Permute + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | dims, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | compute_output_spec(self, inputs) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_random_brightness.txt b/.tether/man/layer_random_brightness.txt new file mode 100644 index 0000000000..2ad5710608 --- /dev/null +++ b/.tether/man/layer_random_brightness.txt @@ -0,0 +1,98 @@ +Help on class RandomBrightness in module keras.src.layers.preprocessing.random_brightness: + +class RandomBrightness(keras.src.layers.preprocessing.tf_data_layer.TFDataLayer) + | RandomBrightness(factor, value_range=(0, 255), seed=None, **kwargs) + | + | A preprocessing layer which randomly adjusts brightness during training. + | + | This layer will randomly increase/reduce the brightness for the input RGB + | images. At inference time, the output will be identical to the input. + | Call the layer with `training=True` to adjust the brightness of the input. + | + | **Note:** This layer is safe to use inside a `tf.data` pipeline + | (independently of which backend you're using). + | + | Args: + | factor: Float or a list/tuple of 2 floats between -1.0 and 1.0. The + | factor is used to determine the lower bound and upper bound of the + | brightness adjustment. A float value will be chosen randomly between + | the limits. When -1.0 is chosen, the output image will be black, and + | when 1.0 is chosen, the image will be fully white. 
When only one float is provided, e.g., `0.2`,
+ | then `-0.2` will be used for the lower bound and `0.2`
+ | will be used for the upper bound.
+ | value_range: Optional list/tuple of 2 floats
+ | for the lower and upper limit
+ | of the values of the input data.
+ | To make no change, use `[0.0, 1.0]`, e.g., if the image input
+ | has been scaled before this layer. Defaults to `[0.0, 255.0]`.
+ | The brightness adjustment will be scaled to this range, and the
+ | output values will be clipped to this range.
+ | seed: Optional integer, for fixed RNG behavior.
+ |
+ | Inputs: 3D (HWC) or 4D (NHWC) tensor, with float or int dtype. Input pixel
+ | values can be of any range (e.g. `[0., 1.)` or `[0, 255]`).
+ |
+ | Output: 3D (HWC) or 4D (NHWC) tensor with brightness adjusted based on the
+ | `factor`. By default, the layer will output floats.
+ | The output value will be clipped to the range `[0, 255]`,
+ | the valid range of RGB colors, and
+ | rescaled based on the `value_range` if needed.
+ |
+ | Example:
+ |
+ | ```python
+ | random_bright = keras.layers.RandomBrightness(factor=0.2)
+ |
+ | # An image with shape [2, 2, 3]
+ | image = [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]
+ |
+ | # Assume we randomly select the factor to be 0.1, then it will apply
+ | # 0.1 * 255 to all the channels
+ | output = random_bright(image, training=True)
+ |
+ | # output will be floats with 25.5 added to each channel.
+ | >>> array([[[26.5, 27.5, 28.5]
+ |             [29.5, 30.5, 31.5]]
+ |            [[32.5, 33.5, 34.5]
+ |             [35.5, 36.5, 37.5]]],
+ |           shape=(2, 2, 3), dtype=float32)
+ | ```
+ |
+ | Method resolution order:
+ | RandomBrightness
+ | keras.src.layers.preprocessing.tf_data_layer.TFDataLayer
+ | keras.src.layers.layer.Layer
+ | keras.src.backend.tensorflow.layer.TFLayer
+ | keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ | tensorflow.python.trackable.autotrackable.AutoTrackable
+ | tensorflow.python.trackable.base.Trackable
+ | keras.src.ops.operation.Operation
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | factor,
+ | value_range=(0, 255),
+ | seed=None,
+ | **kwargs
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | call(
+ | self,
+ | inputs,
+ | training=True
+ | )
+ |
+ | compute_output_shape(self, input_shape)
+ |
+ | get_config(self)
+ | Returns the config of the object.
+ |
+ | An object config is a Python dictionary (serializable)
+ | containing the information needed to re-instantiate it.
+ |
+
diff --git a/.tether/man/layer_random_contrast.txt b/.tether/man/layer_random_contrast.txt
new file mode 100644
index 0000000000..85b3a2c5f1
--- /dev/null
+++ b/.tether/man/layer_random_contrast.txt
@@ -0,0 +1,76 @@
+Help on class RandomContrast in module keras.src.layers.preprocessing.random_contrast:
+
+class RandomContrast(keras.src.layers.preprocessing.tf_data_layer.TFDataLayer)
+ | RandomContrast(factor, seed=None, **kwargs)
+ |
+ | A preprocessing layer which randomly adjusts contrast during training.
+ |
+ | This layer will randomly adjust the contrast of an image or images
+ | by a random factor. Contrast is adjusted independently
+ | for each channel of each image during training.
+ |
+ | For each channel, this layer computes the mean of the image pixels in the
+ | channel and then adjusts each component `x` of each pixel to
+ | `(x - mean) * contrast_factor + mean`.
+ |
+ | Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and
+ | in integer or floating point dtype.
+ | By default, the layer will output floats. + | + | **Note:** This layer is safe to use inside a `tf.data` pipeline + | (independently of which backend you're using). + | + | Input shape: + | 3D (unbatched) or 4D (batched) tensor with shape: + | `(..., height, width, channels)`, in `"channels_last"` format. + | + | Output shape: + | 3D (unbatched) or 4D (batched) tensor with shape: + | `(..., height, width, channels)`, in `"channels_last"` format. + | + | Args: + | factor: a positive float represented as fraction of value, or a tuple of + | size 2 representing lower and upper bound. + | When represented as a single float, lower = upper. + | The contrast factor will be randomly picked between + | `[1.0 - lower, 1.0 + upper]`. For any pixel x in the channel, + | the output will be `(x - mean) * factor + mean` + | where `mean` is the mean value of the channel. + | seed: Integer. Used to create a random seed. + | + | Method resolution order: + | RandomContrast + | keras.src.layers.preprocessing.tf_data_layer.TFDataLayer + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | factor, + | seed=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call( + | self, + | inputs, + | training=True + | ) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_random_crop.txt b/.tether/man/layer_random_crop.txt new file mode 100644 index 0000000000..414e622725 --- /dev/null +++ b/.tether/man/layer_random_crop.txt @@ -0,0 +1,83 @@ +Help on class RandomCrop in module keras.src.layers.preprocessing.random_crop: + +class RandomCrop(keras.src.layers.preprocessing.tf_data_layer.TFDataLayer) + | RandomCrop(height, width, seed=None, data_format=None, name=None, **kwargs) + | + | A preprocessing layer which randomly crops images during training. + | + | During training, this layer will randomly choose a location to crop images + | down to a target size. The layer will crop all the images in the same batch + | to the same cropping location. + | + | At inference time, and during training if an input image is smaller than the + | target size, the input will be resized and cropped so as to return the + | largest possible window in the image that matches the target aspect ratio. + | If you need to apply random cropping at inference time, set `training` to + | True when calling the layer. + | + | Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and + | of integer or floating point dtype. By default, the layer will output + | floats. + | + | **Note:** This layer is safe to use inside a `tf.data` pipeline + | (independently of which backend you're using). + | + | Input shape: + | 3D (unbatched) or 4D (batched) tensor with shape: + | `(..., height, width, channels)`, in `"channels_last"` format. + | + | Output shape: + | 3D (unbatched) or 4D (batched) tensor with shape: + | `(..., target_height, target_width, channels)`. + | + | Args: + | height: Integer, the height of the output shape. 
width: Integer, the width of the output shape.
+ | seed: Integer. Used to create a random seed.
+ | **kwargs: Base layer keyword arguments, such as
+ | `name` and `dtype`.
+ |
+ | Method resolution order:
+ | RandomCrop
+ | keras.src.layers.preprocessing.tf_data_layer.TFDataLayer
+ | keras.src.layers.layer.Layer
+ | keras.src.backend.tensorflow.layer.TFLayer
+ | keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ | tensorflow.python.trackable.autotrackable.AutoTrackable
+ | tensorflow.python.trackable.base.Trackable
+ | keras.src.ops.operation.Operation
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | height,
+ | width,
+ | seed=None,
+ | data_format=None,
+ | name=None,
+ | **kwargs
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | call(
+ | self,
+ | inputs,
+ | training=True
+ | )
+ |
+ | compute_output_shape(
+ | self,
+ | input_shape,
+ | *args,
+ | **kwargs
+ | )
+ |
+ | get_config(self)
+ | Returns the config of the object.
+ |
+ | An object config is a Python dictionary (serializable)
+ | containing the information needed to re-instantiate it.
+ |
+
diff --git a/.tether/man/layer_random_flip.txt b/.tether/man/layer_random_flip.txt
new file mode 100644
index 0000000000..c689650376
--- /dev/null
+++ b/.tether/man/layer_random_flip.txt
@@ -0,0 +1,70 @@
+Help on class RandomFlip in module keras.src.layers.preprocessing.random_flip:
+
+class RandomFlip(keras.src.layers.preprocessing.tf_data_layer.TFDataLayer)
+ | RandomFlip(mode='horizontal_and_vertical', seed=None, **kwargs)
+ |
+ | A preprocessing layer which randomly flips images during training.
+ |
+ | This layer will flip the images horizontally and/or vertically based on
+ | the `mode` attribute. During inference time, the output will be identical
+ | to the input. Call the layer with `training=True` to flip the input.
+ | Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and
+ | of integer or floating point dtype.
+ | By default, the layer will output floats.
+ |
+ | **Note:** This layer is safe to use inside a `tf.data` pipeline
+ | (independently of which backend you're using).
+ |
+ | Input shape:
+ | 3D (unbatched) or 4D (batched) tensor with shape:
+ | `(..., height, width, channels)`, in `"channels_last"` format.
+ |
+ | Output shape:
+ | 3D (unbatched) or 4D (batched) tensor with shape:
+ | `(..., height, width, channels)`, in `"channels_last"` format.
+ |
+ | Args:
+ | mode: String indicating which flip mode to use. Can be `"horizontal"`,
+ | `"vertical"`, or `"horizontal_and_vertical"`. `"horizontal"` is a
+ | left-right flip and `"vertical"` is a top-bottom flip. Defaults to
+ | `"horizontal_and_vertical"`.
+ | seed: Integer. Used to create a random seed.
+ | **kwargs: Base layer keyword arguments, such as
+ | `name` and `dtype`.
+ |
+ | Method resolution order:
+ | RandomFlip
+ | keras.src.layers.preprocessing.tf_data_layer.TFDataLayer
+ | keras.src.layers.layer.Layer
+ | keras.src.backend.tensorflow.layer.TFLayer
+ | keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ | tensorflow.python.trackable.autotrackable.AutoTrackable
+ | tensorflow.python.trackable.base.Trackable
+ | keras.src.ops.operation.Operation
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | mode='horizontal_and_vertical',
+ | seed=None,
+ | **kwargs
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
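+ |
+ | A quick usage sketch (illustrative; assumes a NumPy batch of images,
+ | with `training=True` so the flip is actually applied):
+ |
+ | ```python
+ | import numpy as np
+ | import keras
+ |
+ | images = np.random.rand(4, 32, 32, 3)  # NHWC batch
+ | layer = keras.layers.RandomFlip(mode="horizontal", seed=42)
+ | flipped = layer(images, training=True)  # same shape as the input
+ | ```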
+ |
+ | call(
+ | self,
+ | inputs,
+ | training=True
+ | )
+ |
+ | compute_output_shape(self, input_shape)
+ |
+ | get_config(self)
+ | Returns the config of the object.
+ |
+ | An object config is a Python dictionary (serializable)
+ | containing the information needed to re-instantiate it.
+ |
+
diff --git a/.tether/man/layer_random_rotation.txt b/.tether/man/layer_random_rotation.txt
new file mode 100644
index 0000000000..ce7e427cb9
--- /dev/null
+++ b/.tether/man/layer_random_rotation.txt
@@ -0,0 +1,104 @@
+Help on class RandomRotation in module keras.src.layers.preprocessing.random_rotation:
+
+class RandomRotation(keras.src.layers.preprocessing.tf_data_layer.TFDataLayer)
+ | RandomRotation(factor, fill_mode='reflect', interpolation='bilinear', seed=None, fill_value=0.0, value_range=(0, 255), data_format=None, **kwargs)
+ |
+ | A preprocessing layer which randomly rotates images during training.
+ |
+ | This layer will apply random rotations to each image, filling empty space
+ | according to `fill_mode`.
+ |
+ | By default, random rotations are only applied during training.
+ | At inference time, the layer does nothing. If you need to apply random
+ | rotations at inference time, pass `training=True` when calling the layer.
+ |
+ | Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and
+ | of integer or floating point dtype.
+ | By default, the layer will output floats.
+ |
+ | **Note:** This layer is safe to use inside a `tf.data` pipeline
+ | (independently of which backend you're using).
+ |
+ | Input shape:
+ | 3D (unbatched) or 4D (batched) tensor with shape:
+ | `(..., height, width, channels)`, in `"channels_last"` format
+ |
+ | Output shape:
+ | 3D (unbatched) or 4D (batched) tensor with shape:
+ | `(..., height, width, channels)`, in `"channels_last"` format
+ |
+ | Args:
+ | factor: a float represented as fraction of 2 Pi, or a tuple of size 2
+ | representing lower and upper bound for rotating clockwise and
+ | counter-clockwise. A positive value means rotating
+ | counter-clockwise, while a negative value means rotating clockwise.
+ | When represented as a single
+ | float, this value is used for both the upper and lower bound.
+ | For instance, `factor=(-0.2, 0.3)`
+ | results in an output rotation by a random
+ | amount in the range `[-20% * 2pi, 30% * 2pi]`.
+ | `factor=0.2` results in an
+ | output rotating by a random amount
+ | in the range `[-20% * 2pi, 20% * 2pi]`.
+ | fill_mode: Points outside the boundaries of the input are filled
+ | according to the given mode
+ | (one of `{"constant", "reflect", "wrap", "nearest"}`).
+ | - *reflect*: `(d c b a | a b c d | d c b a)`
+ | The input is extended by reflecting about
+ | the edge of the last pixel.
+ | - *constant*: `(k k k k | a b c d | k k k k)`
+ | The input is extended by
+ | filling all values beyond the edge with
+ | the same constant value k = 0.
+ | - *wrap*: `(a b c d | a b c d | a b c d)` The input is extended by
+ | wrapping around to the opposite edge.
+ | - *nearest*: `(a a a a | a b c d | d d d d)`
+ | The input is extended by the nearest pixel.
+ | interpolation: Interpolation mode. Supported values: `"nearest"`,
+ | `"bilinear"`.
+ | seed: Integer. Used to create a random seed.
+ | fill_value: a float representing the value to be filled outside
+ | the boundaries when `fill_mode="constant"`.
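+ |
+ | Example (an illustrative sketch; `factor=0.1` draws rotations from
+ | `[-10% * 2pi, 10% * 2pi]`, applied only when `training=True`):
+ |
+ | ```python
+ | import numpy as np
+ | import keras
+ |
+ | images = np.random.rand(4, 64, 64, 3)  # NHWC batch
+ | layer = keras.layers.RandomRotation(factor=0.1, fill_mode="constant")
+ | rotated = layer(images, training=True)  # same shape, randomly rotated
+ | ```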
+ | + | Method resolution order: + | RandomRotation + | keras.src.layers.preprocessing.tf_data_layer.TFDataLayer + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | factor, + | fill_mode='reflect', + | interpolation='bilinear', + | seed=None, + | fill_value=0.0, + | value_range=(0, 255), + | data_format=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call( + | self, + | inputs, + | training=True + | ) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_random_translation.txt b/.tether/man/layer_random_translation.txt new file mode 100644 index 0000000000..8eb6b13cae --- /dev/null +++ b/.tether/man/layer_random_translation.txt @@ -0,0 +1,120 @@ +Help on class RandomTranslation in module keras.src.layers.preprocessing.random_translation: + +class RandomTranslation(keras.src.layers.preprocessing.tf_data_layer.TFDataLayer) + | RandomTranslation(height_factor, width_factor, fill_mode='reflect', interpolation='bilinear', seed=None, fill_value=0.0, data_format=None, **kwargs) + | + | A preprocessing layer which randomly translates images during training. + | + | This layer will apply random translations to each image during training, + | filling empty space according to `fill_mode`. + | + | Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and + | of integer or floating point dtype. By default, the layer will output + | floats. + | + | Input shape: + | 3D (unbatched) or 4D (batched) tensor with shape: + | `(..., height, width, channels)`, in `"channels_last"` format, + | or `(..., channels, height, width)`, in `"channels_first"` format. + | + | Output shape: + | 3D (unbatched) or 4D (batched) tensor with shape: + | `(..., target_height, target_width, channels)`, + | or `(..., channels, target_height, target_width)`, + | in `"channels_first"` format. + | + | **Note:** This layer is safe to use inside a `tf.data` pipeline + | (independently of which backend you're using). + | + | Args: + | height_factor: a float represented as fraction of value, or a tuple of + | size 2 representing lower and upper bound for shifting vertically. A + | negative value means shifting image up, while a positive value means + | shifting image down. When represented as a single positive float, + | this value is used for both the upper and lower bound. For instance, + | `height_factor=(-0.2, 0.3)` results in an output shifted by a random + | amount in the range `[-20%, +30%]`. `height_factor=0.2` results in + | an output height shifted by a random amount in the range + | `[-20%, +20%]`. + | width_factor: a float represented as fraction of value, or a tuple of + | size 2 representing lower and upper bound for shifting horizontally. + | A negative value means shifting image left, while a positive value + | means shifting image right. When represented as a single positive + | float, this value is used for both the upper and lower bound. 
For
+ | instance, `width_factor=(-0.2, 0.3)` results in an output shifted
+ | left by up to 20% and shifted right by up to 30%. `width_factor=0.2`
+ | results in an output shifted left or right by up to 20%.
+ | fill_mode: Points outside the boundaries of the input are filled
+ | according to the given mode. Available methods are `"constant"`,
+ | `"nearest"`, `"wrap"` and `"reflect"`. Defaults to `"reflect"`.
+ | - `"reflect"`: `(d c b a | a b c d | d c b a)`
+ | The input is extended by reflecting about the edge of the last
+ | pixel.
+ | - `"constant"`: `(k k k k | a b c d | k k k k)`
+ | The input is extended by filling all values beyond
+ | the edge with the same constant value k specified by
+ | `fill_value`.
+ | - `"wrap"`: `(a b c d | a b c d | a b c d)`
+ | The input is extended by wrapping around to the opposite edge.
+ | - `"nearest"`: `(a a a a | a b c d | d d d d)`
+ | The input is extended by the nearest pixel.
+ | Note that when using torch backend, `"reflect"` is redirected to
+ | `"mirror"` `(c d c b | a b c d | c b a b)` because torch does not
+ | support `"reflect"`.
+ | Note that torch backend does not support `"wrap"`.
+ | interpolation: Interpolation mode. Supported values: `"nearest"`,
+ | `"bilinear"`.
+ | seed: Integer. Used to create a random seed.
+ | fill_value: a float representing the value to be filled outside the
+ | boundaries when `fill_mode="constant"`.
+ | data_format: string, either `"channels_last"` or `"channels_first"`.
+ | The ordering of the dimensions in the inputs. `"channels_last"`
+ | corresponds to inputs with shape `(batch, height, width, channels)`
+ | while `"channels_first"` corresponds to inputs with shape
+ | `(batch, channels, height, width)`. It defaults to the
+ | `image_data_format` value found in your Keras config file at
+ | `~/.keras/keras.json`. If you never set it, then it will be
+ | `"channels_last"`.
+ | **kwargs: Base layer keyword arguments, such as `name` and `dtype`.
+ |
+ | Method resolution order:
+ | RandomTranslation
+ | keras.src.layers.preprocessing.tf_data_layer.TFDataLayer
+ | keras.src.layers.layer.Layer
+ | keras.src.backend.tensorflow.layer.TFLayer
+ | keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ | tensorflow.python.trackable.autotrackable.AutoTrackable
+ | tensorflow.python.trackable.base.Trackable
+ | keras.src.ops.operation.Operation
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | height_factor,
+ | width_factor,
+ | fill_mode='reflect',
+ | interpolation='bilinear',
+ | seed=None,
+ | fill_value=0.0,
+ | data_format=None,
+ | **kwargs
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | call(
+ | self,
+ | inputs,
+ | training=True
+ | )
+ |
+ | compute_output_shape(self, input_shape)
+ |
+ | get_config(self)
+ | Returns the config of the object.
+ |
+ | An object config is a Python dictionary (serializable)
+ | containing the information needed to re-instantiate it.
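+ |
+ | A short usage sketch (illustrative; shifts of up to 10% in each
+ | direction, applied only when `training=True`):
+ |
+ | ```python
+ | import numpy as np
+ | import keras
+ |
+ | images = np.random.rand(4, 64, 64, 3)  # NHWC batch
+ | layer = keras.layers.RandomTranslation(height_factor=0.1, width_factor=0.1)
+ | shifted = layer(images, training=True)  # same shape, randomly shifted
+ | ```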
+ |
+
diff --git a/.tether/man/layer_random_zoom.txt b/.tether/man/layer_random_zoom.txt
new file mode 100644
index 0000000000..f67ab391a5
--- /dev/null
+++ b/.tether/man/layer_random_zoom.txt
@@ -0,0 +1,126 @@
+Help on class RandomZoom in module keras.src.layers.preprocessing.random_zoom:
+
+class RandomZoom(keras.src.layers.preprocessing.tf_data_layer.TFDataLayer)
+ | RandomZoom(height_factor, width_factor=None, fill_mode='reflect', interpolation='bilinear', seed=None, fill_value=0.0, data_format=None, **kwargs)
+ |
+ | A preprocessing layer which randomly zooms images during training.
+ |
+ | This layer will randomly zoom in or out on each axis of an image
+ | independently, filling empty space according to `fill_mode`.
+ |
+ | Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and
+ | of integer or floating point dtype.
+ | By default, the layer will output floats.
+ |
+ | Input shape:
+ | 3D (unbatched) or 4D (batched) tensor with shape:
+ | `(..., height, width, channels)`, in `"channels_last"` format,
+ | or `(..., channels, height, width)`, in `"channels_first"` format.
+ |
+ | Output shape:
+ | 3D (unbatched) or 4D (batched) tensor with shape:
+ | `(..., target_height, target_width, channels)`,
+ | or `(..., channels, target_height, target_width)`,
+ | in `"channels_first"` format.
+ |
+ | **Note:** This layer is safe to use inside a `tf.data` pipeline
+ | (independently of which backend you're using).
+ |
+ | Args:
+ | height_factor: a float represented as fraction of value, or a tuple of
+ | size 2 representing lower and upper bound for zooming vertically.
+ | When represented as a single float, this value is used for both the
+ | upper and lower bound. A positive value means zooming out, while a
+ | negative value means zooming in. For instance,
+ | `height_factor=(0.2, 0.3)` results in an output zoomed out by a
+ | random amount in the range `[+20%, +30%]`.
+ | `height_factor=(-0.3, -0.2)` results in an output zoomed in by a
+ | random amount in the range `[+20%, +30%]`.
+ | width_factor: a float represented as fraction of value, or a tuple of
+ | size 2 representing lower and upper bound for zooming horizontally.
+ | When represented as a single float, this value is used for both the
+ | upper and lower bound. For instance, `width_factor=(0.2, 0.3)`
+ | results in an output zoomed out by between 20% and 30%.
+ | `width_factor=(-0.3, -0.2)` results in an output zoomed in by
+ | between 20% and 30%. `None` means to zoom vertically and
+ | horizontally by the same amount, preserving the aspect ratio.
+ | Defaults to `None`.
+ | fill_mode: Points outside the boundaries of the input are filled
+ | according to the given mode. Available methods are `"constant"`,
+ | `"nearest"`, `"wrap"` and `"reflect"`. Defaults to `"reflect"`.
+ | - `"reflect"`: `(d c b a | a b c d | d c b a)`
+ | The input is extended by reflecting about the edge of the last
+ | pixel.
+ | - `"constant"`: `(k k k k | a b c d | k k k k)`
+ | The input is extended by filling all values beyond
+ | the edge with the same constant value k specified by
+ | `fill_value`.
+ | - `"wrap"`: `(a b c d | a b c d | a b c d)`
+ | The input is extended by wrapping around to the opposite edge.
+ | - `"nearest"`: `(a a a a | a b c d | d d d d)`
+ | The input is extended by the nearest pixel.
+ | Note that when using torch backend, `"reflect"` is redirected to
+ | `"mirror"` `(c d c b | a b c d | c b a b)` because torch does not
+ | support `"reflect"`.
+ | Note that torch backend does not support `"wrap"`.
+ | interpolation: Interpolation mode. Supported values: `"nearest"`, + | `"bilinear"`. + | seed: Integer. Used to create a random seed. + | fill_value: a float that represents the value to be filled outside + | the boundaries when `fill_mode="constant"`. + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape `(batch, height, width, channels)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch, channels, height, width)`. It defaults to the + | `image_data_format` value found in your Keras config file at + | `~/.keras/keras.json`. If you never set it, then it will be + | `"channels_last"`. + | **kwargs: Base layer keyword arguments, such as `name` and `dtype`. + | + | Example: + | + | >>> input_img = np.random.random((32, 224, 224, 3)) + | >>> layer = keras.layers.RandomZoom(.5, .2) + | >>> out_img = layer(input_img) + | + | Method resolution order: + | RandomZoom + | keras.src.layers.preprocessing.tf_data_layer.TFDataLayer + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | height_factor, + | width_factor=None, + | fill_mode='reflect', + | interpolation='bilinear', + | seed=None, + | fill_value=0.0, + | data_format=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call( + | self, + | inputs, + | training=True + | ) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_repeat_vector.txt b/.tether/man/layer_repeat_vector.txt new file mode 100644 index 0000000000..c8351ca68b --- /dev/null +++ b/.tether/man/layer_repeat_vector.txt @@ -0,0 +1,53 @@ +Help on class RepeatVector in module keras.src.layers.reshaping.repeat_vector: + +class RepeatVector(keras.src.layers.layer.Layer) + | RepeatVector(n, **kwargs) + | + | Repeats the input n times. + | + | Example: + | + | >>> x = keras.Input(shape=(32,)) + | >>> y = keras.layers.RepeatVector(3)(x) + | >>> y.shape + | (None, 3, 32) + | + | Args: + | n: Integer, repetition factor. + | + | Input shape: + | 2D tensor with shape `(batch_size, features)`. + | + | Output shape: + | 3D tensor with shape `(batch_size, n, features)`. + | + | Method resolution order: + | RepeatVector + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | n, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. 
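+ |
+ | For intuition, `RepeatVector(n)` matches inserting a new axis and
+ | repeating along it (a sketch using `keras.ops`; illustrative only):
+ |
+ | ```python
+ | import numpy as np
+ | import keras
+ | from keras import ops
+ |
+ | x = np.random.rand(8, 32)                          # (batch_size, features)
+ | y1 = keras.layers.RepeatVector(3)(x)               # shape (8, 3, 32)
+ | y2 = ops.repeat(ops.expand_dims(x, 1), 3, axis=1)  # same values and shape
+ | ```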
+ | + diff --git a/.tether/man/layer_rescaling.txt b/.tether/man/layer_rescaling.txt new file mode 100644 index 0000000000..958bf2bd6e --- /dev/null +++ b/.tether/man/layer_rescaling.txt @@ -0,0 +1,62 @@ +Help on class Rescaling in module keras.src.layers.preprocessing.rescaling: + +class Rescaling(keras.src.layers.preprocessing.tf_data_layer.TFDataLayer) + | Rescaling(scale, offset=0.0, **kwargs) + | + | A preprocessing layer which rescales input values to a new range. + | + | This layer rescales every value of an input (often an image) by multiplying + | by `scale` and adding `offset`. + | + | For instance: + | + | 1. To rescale an input in the `[0, 255]` range + | to be in the `[0, 1]` range, you would pass `scale=1./255`. + | + | 2. To rescale an input in the `[0, 255]` range to be in the `[-1, 1]` range, + | you would pass `scale=1./127.5, offset=-1`. + | + | The rescaling is applied both during training and inference. Inputs can be + | of integer or floating point dtype, and by default the layer will output + | floats. + | + | **Note:** This layer is safe to use inside a `tf.data` pipeline + | (independently of which backend you're using). + | + | Args: + | scale: Float, the scale to apply to the inputs. + | offset: Float, the offset to apply to the inputs. + | **kwargs: Base layer keyword arguments, such as `name` and `dtype`. + | + | Method resolution order: + | Rescaling + | keras.src.layers.preprocessing.tf_data_layer.TFDataLayer + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | scale, + | offset=0.0, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_reshape.txt b/.tether/man/layer_reshape.txt new file mode 100644 index 0000000000..322fe4c41b --- /dev/null +++ b/.tether/man/layer_reshape.txt @@ -0,0 +1,66 @@ +Help on class Reshape in module keras.src.layers.reshaping.reshape: + +class Reshape(keras.src.layers.layer.Layer) + | Reshape(target_shape, **kwargs) + | + | Layer that reshapes inputs into the given shape. + | + | Args: + | target_shape: Target shape. Tuple of integers, does not include the + | samples dimension (batch size). + | + | Input shape: + | Arbitrary, although all dimensions in the input shape must be + | known/fixed. Use the keyword argument `input_shape` (tuple of integers, + | does not include the samples/batch size axis) when using this layer as + | the first layer in a model. 
+ | + | Output shape: + | `(batch_size, *target_shape)` + | + | Example: + | + | >>> x = keras.Input(shape=(12,)) + | >>> y = keras.layers.Reshape((3, 4))(x) + | >>> y.shape + | (None, 3, 4) + | + | >>> # also supports shape inference using `-1` as dimension + | >>> y = keras.layers.Reshape((-1, 2, 2))(x) + | >>> y.shape + | (None, 3, 2, 2) + | + | Method resolution order: + | Reshape + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | target_shape, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, input_shape) + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | compute_output_spec(self, inputs) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_resizing.txt b/.tether/man/layer_resizing.txt new file mode 100644 index 0000000000..54fa7fdbeb --- /dev/null +++ b/.tether/man/layer_resizing.txt @@ -0,0 +1,96 @@ +Help on class Resizing in module keras.src.layers.preprocessing.resizing: + +class Resizing(keras.src.layers.preprocessing.tf_data_layer.TFDataLayer) + | Resizing(height, width, interpolation='bilinear', crop_to_aspect_ratio=False, pad_to_aspect_ratio=False, fill_mode='constant', fill_value=0.0, data_format=None, **kwargs) + | + | A preprocessing layer which resizes images. + | + | This layer resizes an image input to a target height and width. The input + | should be a 4D (batched) or 3D (unbatched) tensor in `"channels_last"` + | format. Input pixel values can be of any range + | (e.g. `[0., 1.)` or `[0, 255]`). + | + | Input shape: + | 3D (unbatched) or 4D (batched) tensor with shape: + | `(..., height, width, channels)`, in `"channels_last"` format, + | or `(..., channels, height, width)`, in `"channels_first"` format. + | + | Output shape: + | 3D (unbatched) or 4D (batched) tensor with shape: + | `(..., target_height, target_width, channels)`, + | or `(..., channels, target_height, target_width)`, + | in `"channels_first"` format. + | + | **Note:** This layer is safe to use inside a `tf.data` pipeline + | (independently of which backend you're using). + | + | Args: + | height: Integer, the height of the output shape. + | width: Integer, the width of the output shape. + | interpolation: String, the interpolation method. + | Supports `"bilinear"`, `"nearest"`, `"bicubic"`, + | `"lanczos3"`, `"lanczos5"`. Defaults to `"bilinear"`. + | crop_to_aspect_ratio: If `True`, resize the images without aspect + | ratio distortion. When the original aspect ratio differs + | from the target aspect ratio, the output image will be + | cropped so as to return the + | largest possible window in the image (of size `(height, width)`) + | that matches the target aspect ratio. By default + | (`crop_to_aspect_ratio=False`), aspect ratio may not be preserved. + | pad_to_aspect_ratio: If `True`, pad the images without aspect + | ratio distortion. When the original aspect ratio differs + | from the target aspect ratio, the output image will be + | evenly padded on the short side. 
+ | fill_mode: When using `pad_to_aspect_ratio=True`, padded areas + | are filled according to the given mode. Only `"constant"` is + | supported at this time + | (fill with constant value, equal to `fill_value`). + | fill_value: Float. Padding value to use when `pad_to_aspect_ratio=True`. + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape `(batch, height, width, channels)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch, channels, height, width)`. It defaults to the + | `image_data_format` value found in your Keras config file at + | `~/.keras/keras.json`. If you never set it, then it will be + | `"channels_last"`. + | **kwargs: Base layer keyword arguments, such as `name` and `dtype`. + | + | Method resolution order: + | Resizing + | keras.src.layers.preprocessing.tf_data_layer.TFDataLayer + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | height, + | width, + | interpolation='bilinear', + | crop_to_aspect_ratio=False, + | pad_to_aspect_ratio=False, + | fill_mode='constant', + | fill_value=0.0, + | data_format=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_rnn.txt b/.tether/man/layer_rnn.txt new file mode 100644 index 0000000000..79b0e334c4 --- /dev/null +++ b/.tether/man/layer_rnn.txt @@ -0,0 +1,259 @@ +Help on class RNN in module keras.src.layers.rnn.rnn: + +class RNN(keras.src.layers.layer.Layer) + | RNN(cell, return_sequences=False, return_state=False, go_backwards=False, stateful=False, unroll=False, zero_output_for_mask=False, **kwargs) + | + | Base class for recurrent layers. + | + | Args: + | cell: A RNN cell instance or a list of RNN cell instances. + | A RNN cell is a class that has: + | - A `call(input_at_t, states_at_t)` method, returning + | `(output_at_t, states_at_t_plus_1)`. The call method of the + | cell can also take the optional argument `constants`, see + | section "Note on passing external constants" below. + | - A `state_size` attribute. This can be a single integer + | (single state) in which case it is the size of the recurrent + | state. This can also be a list/tuple of integers + | (one size per state). + | - A `output_size` attribute, a single integer. + | - A `get_initial_state(batch_size=None)` + | method that creates a tensor meant to be fed to `call()` as the + | initial state, if the user didn't specify any initial state + | via other means. The returned initial state should have + | shape `(batch_size, cell.state_size)`. + | The cell might choose to create a tensor full of zeros, + | or other values based on the cell's implementation. + | `inputs` is the input tensor to the RNN layer, with shape + | `(batch_size, timesteps, features)`. 
+ | If this method is not implemented
+ | by the cell, the RNN layer will create a zero filled tensor
+ | with shape `(batch_size, cell.state_size)`.
+ | In the case that `cell` is a list of RNN cell instances, the cells
+ | will be stacked on top of each other in the RNN, resulting in an
+ | efficient stacked RNN.
+ | return_sequences: Boolean (default `False`). Whether to return the last
+ | output in the output sequence, or the full sequence.
+ | return_state: Boolean (default `False`).
+ | Whether to return the last state in addition to the output.
+ | go_backwards: Boolean (default `False`).
+ | If `True`, process the input sequence backwards and return the
+ | reversed sequence.
+ | stateful: Boolean (default `False`). If True, the last state
+ | for each sample at index `i` in a batch will be used as initial
+ | state for the sample of index `i` in the following batch.
+ | unroll: Boolean (default `False`).
+ | If True, the network will be unrolled, else a symbolic loop will be
+ | used. Unrolling can speed up an RNN, although it tends to be more
+ | memory-intensive. Unrolling is only suitable for short sequences.
+ | zero_output_for_mask: Boolean (default `False`).
+ | Whether the output should use zeros for the masked timesteps.
+ | Note that this field is only used when `return_sequences`
+ | is `True` and `mask` is provided.
+ | It can be useful if you want to reuse the raw output sequence of
+ | the RNN without interference from the masked timesteps, e.g.,
+ | merging bidirectional RNNs.
+ |
+ | Call arguments:
+ | inputs: Input tensor.
+ | initial_state: List of initial state tensors to be passed to the first
+ | call of the cell.
+ | mask: Binary tensor of shape `[batch_size, timesteps]`
+ | indicating whether a given timestep should be masked.
+ | An individual `True` entry indicates that the corresponding
+ | timestep should be utilized, while a `False` entry indicates
+ | that the corresponding timestep should be ignored.
+ | training: Python boolean indicating whether the layer should behave in
+ | training mode or in inference mode. This argument is passed
+ | to the cell when calling it.
+ | This is for use with cells that use dropout.
+ |
+ | Input shape:
+ | 3-D tensor with shape `(batch_size, timesteps, features)`.
+ |
+ | Output shape:
+ |
+ | - If `return_state`: a list of tensors. The first tensor is
+ | the output. The remaining tensors are the last states,
+ | each with shape `(batch_size, state_size)`, where `state_size` could
+ | be a high dimension tensor shape.
+ | - If `return_sequences`: 3D tensor with shape
+ | `(batch_size, timesteps, output_size)`.
+ |
+ | Masking:
+ |
+ | This layer supports masking for input data with a variable number
+ | of timesteps. To introduce masks to your data,
+ | use a `keras.layers.Embedding` layer with the `mask_zero` parameter
+ | set to `True`.
+ |
+ | Note on using statefulness in RNNs:
+ |
+ | You can set RNN layers to be 'stateful', which means that the states
+ | computed for the samples in one batch will be reused as initial states
+ | for the samples in the next batch. This assumes a one-to-one mapping
+ | between samples in different successive batches.
+ |
+ | To enable statefulness:
+ |
+ | - Specify `stateful=True` in the layer constructor.
+ | - Specify a fixed batch size for your model, by passing
+ | If sequential model:
+ | `batch_input_shape=(...)` to the first layer in your model.
+ | Else for functional model with 1 or more Input layers:
+ | `batch_shape=(...)` to all the first layers in your model.
+ | This is the expected shape of your inputs + | *including the batch size*. + | It should be a tuple of integers, e.g. `(32, 10, 100)`. + | - Specify `shuffle=False` when calling `fit()`. + | + | To reset the states of your model, call `.reset_states()` on either + | a specific layer, or on your entire model. + | + | Note on specifying the initial state of RNNs: + | + | You can specify the initial state of RNN layers symbolically by + | calling them with the keyword argument `initial_state`. The value of + | `initial_state` should be a tensor or list of tensors representing + | the initial state of the RNN layer. + | + | You can specify the initial state of RNN layers numerically by + | calling `reset_states` with the keyword argument `states`. The value of + | `states` should be a numpy array or list of numpy arrays representing + | the initial state of the RNN layer. + | + | Examples: + | + | ```python + | from keras.src.layers import RNN + | from keras.src import ops + | + | # First, let's define a RNN Cell, as a layer subclass. + | class MinimalRNNCell(keras.layers.Layer): + | + | def __init__(self, units, **kwargs): + | super().__init__(**kwargs) + | self.units = units + | self.state_size = units + | + | def build(self, input_shape): + | self.kernel = self.add_weight(shape=(input_shape[-1], self.units), + | initializer='uniform', + | name='kernel') + | self.recurrent_kernel = self.add_weight( + | shape=(self.units, self.units), + | initializer='uniform', + | name='recurrent_kernel') + | self.built = True + | + | def call(self, inputs, states): + | prev_output = states[0] + | h = ops.matmul(inputs, self.kernel) + | output = h + ops.matmul(prev_output, self.recurrent_kernel) + | return output, [output] + | + | # Let's use this cell in a RNN layer: + | + | cell = MinimalRNNCell(32) + | x = keras.Input((None, 5)) + | layer = RNN(cell) + | y = layer(x) + | + | # Here's how to use the cell to build a stacked RNN: + | + | cells = [MinimalRNNCell(32), MinimalRNNCell(64)] + | x = keras.Input((None, 5)) + | layer = RNN(cells) + | y = layer(x) + | ``` + | + | Method resolution order: + | RNN + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | cell, + | return_sequences=False, + | return_state=False, + | go_backwards=False, + | stateful=False, + | unroll=False, + | zero_output_for_mask=False, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build( + | self, + | sequences_shape, + | initial_state_shape=None + | ) + | + | call( + | self, + | sequences, + | initial_state=None, + | mask=None, + | training=False + | ) + | + | compute_mask( + | self, + | _, + | mask + | ) + | + | compute_output_shape( + | self, + | sequences_shape, + | initial_state_shape=None + | ) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. 
+ | + | get_initial_state(self, batch_size) + | + | inner_loop( + | self, + | sequences, + | initial_state, + | mask, + | training=False + | ) + | + | reset_state(self) + | + | reset_states(self) + | + | ---------------------------------------------------------------------- + | Class methods defined here: + | + | from_config(config, custom_objects=None) from builtins.type + | Creates a layer from its config. + | + | This method is the reverse of `get_config`, + | capable of instantiating the same layer from the config + | dictionary. It does not handle layer connectivity + | (handled by Network), nor weights (handled by `set_weights`). + | + | Args: + | config: A Python dictionary, typically the + | output of get_config. + | + | Returns: + | A layer instance. + | + diff --git a/.tether/man/layer_separable_conv_1d.txt b/.tether/man/layer_separable_conv_1d.txt new file mode 100644 index 0000000000..038ec500b7 --- /dev/null +++ b/.tether/man/layer_separable_conv_1d.txt @@ -0,0 +1,127 @@ +Help on class SeparableConv1D in module keras.src.layers.convolutional.separable_conv1d: + +class SeparableConv1D(keras.src.layers.convolutional.base_separable_conv.BaseSeparableConv) + | SeparableConv1D(filters, kernel_size, strides=1, padding='valid', data_format=None, dilation_rate=1, depth_multiplier=1, activation=None, use_bias=True, depthwise_initializer='glorot_uniform', pointwise_initializer='glorot_uniform', bias_initializer='zeros', depthwise_regularizer=None, pointwise_regularizer=None, bias_regularizer=None, activity_regularizer=None, depthwise_constraint=None, pointwise_constraint=None, bias_constraint=None, **kwargs) + | + | 1D separable convolution layer. + | + | This layer performs a depthwise convolution that acts separately on + | channels, followed by a pointwise convolution that mixes channels. + | If `use_bias` is True and a bias initializer is provided, + | it adds a bias vector to the output. It then optionally applies an + | activation function to produce the final output. + | + | Args: + | filters: int, the dimensionality of the output space (i.e. the number + | of filters in the pointwise convolution). + | kernel_size: int or tuple/list of 1 integers, specifying the size of the + | depthwise convolution window. + | strides: int or tuple/list of 1 integers, specifying the stride length + | of the depthwise convolution. If only one int is specified, the same + | stride size will be used for all dimensions. `strides > 1` is + | incompatible with `dilation_rate > 1`. + | padding: string, either `"valid"` or `"same"` (case-insensitive). + | `"valid"` means no padding. `"same"` results in padding evenly to + | the left/right or up/down of the input. When `padding="same"` and + | `strides=1`, the output has the same size as the input. + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape `(batch, steps, features)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch, features, steps)`. It defaults to the `image_data_format` + | value found in your Keras config file at `~/.keras/keras.json`. + | If you never set it, then it will be `"channels_last"`. + | dilation_rate: int or tuple/list of 1 integers, specifying the dilation + | rate to use for dilated convolution. If only one int is specified, + | the same dilation rate will be used for all dimensions. 
+ | depth_multiplier: The number of depthwise convolution output channels
+ | for each input channel. The total number of depthwise convolution
+ | output channels will be equal to `input_channel * depth_multiplier`.
+ | activation: Activation function. If `None`, no activation is applied.
+ | use_bias: bool, if `True`, bias will be added to the output.
+ | depthwise_initializer: An initializer for the depthwise convolution
+ | kernel. If None, then the default initializer (`"glorot_uniform"`)
+ | will be used.
+ | pointwise_initializer: An initializer for the pointwise convolution
+ | kernel. If None, then the default initializer (`"glorot_uniform"`)
+ | will be used.
+ | bias_initializer: An initializer for the bias vector. If None, the
+ | default initializer (`"zeros"`) will be used.
+ | depthwise_regularizer: Optional regularizer for the depthwise
+ | convolution kernel.
+ | pointwise_regularizer: Optional regularizer for the pointwise
+ | convolution kernel.
+ | bias_regularizer: Optional regularizer for the bias vector.
+ | activity_regularizer: Optional regularizer function for the output.
+ | depthwise_constraint: Optional projection function to be applied to the
+ | depthwise kernel after being updated by an `Optimizer` (e.g. used
+ | for norm constraints or value constraints for layer weights). The
+ | function must take as input the unprojected variable and must return
+ | the projected variable (which must have the same shape).
+ | pointwise_constraint: Optional projection function to be applied to the
+ | pointwise kernel after being updated by an `Optimizer`.
+ | bias_constraint: Optional projection function to be applied to the
+ | bias after being updated by an `Optimizer`.
+ |
+ | Input shape:
+ | - If `data_format="channels_last"`:
+ | A 3D tensor with shape: `(batch_shape, steps, channels)`
+ | - If `data_format="channels_first"`:
+ | A 3D tensor with shape: `(batch_shape, channels, steps)`
+ |
+ | Output shape:
+ | - If `data_format="channels_last"`:
+ | A 3D tensor with shape: `(batch_shape, new_steps, filters)`
+ | - If `data_format="channels_first"`:
+ | A 3D tensor with shape: `(batch_shape, filters, new_steps)`
+ |
+ | Returns:
+ | A 3D tensor representing
+ | `activation(separable_conv1d(inputs, kernel) + bias)`.
+ |
+ | Example:
+ |
+ | >>> x = np.random.rand(4, 10, 12)
+ | >>> y = keras.layers.SeparableConv1D(4, 3, 2, activation='relu')(x)
+ | >>> print(y.shape)
+ | (4, 4, 4)
+ |
+ | Method resolution order:
+ | SeparableConv1D
+ | keras.src.layers.convolutional.base_separable_conv.BaseSeparableConv
+ | keras.src.layers.layer.Layer
+ | keras.src.backend.tensorflow.layer.TFLayer
+ | keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ | tensorflow.python.trackable.autotrackable.AutoTrackable
+ | tensorflow.python.trackable.base.Trackable
+ | keras.src.ops.operation.Operation
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | filters,
+ | kernel_size,
+ | strides=1,
+ | padding='valid',
+ | data_format=None,
+ | dilation_rate=1,
+ | depth_multiplier=1,
+ | activation=None,
+ | use_bias=True,
+ | depthwise_initializer='glorot_uniform',
+ | pointwise_initializer='glorot_uniform',
+ | bias_initializer='zeros',
+ | depthwise_regularizer=None,
+ | pointwise_regularizer=None,
+ | bias_regularizer=None,
+ | activity_regularizer=None,
+ | depthwise_constraint=None,
+ | pointwise_constraint=None,
+ | bias_constraint=None,
+ | **kwargs
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
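The depthwise-then-pointwise factorization described above is mainly a weight-count optimization. To make the savings concrete, a minimal sketch (not part of the tethered help; assumes the Python `keras` v3 package, any backend):

```python
import keras

# Compare trainable weights of a separable vs. a standard 1D convolution.
inp = keras.Input(shape=(10, 12))          # (steps=10, channels=12)
sep = keras.layers.SeparableConv1D(32, 3)  # filters=32, kernel_size=3
std = keras.layers.Conv1D(32, 3)
sep(inp)
std(inp)  # calling on a symbolic input builds the weights

# Separable: depthwise 3*12 + pointwise 12*32 + bias 32 = 452 params
# Standard:  3*12*32 + bias 32                          = 1184 params
print(sep.count_params(), std.count_params())
```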
+ | + diff --git a/.tether/man/layer_separable_conv_2d.txt b/.tether/man/layer_separable_conv_2d.txt new file mode 100644 index 0000000000..f41ce57d64 --- /dev/null +++ b/.tether/man/layer_separable_conv_2d.txt @@ -0,0 +1,128 @@ +Help on class SeparableConv2D in module keras.src.layers.convolutional.separable_conv2d: + +class SeparableConv2D(keras.src.layers.convolutional.base_separable_conv.BaseSeparableConv) + | SeparableConv2D(filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), depth_multiplier=1, activation=None, use_bias=True, depthwise_initializer='glorot_uniform', pointwise_initializer='glorot_uniform', bias_initializer='zeros', depthwise_regularizer=None, pointwise_regularizer=None, bias_regularizer=None, activity_regularizer=None, depthwise_constraint=None, pointwise_constraint=None, bias_constraint=None, **kwargs) + | + | 2D separable convolution layer. + | + | This layer performs a depthwise convolution that acts separately on + | channels, followed by a pointwise convolution that mixes channels. + | If `use_bias` is True and a bias initializer is provided, + | it adds a bias vector to the output. It then optionally applies an + | activation function to produce the final output. + | + | Args: + | filters: int, the dimensionality of the output space (i.e. the number + | of filters in the pointwise convolution). + | kernel_size: int or tuple/list of 2 integers, specifying the size of the + | depthwise convolution window. + | strides: int or tuple/list of 2 integers, specifying the stride length + | of the depthwise convolution. If only one int is specified, the same + | stride size will be used for all dimensions. `strides > 1` is + | incompatible with `dilation_rate > 1`. + | padding: string, either `"valid"` or `"same"` (case-insensitive). + | `"valid"` means no padding. `"same"` results in padding evenly to + | the left/right or up/down of the input. When `padding="same"` and + | `strides=1`, the output has the same size as the input. + | data_format: string, either `"channels_last"` or `"channels_first"`. + | The ordering of the dimensions in the inputs. `"channels_last"` + | corresponds to inputs with shape `(batch, height, width, channels)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch, channels, height, width)`. It defaults to the + | `image_data_format` value found in your Keras config file + | at `~/.keras/keras.json`. + | If you never set it, then it will be `"channels_last"`. + | dilation_rate: int or tuple/list of 2 integers, specifying the dilation + | rate to use for dilated convolution. If only one int is specified, + | the same dilation rate will be used for all dimensions. + | depth_multiplier: The number of depthwise convolution output channels + | for each input channel. The total number of depthwise convolution + | output channels will be equal to `input_channel * depth_multiplier`. + | activation: Activation function. If `None`, no activation is applied. + | use_bias: bool, if `True`, bias will be added to the output. + | depthwise_initializer: An initializer for the depthwise convolution + | kernel. If None, then the default initializer (`"glorot_uniform"`) + | will be used. + | pointwise_initializer: An initializer for the pointwise convolution + | kernel. If None, then the default initializer (`"glorot_uniform"`) + | will be used. + | bias_initializer: An initializer for the bias vector. If None, the + | default initializer ('"zeros"') will be used. 
+ | depthwise_regularizer: Optional regularizer for the depthwise
+ | convolution kernel.
+ | pointwise_regularizer: Optional regularizer for the pointwise
+ | convolution kernel.
+ | bias_regularizer: Optional regularizer for the bias vector.
+ | activity_regularizer: Optional regularizer function for the output.
+ | depthwise_constraint: Optional projection function to be applied to the
+ | depthwise kernel after being updated by an `Optimizer` (e.g. used
+ | for norm constraints or value constraints for layer weights). The
+ | function must take as input the unprojected variable and must return
+ | the projected variable (which must have the same shape).
+ | pointwise_constraint: Optional projection function to be applied to the
+ | pointwise kernel after being updated by an `Optimizer`.
+ | bias_constraint: Optional projection function to be applied to the
+ | bias after being updated by an `Optimizer`.
+ |
+ | Input shape:
+ | - If `data_format="channels_last"`:
+ | A 4D tensor with shape: `(batch_size, height, width, channels)`
+ | - If `data_format="channels_first"`:
+ | A 4D tensor with shape: `(batch_size, channels, height, width)`
+ |
+ | Output shape:
+ | - If `data_format="channels_last"`:
+ | A 4D tensor with shape: `(batch_size, new_height, new_width, filters)`
+ | - If `data_format="channels_first"`:
+ | A 4D tensor with shape: `(batch_size, filters, new_height, new_width)`
+ |
+ | Returns:
+ | A 4D tensor representing
+ | `activation(separable_conv2d(inputs, kernel) + bias)`.
+ |
+ | Example:
+ |
+ | >>> x = np.random.rand(4, 10, 10, 12)
+ | >>> y = keras.layers.SeparableConv2D(4, 3, 2, activation='relu')(x)
+ | >>> print(y.shape)
+ | (4, 4, 4, 4)
+ |
+ | Method resolution order:
+ | SeparableConv2D
+ | keras.src.layers.convolutional.base_separable_conv.BaseSeparableConv
+ | keras.src.layers.layer.Layer
+ | keras.src.backend.tensorflow.layer.TFLayer
+ | keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ | tensorflow.python.trackable.autotrackable.AutoTrackable
+ | tensorflow.python.trackable.base.Trackable
+ | keras.src.ops.operation.Operation
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | filters,
+ | kernel_size,
+ | strides=(1, 1),
+ | padding='valid',
+ | data_format=None,
+ | dilation_rate=(1, 1),
+ | depth_multiplier=1,
+ | activation=None,
+ | use_bias=True,
+ | depthwise_initializer='glorot_uniform',
+ | pointwise_initializer='glorot_uniform',
+ | bias_initializer='zeros',
+ | depthwise_regularizer=None,
+ | pointwise_regularizer=None,
+ | bias_regularizer=None,
+ | activity_regularizer=None,
+ | depthwise_constraint=None,
+ | pointwise_constraint=None,
+ | bias_constraint=None,
+ | **kwargs
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
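The "depthwise convolution followed by a pointwise convolution" description above can also be written out explicitly. A short sketch (illustrative, assuming the TensorFlow backend) comparing the fused layer with its two-step equivalent:

```python
import numpy as np
import keras

x = np.random.rand(1, 8, 8, 3).astype("float32")

# The fused layer...
sep = keras.layers.SeparableConv2D(16, 3, padding="same")

# ...and the same structure spelled out as two layers.
two_step = keras.Sequential([
    keras.layers.DepthwiseConv2D(3, padding="same", use_bias=False),
    keras.layers.Conv2D(16, 1),  # 1x1 pointwise convolution mixes channels
])

print(sep(x).shape)       # (1, 8, 8, 16)
print(two_step(x).shape)  # (1, 8, 8, 16)
```

The two variants have the same output shape and the same parameter structure; their weights are initialized independently, so the outputs differ numerically.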
+ | + diff --git a/.tether/man/layer_simple_rnn.txt b/.tether/man/layer_simple_rnn.txt new file mode 100644 index 0000000000..3979991041 --- /dev/null +++ b/.tether/man/layer_simple_rnn.txt @@ -0,0 +1,191 @@ +Help on class SimpleRNN in module keras.src.layers.rnn.simple_rnn: + +class SimpleRNN(keras.src.layers.rnn.rnn.RNN) + | SimpleRNN(units, activation='tanh', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0.0, recurrent_dropout=0.0, return_sequences=False, return_state=False, go_backwards=False, stateful=False, unroll=False, seed=None, **kwargs) + | + | Fully-connected RNN where the output is to be fed back as the new input. + | + | Args: + | units: Positive integer, dimensionality of the output space. + | activation: Activation function to use. + | Default: hyperbolic tangent (`tanh`). + | If you pass None, no activation is applied + | (ie. "linear" activation: `a(x) = x`). + | use_bias: Boolean, (default `True`), whether the layer uses + | a bias vector. + | kernel_initializer: Initializer for the `kernel` weights matrix, + | used for the linear transformation of the inputs. Default: + | `"glorot_uniform"`. + | recurrent_initializer: Initializer for the `recurrent_kernel` + | weights matrix, used for the linear transformation of the recurrent + | state. Default: `"orthogonal"`. + | bias_initializer: Initializer for the bias vector. Default: `"zeros"`. + | kernel_regularizer: Regularizer function applied to the `kernel` weights + | matrix. Default: `None`. + | recurrent_regularizer: Regularizer function applied to the + | `recurrent_kernel` weights matrix. Default: `None`. + | bias_regularizer: Regularizer function applied to the bias vector. + | Default: `None`. + | activity_regularizer: Regularizer function applied to the output of the + | layer (its "activation"). Default: `None`. + | kernel_constraint: Constraint function applied to the `kernel` weights + | matrix. Default: `None`. + | recurrent_constraint: Constraint function applied to the + | `recurrent_kernel` weights matrix. Default: `None`. + | bias_constraint: Constraint function applied to the bias vector. + | Default: `None`. + | dropout: Float between 0 and 1. + | Fraction of the units to drop for the linear transformation + | of the inputs. Default: 0. + | recurrent_dropout: Float between 0 and 1. + | Fraction of the units to drop for the linear transformation of the + | recurrent state. Default: 0. + | return_sequences: Boolean. Whether to return the last output + | in the output sequence, or the full sequence. Default: `False`. + | return_state: Boolean. Whether to return the last state + | in addition to the output. Default: `False`. + | go_backwards: Boolean (default: `False`). + | If `True`, process the input sequence backwards and return the + | reversed sequence. + | stateful: Boolean (default: `False`). If `True`, the last state + | for each sample at index i in a batch will be used as initial + | state for the sample of index i in the following batch. + | unroll: Boolean (default: `False`). + | If `True`, the network will be unrolled, + | else a symbolic loop will be used. + | Unrolling can speed-up a RNN, + | although it tends to be more memory-intensive. + | Unrolling is only suitable for short sequences. 
+ | + | Call arguments: + | sequence: A 3D tensor, with shape `[batch, timesteps, feature]`. + | mask: Binary tensor of shape `[batch, timesteps]` indicating whether + | a given timestep should be masked. An individual `True` entry + | indicates that the corresponding timestep should be utilized, + | while a `False` entry indicates that the corresponding timestep + | should be ignored. + | training: Python boolean indicating whether the layer should behave in + | training mode or in inference mode. + | This argument is passed to the cell when calling it. + | This is only relevant if `dropout` or `recurrent_dropout` is used. + | initial_state: List of initial state tensors to be passed to the first + | call of the cell. + | + | Example: + | + | ```python + | inputs = np.random.random((32, 10, 8)) + | simple_rnn = keras.layers.SimpleRNN(4) + | output = simple_rnn(inputs) # The output has shape `(32, 4)`. + | simple_rnn = keras.layers.SimpleRNN( + | 4, return_sequences=True, return_state=True + | ) + | # whole_sequence_output has shape `(32, 10, 4)`. + | # final_state has shape `(32, 4)`. + | whole_sequence_output, final_state = simple_rnn(inputs) + | ``` + | + | Method resolution order: + | SimpleRNN + | keras.src.layers.rnn.rnn.RNN + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | units, + | activation='tanh', + | use_bias=True, + | kernel_initializer='glorot_uniform', + | recurrent_initializer='orthogonal', + | bias_initializer='zeros', + | kernel_regularizer=None, + | recurrent_regularizer=None, + | bias_regularizer=None, + | activity_regularizer=None, + | kernel_constraint=None, + | recurrent_constraint=None, + | bias_constraint=None, + | dropout=0.0, + | recurrent_dropout=0.0, + | return_sequences=False, + | return_state=False, + | go_backwards=False, + | stateful=False, + | unroll=False, + | seed=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call( + | self, + | sequences, + | initial_state=None, + | mask=None, + | training=False + | ) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + | ---------------------------------------------------------------------- + | Class methods defined here: + | + | from_config(config) from builtins.type + | Creates a layer from its config. + | + | This method is the reverse of `get_config`, + | capable of instantiating the same layer from the config + | dictionary. It does not handle layer connectivity + | (handled by Network), nor weights (handled by `set_weights`). + | + | Args: + | config: A Python dictionary, typically the + | output of get_config. + | + | Returns: + | A layer instance. 
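A minimal sketch of the `get_config()`/`from_config()` round trip documented above (illustrative, assuming the Python `keras` v3 package):

```python
import keras

layer = keras.layers.SimpleRNN(4, return_sequences=True)
config = layer.get_config()  # a plain, serializable Python dict

# from_config() rebuilds an equivalent (unbuilt) layer from that dict.
clone = keras.layers.SimpleRNN.from_config(config)
assert clone.units == 4 and clone.return_sequences

# The config carries hyperparameters only; weights are transferred
# separately, e.g. via get_weights()/set_weights() once both are built.
```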
+ |
+ | ----------------------------------------------------------------------
+ | Readonly properties defined here:
+ |
+ | activation
+ |
+ | bias_constraint
+ |
+ | bias_initializer
+ |
+ | bias_regularizer
+ |
+ | dropout
+ |
+ | kernel_constraint
+ |
+ | kernel_initializer
+ |
+ | kernel_regularizer
+ |
+ | recurrent_constraint
+ |
+ | recurrent_dropout
+ |
+ | recurrent_initializer
+ |
+ | recurrent_regularizer
+ |
+ | units
+ |
+ | use_bias
+ |
+
diff --git a/.tether/man/layer_simple_rnn_cell.txt b/.tether/man/layer_simple_rnn_cell.txt
new file mode 100644
index 0000000000..f865615b2e
--- /dev/null
+++ b/.tether/man/layer_simple_rnn_cell.txt
@@ -0,0 +1,118 @@
+Help on class SimpleRNNCell in module keras.src.layers.rnn.simple_rnn:
+
+class SimpleRNNCell(keras.src.layers.layer.Layer, keras.src.layers.rnn.dropout_rnn_cell.DropoutRNNCell)
+ | SimpleRNNCell(units, activation='tanh', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0.0, recurrent_dropout=0.0, seed=None, **kwargs)
+ |
+ | Cell class for SimpleRNN.
+ |
+ | This class processes one step within the whole time sequence input, whereas
+ | `keras.layers.SimpleRNN` processes the whole sequence.
+ |
+ | Args:
+ | units: Positive integer, dimensionality of the output space.
+ | activation: Activation function to use.
+ | Default: hyperbolic tangent (`tanh`).
+ | If you pass `None`, no activation is applied
+ | (ie. "linear" activation: `a(x) = x`).
+ | use_bias: Boolean, (default `True`), whether the layer
+ | should use a bias vector.
+ | kernel_initializer: Initializer for the `kernel` weights matrix,
+ | used for the linear transformation of the inputs. Default:
+ | `"glorot_uniform"`.
+ | recurrent_initializer: Initializer for the `recurrent_kernel`
+ | weights matrix, used for the linear transformation
+ | of the recurrent state. Default: `"orthogonal"`.
+ | bias_initializer: Initializer for the bias vector. Default: `"zeros"`.
+ | kernel_regularizer: Regularizer function applied to the `kernel` weights
+ | matrix. Default: `None`.
+ | recurrent_regularizer: Regularizer function applied to the
+ | `recurrent_kernel` weights matrix. Default: `None`.
+ | bias_regularizer: Regularizer function applied to the bias vector.
+ | Default: `None`.
+ | kernel_constraint: Constraint function applied to the `kernel` weights
+ | matrix. Default: `None`.
+ | recurrent_constraint: Constraint function applied to the
+ | `recurrent_kernel` weights matrix. Default: `None`.
+ | bias_constraint: Constraint function applied to the bias vector.
+ | Default: `None`.
+ | dropout: Float between 0 and 1. Fraction of the units to drop for the
+ | linear transformation of the inputs. Default: 0.
+ | recurrent_dropout: Float between 0 and 1. Fraction of the units to drop
+ | for the linear transformation of the recurrent state. Default: 0.
+ | seed: Random seed for dropout.
+ |
+ | Call arguments:
+ | sequence: A 2D tensor, with shape `(batch, features)`.
+ | states: A 2D tensor with shape `(batch, units)`, which is the state
+ | from the previous time step.
+ | training: Python boolean indicating whether the layer should behave in
+ | training mode or in inference mode. Only relevant when `dropout` or
+ | `recurrent_dropout` is used.
+ | + | Example: + | + | ```python + | inputs = np.random.random([32, 10, 8]).astype(np.float32) + | rnn = keras.layers.RNN(keras.layers.SimpleRNNCell(4)) + | output = rnn(inputs) # The output has shape `(32, 4)`. + | rnn = keras.layers.RNN( + | keras.layers.SimpleRNNCell(4), + | return_sequences=True, + | return_state=True + | ) + | # whole_sequence_output has shape `(32, 10, 4)`. + | # final_state has shape `(32, 4)`. + | whole_sequence_output, final_state = rnn(inputs) + | ``` + | + | Method resolution order: + | SimpleRNNCell + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | keras.src.layers.rnn.dropout_rnn_cell.DropoutRNNCell + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | units, + | activation='tanh', + | use_bias=True, + | kernel_initializer='glorot_uniform', + | recurrent_initializer='orthogonal', + | bias_initializer='zeros', + | kernel_regularizer=None, + | recurrent_regularizer=None, + | bias_regularizer=None, + | kernel_constraint=None, + | recurrent_constraint=None, + | bias_constraint=None, + | dropout=0.0, + | recurrent_dropout=0.0, + | seed=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, input_shape) + | + | call( + | self, + | sequence, + | states, + | training=False + | ) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + | get_initial_state(self, batch_size=None) + | diff --git a/.tether/man/layer_spatial_dropout_1d.txt b/.tether/man/layer_spatial_dropout_1d.txt new file mode 100644 index 0000000000..c7c1259368 --- /dev/null +++ b/.tether/man/layer_spatial_dropout_1d.txt @@ -0,0 +1,57 @@ +Help on class SpatialDropout1D in module keras.src.layers.regularization.spatial_dropout: + +class SpatialDropout1D(BaseSpatialDropout) + | SpatialDropout1D(rate, seed=None, name=None, dtype=None) + | + | Spatial 1D version of Dropout. + | + | This layer performs the same function as Dropout, however, it drops + | entire 1D feature maps instead of individual elements. If adjacent frames + | within feature maps are strongly correlated (as is normally the case in + | early convolution layers) then regular dropout will not regularize the + | activations and will otherwise just result in an effective learning rate + | decrease. In this case, `SpatialDropout1D` will help promote independence + | between feature maps and should be used instead. + | + | Args: + | rate: Float between 0 and 1. Fraction of the input units to drop. + | + | Call arguments: + | inputs: A 3D tensor. + | training: Python boolean indicating whether the layer + | should behave in training mode (applying dropout) + | or in inference mode (pass-through). + | + | Input shape: + | 3D tensor with shape: `(samples, timesteps, channels)` + | + | Output shape: Same as input. 
+ | + | Reference: + | + | - [Tompson et al., 2014](https://arxiv.org/abs/1411.4280) + | + | Method resolution order: + | SpatialDropout1D + | BaseSpatialDropout + | keras.src.layers.regularization.dropout.Dropout + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | rate, + | seed=None, + | name=None, + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + diff --git a/.tether/man/layer_spatial_dropout_2d.txt b/.tether/man/layer_spatial_dropout_2d.txt new file mode 100644 index 0000000000..640f2e1ecd --- /dev/null +++ b/.tether/man/layer_spatial_dropout_2d.txt @@ -0,0 +1,73 @@ +Help on class SpatialDropout2D in module keras.src.layers.regularization.spatial_dropout: + +class SpatialDropout2D(BaseSpatialDropout) + | SpatialDropout2D(rate, data_format=None, seed=None, name=None, dtype=None) + | + | Spatial 2D version of Dropout. + | + | This version performs the same function as Dropout, however, it drops + | entire 2D feature maps instead of individual elements. If adjacent pixels + | within feature maps are strongly correlated (as is normally the case in + | early convolution layers) then regular dropout will not regularize the + | activations and will otherwise just result in an effective learning rate + | decrease. In this case, `SpatialDropout2D` will help promote independence + | between feature maps and should be used instead. + | + | Args: + | rate: Float between 0 and 1. Fraction of the input units to drop. + | data_format: `"channels_first"` or `"channels_last"`. + | In `"channels_first"` mode, the channels dimension (the depth) + | is at index 1, in `"channels_last"` mode is it at index 3. + | It defaults to the `image_data_format` value found in your + | Keras config file at `~/.keras/keras.json`. + | If you never set it, then it will be `"channels_last"`. + | + | Call arguments: + | inputs: A 4D tensor. + | training: Python boolean indicating whether the layer + | should behave in training mode (applying dropout) + | or in inference mode (pass-through). + | + | Input shape: + | 4D tensor with shape: `(samples, channels, rows, cols)` if + | data_format='channels_first' + | or 4D tensor with shape: `(samples, rows, cols, channels)` if + | data_format='channels_last'. + | + | Output shape: Same as input. + | + | Reference: + | + | - [Tompson et al., 2014](https://arxiv.org/abs/1411.4280) + | + | Method resolution order: + | SpatialDropout2D + | BaseSpatialDropout + | keras.src.layers.regularization.dropout.Dropout + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | rate, + | data_format=None, + | seed=None, + | name=None, + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. 
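The "drops entire 2D feature maps" behavior described above is easy to verify empirically. A small sketch (illustrative; assumes the TensorFlow backend so the output converts cleanly to NumPy):

```python
import numpy as np
import keras

x = np.ones((1, 5, 5, 8), dtype="float32")  # (samples, rows, cols, channels)
drop = keras.layers.SpatialDropout2D(rate=0.5, seed=1)
y = np.asarray(drop(x, training=True))

# Each channel is zeroed everywhere or kept everywhere (rescaled by
# 1 / (1 - rate) = 2.0); elements are never dropped pixel-by-pixel.
print(np.unique(y.reshape(-1, 8), axis=0).shape[0])  # 1: one pattern repeated
print(sorted(set(y.flat)))                           # subset of {0.0, 2.0}
```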
+ | + diff --git a/.tether/man/layer_spatial_dropout_3d.txt b/.tether/man/layer_spatial_dropout_3d.txt new file mode 100644 index 0000000000..3722c7a59d --- /dev/null +++ b/.tether/man/layer_spatial_dropout_3d.txt @@ -0,0 +1,73 @@ +Help on class SpatialDropout3D in module keras.src.layers.regularization.spatial_dropout: + +class SpatialDropout3D(BaseSpatialDropout) + | SpatialDropout3D(rate, data_format=None, seed=None, name=None, dtype=None) + | + | Spatial 3D version of Dropout. + | + | This version performs the same function as Dropout, however, it drops + | entire 3D feature maps instead of individual elements. If adjacent voxels + | within feature maps are strongly correlated (as is normally the case in + | early convolution layers) then regular dropout will not regularize the + | activations and will otherwise just result in an effective learning rate + | decrease. In this case, SpatialDropout3D will help promote independence + | between feature maps and should be used instead. + | + | Args: + | rate: Float between 0 and 1. Fraction of the input units to drop. + | data_format: `"channels_first"` or `"channels_last"`. + | In `"channels_first"` mode, the channels dimension (the depth) + | is at index 1, in `"channels_last"` mode is it at index 4. + | It defaults to the `image_data_format` value found in your + | Keras config file at `~/.keras/keras.json`. + | If you never set it, then it will be `"channels_last"`. + | + | Call arguments: + | inputs: A 5D tensor. + | training: Python boolean indicating whether the layer + | should behave in training mode (applying dropout) + | or in inference mode (pass-through). + | + | Input shape: + | 5D tensor with shape: `(samples, channels, dim1, dim2, dim3)` if + | data_format='channels_first' + | or 5D tensor with shape: `(samples, dim1, dim2, dim3, channels)` if + | data_format='channels_last'. + | + | Output shape: Same as input. + | + | Reference: + | + | - [Tompson et al., 2014](https://arxiv.org/abs/1411.4280) + | + | Method resolution order: + | SpatialDropout3D + | BaseSpatialDropout + | keras.src.layers.regularization.dropout.Dropout + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | rate, + | data_format=None, + | seed=None, + | name=None, + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_spectral_normalization.txt b/.tether/man/layer_spectral_normalization.txt new file mode 100644 index 0000000000..277b1307ea --- /dev/null +++ b/.tether/man/layer_spectral_normalization.txt @@ -0,0 +1,81 @@ +Help on class SpectralNormalization in module keras.src.layers.normalization.spectral_normalization: + +class SpectralNormalization(keras.src.layers.core.wrapper.Wrapper) + | SpectralNormalization(layer, power_iterations=1, **kwargs) + | + | Performs spectral normalization on the weights of a target layer. + | + | This wrapper controls the Lipschitz constant of the weights of a layer by + | constraining their spectral norm, which can stabilize the training of GANs. 
+ | + | Args: + | layer: A `keras.layers.Layer` instance that + | has either a `kernel` (e.g. `Conv2D`, `Dense`...) + | or an `embeddings` attribute (`Embedding` layer). + | power_iterations: int, the number of iterations during normalization. + | **kwargs: Base wrapper keyword arguments. + | + | Examples: + | + | Wrap `keras.layers.Conv2D`: + | >>> x = np.random.rand(1, 10, 10, 1) + | >>> conv2d = SpectralNormalization(keras.layers.Conv2D(2, 2)) + | >>> y = conv2d(x) + | >>> y.shape + | (1, 9, 9, 2) + | + | Wrap `keras.layers.Dense`: + | >>> x = np.random.rand(1, 10, 10, 1) + | >>> dense = SpectralNormalization(keras.layers.Dense(10)) + | >>> y = dense(x) + | >>> y.shape + | (1, 10, 10, 10) + | + | Reference: + | + | - [Spectral Normalization for GAN](https://arxiv.org/abs/1802.05957). + | + | Method resolution order: + | SpectralNormalization + | keras.src.layers.core.wrapper.Wrapper + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | layer, + | power_iterations=1, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, input_shape) + | + | call( + | self, + | inputs, + | training=False + | ) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + | normalized_weights(self) + | Generate spectral normalized weights. + | + | This method returns the updated value for `self.kernel` with the + | spectral normalized value, so that the layer is ready for `call()`. + | + diff --git a/.tether/man/layer_stacked_rnn_cells.txt b/.tether/man/layer_stacked_rnn_cells.txt new file mode 100644 index 0000000000..4189a9e550 --- /dev/null +++ b/.tether/man/layer_stacked_rnn_cells.txt @@ -0,0 +1,90 @@ +Help on class StackedRNNCells in module keras.src.layers.rnn.stacked_rnn_cells: + +class StackedRNNCells(keras.src.layers.layer.Layer) + | StackedRNNCells(cells, **kwargs) + | + | Wrapper allowing a stack of RNN cells to behave as a single cell. + | + | Used to implement efficient stacked RNNs. + | + | Args: + | cells: List of RNN cell instances. + | + | Examples: + | + | ```python + | batch_size = 3 + | sentence_length = 5 + | num_features = 2 + | new_shape = (batch_size, sentence_length, num_features) + | x = np.reshape(np.arange(30), new_shape) + | + | rnn_cells = [keras.layers.LSTMCell(128) for _ in range(2)] + | stacked_lstm = keras.layers.StackedRNNCells(rnn_cells) + | lstm_layer = keras.layers.RNN(stacked_lstm) + | + | result = lstm_layer(x) + | ``` + | + | Method resolution order: + | StackedRNNCells + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | cells, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. 
+ |
+ | build(self, input_shape)
+ |
+ | call(
+ | self,
+ | inputs,
+ | states,
+ | training=False,
+ | **kwargs
+ | )
+ |
+ | get_config(self)
+ | Returns the config of the object.
+ |
+ | An object config is a Python dictionary (serializable)
+ | containing the information needed to re-instantiate it.
+ |
+ | get_initial_state(self, batch_size=None)
+ |
+ | ----------------------------------------------------------------------
+ | Class methods defined here:
+ |
+ | from_config(config, custom_objects=None) from builtins.type
+ | Creates a layer from its config.
+ |
+ | This method is the reverse of `get_config`,
+ | capable of instantiating the same layer from the config
+ | dictionary. It does not handle layer connectivity
+ | (handled by Network), nor weights (handled by `set_weights`).
+ |
+ | Args:
+ | config: A Python dictionary, typically the
+ | output of get_config.
+ |
+ | Returns:
+ | A layer instance.
+ |
+ | ----------------------------------------------------------------------
+ | Readonly properties defined here:
+ |
+ | output_size
+ |
+ | state_size
+ |
diff --git a/.tether/man/layer_string_lookup.txt b/.tether/man/layer_string_lookup.txt
new file mode 100644
index 0000000000..6529c6e35b
--- /dev/null
+++ b/.tether/man/layer_string_lookup.txt
@@ -0,0 +1,359 @@
+Help on class StringLookup in module keras.src.layers.preprocessing.string_lookup:
+
+class StringLookup(keras.src.layers.preprocessing.index_lookup.IndexLookup)
+ | StringLookup(max_tokens=None, num_oov_indices=1, mask_token=None, oov_token='[UNK]', vocabulary=None, idf_weights=None, invert=False, output_mode='int', pad_to_max_tokens=False, sparse=False, encoding='utf-8', name=None, **kwargs)
+ |
+ | A preprocessing layer that maps strings to (possibly encoded) indices.
+ |
+ | This layer translates a set of arbitrary strings into integer output via a
+ | table-based vocabulary lookup. This layer will perform no splitting or
+ | transformation of input strings. For a layer that can split and tokenize
+ | natural language, see the `keras.layers.TextVectorization` layer.
+ |
+ | The vocabulary for the layer must be either supplied on construction or
+ | learned via `adapt()`. During `adapt()`, the layer will analyze a data set,
+ | determine the frequency of individual string tokens, and create a
+ | vocabulary from them. If the vocabulary is capped in size, the most frequent
+ | tokens will be used to create the vocabulary and all others will be treated
+ | as out-of-vocabulary (OOV).
+ |
+ | There are two possible output modes for the layer.
+ | When `output_mode` is `"int"`,
+ | input strings are converted to their index in the vocabulary (an integer).
+ | When `output_mode` is `"multi_hot"`, `"count"`, or `"tf_idf"`, input strings
+ | are encoded into an array where each dimension corresponds to an element in
+ | the vocabulary.
+ |
+ | The vocabulary can optionally contain a mask token as well as an OOV token
+ | (which can optionally occupy multiple indices in the vocabulary, as set
+ | by `num_oov_indices`).
+ | The position of these tokens in the vocabulary is fixed. When `output_mode`
+ | is `"int"`, the vocabulary will begin with the mask token (if set), followed
+ | by OOV indices, followed by the rest of the vocabulary. When `output_mode`
+ | is `"multi_hot"`, `"count"`, or `"tf_idf"` the vocabulary will begin with
+ | OOV indices and instances of the mask token will be dropped.
+ |
+ | **Note:** This layer uses TensorFlow internally. It cannot
+ | be used as part of the compiled computation graph of a model with
+ | any backend other than TensorFlow.
+ | It can however be used with any backend when running eagerly.
+ | It can also always be used as part of an input preprocessing pipeline
+ | with any backend (outside the model itself), which is how we recommend
+ | to use this layer.
+ |
+ | **Note:** This layer is safe to use inside a `tf.data` pipeline
+ | (independently of which backend you're using).
+ |
+ | Args:
+ | max_tokens: Maximum size of the vocabulary for this layer. This should
+ | only be specified when adapting the vocabulary or when setting
+ | `pad_to_max_tokens=True`. If None, there is no cap on the size of
+ | the vocabulary. Note that this size includes the OOV
+ | and mask tokens. Defaults to `None`.
+ | num_oov_indices: The number of out-of-vocabulary tokens to use.
+ | If this value is more than 1, OOV inputs are modulated to
+ | determine their OOV value.
+ | If this value is 0, OOV inputs will cause an error when calling
+ | the layer. Defaults to `1`.
+ | mask_token: A token that represents masked inputs. When `output_mode` is
+ | `"int"`, the token is included in vocabulary and mapped to index 0.
+ | In other output modes, the token will not appear
+ | in the vocabulary and instances of the mask token
+ | in the input will be dropped. If set to `None`,
+ | no mask term will be added. Defaults to `None`.
+ | oov_token: Only used when `invert` is True. The token to return for OOV
+ | indices. Defaults to `"[UNK]"`.
+ | vocabulary: Optional. Either an array of strings or a string path to a
+ | text file. If passing an array, can pass a tuple, list,
+ | 1D NumPy array, or 1D tensor containing the string vocabulary terms.
+ | If passing a file path, the file should contain one line per term
+ | in the vocabulary. If this argument is set,
+ | there is no need to `adapt()` the layer.
+ | idf_weights: Only valid when `output_mode` is `"tf_idf"`.
+ | A tuple, list, 1D NumPy array, or 1D tensor of the same length
+ | as the vocabulary, containing the floating point inverse document
+ | frequency weights, which will be multiplied by per sample term
+ | counts for the final TF-IDF weight.
+ | If the `vocabulary` argument is set, and `output_mode` is
+ | `"tf_idf"`, this argument must be supplied.
+ | invert: Only valid when `output_mode` is `"int"`.
+ | If `True`, this layer will map indices to vocabulary items
+ | instead of mapping vocabulary items to indices.
+ | Defaults to `False`.
+ | output_mode: Specification for the output of the layer. Values can be
+ | `"int"`, `"one_hot"`, `"multi_hot"`, `"count"`, or `"tf_idf"`
+ | configuring the layer as follows:
+ | - `"int"`: Return the vocabulary indices of the input tokens.
+ | - `"one_hot"`: Encodes each individual element in the input into an
+ | array the same size as the vocabulary,
+ | containing a 1 at the element index. If the last dimension
+ | is size 1, will encode on that dimension.
+ | If the last dimension is not size 1, will append a new
+ | dimension for the encoded output.
+ | - `"multi_hot"`: Encodes each sample in the input into a single
+ | array the same size as the vocabulary,
+ | containing a 1 for each vocabulary term present in the sample.
+ | Treats the last dimension as the sample dimension,
+ | if input shape is `(..., sample_length)`,
+ | output shape will be `(..., num_tokens)`.
+ | - `"count"`: As `"multi_hot"`, but the int array contains + | a count of the number of times the token at that index + | appeared in the sample. + | - `"tf_idf"`: As `"multi_hot"`, but the TF-IDF algorithm is + | applied to find the value in each token slot. + | For `"int"` output, any shape of input and output is supported. + | For all other output modes, currently only output up to rank 2 + | is supported. Defaults to `"int"`. + | pad_to_max_tokens: Only applicable when `output_mode` is `"multi_hot"`, + | `"count"`, or `"tf_idf"`. If `True`, the output will have + | its feature axis padded to `max_tokens` even if the number + | of unique tokens in the vocabulary is less than `max_tokens`, + | resulting in a tensor of shape `(batch_size, max_tokens)` + | regardless of vocabulary size. Defaults to `False`. + | sparse: Boolean. Only applicable to `"multi_hot"`, `"count"`, and + | `"tf_idf"` output modes. Only supported with TensorFlow + | backend. If `True`, returns a `SparseTensor` + | instead of a dense `Tensor`. Defaults to `False`. + | encoding: Optional. The text encoding to use to interpret the input + | strings. Defaults to `"utf-8"`. + | + | Examples: + | + | **Creating a lookup layer with a known vocabulary** + | + | This example creates a lookup layer with a pre-existing vocabulary. + | + | >>> vocab = ["a", "b", "c", "d"] + | >>> data = [["a", "c", "d"], ["d", "z", "b"]] + | >>> layer = StringLookup(vocabulary=vocab) + | >>> layer(data) + | array([[1, 3, 4], + | [4, 0, 2]]) + | + | **Creating a lookup layer with an adapted vocabulary** + | + | This example creates a lookup layer and generates the vocabulary by + | analyzing the dataset. + | + | >>> data = [["a", "c", "d"], ["d", "z", "b"]] + | >>> layer = StringLookup() + | >>> layer.adapt(data) + | >>> layer.get_vocabulary() + | ['[UNK]', 'd', 'z', 'c', 'b', 'a'] + | + | Note that the OOV token `"[UNK]"` has been added to the vocabulary. + | The remaining tokens are sorted by frequency + | (`"d"`, which has 2 occurrences, is first) then by inverse sort order. + | + | >>> data = [["a", "c", "d"], ["d", "z", "b"]] + | >>> layer = StringLookup() + | >>> layer.adapt(data) + | >>> layer(data) + | array([[5, 3, 1], + | [1, 2, 4]]) + | + | **Lookups with multiple OOV indices** + | + | This example demonstrates how to use a lookup layer with multiple OOV + | indices. When a layer is created with more than one OOV index, any OOV + | values are hashed into the number of OOV buckets, distributing OOV values in + | a deterministic fashion across the set. + | + | >>> vocab = ["a", "b", "c", "d"] + | >>> data = [["a", "c", "d"], ["m", "z", "b"]] + | >>> layer = StringLookup(vocabulary=vocab, num_oov_indices=2) + | >>> layer(data) + | array([[2, 4, 5], + | [0, 1, 3]]) + | + | Note that the output for OOV value 'm' is 0, while the output for OOV value + | `"z"` is 1. The in-vocab terms have their output index increased by 1 from + | earlier examples (a maps to 2, etc) in order to make space for the extra OOV + | value. + | + | **One-hot output** + | + | Configure the layer with `output_mode='one_hot'`. Note that the first + | `num_oov_indices` dimensions in the ont_hot encoding represent OOV values. 
+ | + | >>> vocab = ["a", "b", "c", "d"] + | >>> data = ["a", "b", "c", "d", "z"] + | >>> layer = StringLookup(vocabulary=vocab, output_mode='one_hot') + | >>> layer(data) + | array([[0., 1., 0., 0., 0.], + | [0., 0., 1., 0., 0.], + | [0., 0., 0., 1., 0.], + | [0., 0., 0., 0., 1.], + | [1., 0., 0., 0., 0.]], dtype=float32) + | + | **Multi-hot output** + | + | Configure the layer with `output_mode='multi_hot'`. Note that the first + | `num_oov_indices` dimensions in the multi_hot encoding represent OOV values. + | + | >>> vocab = ["a", "b", "c", "d"] + | >>> data = [["a", "c", "d", "d"], ["d", "z", "b", "z"]] + | >>> layer = StringLookup(vocabulary=vocab, output_mode='multi_hot') + | >>> layer(data) + | array([[0., 1., 0., 1., 1.], + | [1., 0., 1., 0., 1.]], dtype=float32) + | + | **Token count output** + | + | Configure the layer with `output_mode='count'`. As with multi_hot output, + | the first `num_oov_indices` dimensions in the output represent OOV values. + | + | >>> vocab = ["a", "b", "c", "d"] + | >>> data = [["a", "c", "d", "d"], ["d", "z", "b", "z"]] + | >>> layer = StringLookup(vocabulary=vocab, output_mode='count') + | >>> layer(data) + | array([[0., 1., 0., 1., 2.], + | [2., 0., 1., 0., 1.]], dtype=float32) + | + | **TF-IDF output** + | + | Configure the layer with `output_mode="tf_idf"`. As with multi_hot output, + | the first `num_oov_indices` dimensions in the output represent OOV values. + | + | Each token bin will output `token_count * idf_weight`, where the idf weights + | are the inverse document frequency weights per token. These should be + | provided along with the vocabulary. Note that the `idf_weight` for OOV + | values will default to the average of all idf weights passed in. + | + | >>> vocab = ["a", "b", "c", "d"] + | >>> idf_weights = [0.25, 0.75, 0.6, 0.4] + | >>> data = [["a", "c", "d", "d"], ["d", "z", "b", "z"]] + | >>> layer = StringLookup(output_mode="tf_idf") + | >>> layer.set_vocabulary(vocab, idf_weights=idf_weights) + | >>> layer(data) + | array([[0. , 0.25, 0. , 0.6 , 0.8 ], + | [1.0 , 0. , 0.75, 0. , 0.4 ]], dtype=float32) + | + | To specify the idf weights for oov values, you will need to pass the entire + | vocabulary including the leading oov token. + | + | >>> vocab = ["[UNK]", "a", "b", "c", "d"] + | >>> idf_weights = [0.9, 0.25, 0.75, 0.6, 0.4] + | >>> data = [["a", "c", "d", "d"], ["d", "z", "b", "z"]] + | >>> layer = StringLookup(output_mode="tf_idf") + | >>> layer.set_vocabulary(vocab, idf_weights=idf_weights) + | >>> layer(data) + | array([[0. , 0.25, 0. , 0.6 , 0.8 ], + | [1.8 , 0. , 0.75, 0. , 0.4 ]], dtype=float32) + | + | When adapting the layer in `"tf_idf"` mode, each input sample will be + | considered a document, and IDF weight per token will be calculated as + | `log(1 + num_documents / (1 + token_document_count))`. + | + | **Inverse lookup** + | + | This example demonstrates how to map indices to strings using this layer. + | (You can also use `adapt()` with `inverse=True`, but for simplicity we'll + | pass the vocab in this example.) + | + | >>> vocab = ["a", "b", "c", "d"] + | >>> data = [[1, 3, 4], [4, 0, 2]] + | >>> layer = StringLookup(vocabulary=vocab, invert=True) + | >>> layer(data) + | array([[b'a', b'c', b'd'], + | [b'd', b'[UNK]', b'b']], dtype=object) + | + | Note that the first index correspond to the oov token by default. + | + | + | **Forward and inverse lookup pairs** + | + | This example demonstrates how to use the vocabulary of a standard lookup + | layer to create an inverse lookup layer. 
+ |
+ | >>> vocab = ["a", "b", "c", "d"]
+ | >>> data = [["a", "c", "d"], ["d", "z", "b"]]
+ | >>> layer = StringLookup(vocabulary=vocab)
+ | >>> i_layer = StringLookup(vocabulary=vocab, invert=True)
+ | >>> int_data = layer(data)
+ | >>> i_layer(int_data)
+ | array([[b'a', b'c', b'd'],
+ |        [b'd', b'[UNK]', b'b']], dtype=object)
+ |
+ | In this example, the input value `"z"` resulted in an output of `"[UNK]"`,
+ | since `"z"` was not in the vocabulary - it got represented as an OOV, and all
+ | OOV values are returned as `"[UNK]"` in the inverse layer. Also, note that
+ | for the inverse to work, you must have already set the forward layer
+ | vocabulary either directly or via `adapt()` before calling
+ | `get_vocabulary()`.
+ |
+ | Method resolution order:
+ |     StringLookup
+ |     keras.src.layers.preprocessing.index_lookup.IndexLookup
+ |     keras.src.layers.layer.Layer
+ |     keras.src.backend.tensorflow.layer.TFLayer
+ |     keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ |     tensorflow.python.trackable.autotrackable.AutoTrackable
+ |     tensorflow.python.trackable.base.Trackable
+ |     keras.src.ops.operation.Operation
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ |     self,
+ |     max_tokens=None,
+ |     num_oov_indices=1,
+ |     mask_token=None,
+ |     oov_token='[UNK]',
+ |     vocabulary=None,
+ |     idf_weights=None,
+ |     invert=False,
+ |     output_mode='int',
+ |     pad_to_max_tokens=False,
+ |     sparse=False,
+ |     encoding='utf-8',
+ |     name=None,
+ |     **kwargs
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | adapt(
+ |     self,
+ |     data,
+ |     steps=None
+ | )
+ | Computes a vocabulary of string terms from tokens in a dataset.
+ |
+ | Calling `adapt()` on a `StringLookup` layer is an alternative to passing
+ | in a precomputed vocabulary on construction via the `vocabulary`
+ | argument. A `StringLookup` layer should always be either adapted over a
+ | dataset or supplied with a vocabulary.
+ |
+ | During `adapt()`, the layer will build a vocabulary of all string tokens
+ | seen in the dataset, sorted by occurrence count, with ties broken by
+ | sort order of the tokens (high to low). At the end of `adapt()`, if
+ | `max_tokens` is set, the vocabulary will be truncated to `max_tokens`
+ | size. For example, adapting a layer with `max_tokens=1000` will compute
+ | the 1000 most frequent tokens occurring in the input dataset. If
+ | `output_mode='tf-idf'`, `adapt()` will also learn the document
+ | frequencies of each token in the input dataset.
+ |
+ | Arguments:
+ |     data: The data to train on. It can be passed either as a
+ |         batched `tf.data.Dataset`, as a list of strings,
+ |         or as a NumPy array.
+ |     steps: Integer or `None`.
+ |         Total number of steps (batches of samples) to process.
+ |         If `data` is a `tf.data.Dataset`, and `steps` is `None`,
+ |         `adapt()` will run until the input dataset is exhausted.
+ |         When passing an infinitely
+ |         repeating dataset, you must specify the `steps` argument. This
+ |         argument is not supported with array inputs or list inputs.
+ |
+ | call(self, inputs)
+ |
+ | get_config(self)
+ | Returns the config of the object.
+ |
+ | An object config is a Python dictionary (serializable)
+ | containing the information needed to re-instantiate it.
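+ |
+ | As a supplement to the examples above, a minimal sketch (the data here
+ | is made up for illustration, and a TensorFlow backend is assumed to be
+ | available) of rebuilding an equivalent layer from an adapted vocabulary:
+ |
+ | ```python
+ | import keras
+ |
+ | layer = keras.layers.StringLookup()
+ | layer.adapt([["a", "c", "d"], ["d", "z", "b"]])
+ |
+ | # Recreate an equivalent layer from the learned vocabulary; the OOV
+ | # token is re-added automatically when special tokens are excluded.
+ | clone = keras.layers.StringLookup(
+ |     vocabulary=layer.get_vocabulary(include_special_tokens=False)
+ | )
+ | ```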
+ |
+
diff --git a/.tether/man/layer_subtract.txt b/.tether/man/layer_subtract.txt
new file mode 100644
index 0000000000..738a037243
--- /dev/null
+++ b/.tether/man/layer_subtract.txt
@@ -0,0 +1,45 @@
+Help on class Subtract in module keras.src.layers.merging.subtract:
+
+class Subtract(keras.src.layers.merging.base_merge.Merge)
+ | Subtract(**kwargs)
+ |
+ | Performs elementwise subtraction.
+ |
+ | It takes as input a list of tensors of size 2, both of the
+ | same shape, and returns a single tensor (inputs[0] - inputs[1])
+ | of the same shape.
+ |
+ | Examples:
+ |
+ | >>> input_shape = (2, 3, 4)
+ | >>> x1 = np.random.rand(*input_shape)
+ | >>> x2 = np.random.rand(*input_shape)
+ | >>> y = keras.layers.Subtract()([x1, x2])
+ |
+ | Usage in a Keras model:
+ |
+ | >>> input1 = keras.layers.Input(shape=(16,))
+ | >>> x1 = keras.layers.Dense(8, activation='relu')(input1)
+ | >>> input2 = keras.layers.Input(shape=(32,))
+ | >>> x2 = keras.layers.Dense(8, activation='relu')(input2)
+ | >>> # equivalent to `subtracted = keras.layers.subtract([x1, x2])`
+ | >>> subtracted = keras.layers.Subtract()([x1, x2])
+ | >>> out = keras.layers.Dense(4)(subtracted)
+ | >>> model = keras.models.Model(inputs=[input1, input2], outputs=out)
+ |
+ | Method resolution order:
+ |     Subtract
+ |     keras.src.layers.merging.base_merge.Merge
+ |     keras.src.layers.layer.Layer
+ |     keras.src.backend.tensorflow.layer.TFLayer
+ |     keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ |     tensorflow.python.trackable.autotrackable.AutoTrackable
+ |     tensorflow.python.trackable.base.Trackable
+ |     keras.src.ops.operation.Operation
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | build(self, input_shape)
+ |
+
diff --git a/.tether/man/layer_text_vectorization.txt b/.tether/man/layer_text_vectorization.txt
new file mode 100644
index 0000000000..f78a04cec2
--- /dev/null
+++ b/.tether/man/layer_text_vectorization.txt
@@ -0,0 +1,369 @@
+Help on class TextVectorization in module keras.src.layers.preprocessing.text_vectorization:
+
+class TextVectorization(keras.src.layers.layer.Layer)
+ | TextVectorization(max_tokens=None, standardize='lower_and_strip_punctuation', split='whitespace', ngrams=None, output_mode='int', output_sequence_length=None, pad_to_max_tokens=False, vocabulary=None, idf_weights=None, sparse=False, ragged=False, encoding='utf-8', name=None, **kwargs)
+ |
+ | A preprocessing layer which maps text features to integer sequences.
+ |
+ | This layer has basic options for managing text in a Keras model. It
+ | transforms a batch of strings (one example = one string) into either a list
+ | of token indices (one example = 1D tensor of integer token indices) or a
+ | dense representation (one example = 1D tensor of float values representing
+ | data about the example's tokens). This layer is meant to handle natural
+ | language inputs. To handle simple string inputs (categorical strings or
+ | pre-tokenized strings) see `keras.layers.StringLookup`.
+ |
+ | The vocabulary for the layer must be either supplied on construction or
+ | learned via `adapt()`. When this layer is adapted, it will analyze the
+ | dataset, determine the frequency of individual string values, and create a
+ | vocabulary from them. This vocabulary can have unlimited size or be capped,
+ | depending on the configuration options for this layer; if there are more
+ | unique values in the input than the maximum vocabulary size, the most
+ | frequent terms will be used to create the vocabulary.
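+ |
+ | For instance, a minimal sketch of that capping behavior (the data below
+ | is made up for illustration; a TensorFlow backend is assumed to be
+ | available):
+ |
+ | ```python
+ | import keras
+ |
+ | # max_tokens=4 leaves room for only 2 real tokens, next to the
+ | # padding token '' and the OOV token '[UNK]'.
+ | layer = keras.layers.TextVectorization(max_tokens=4)
+ | layer.adapt(["a a a b b c"])
+ | print(layer.get_vocabulary())  # ['', '[UNK]', 'a', 'b'] -- 'c' is dropped
+ | ```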
+ |
+ | The processing of each example contains the following steps:
+ |
+ | 1. Standardize each example (usually lowercasing + punctuation stripping)
+ | 2. Split each example into substrings (usually words)
+ | 3. Recombine substrings into tokens (usually ngrams)
+ | 4. Index tokens (associate a unique int value with each token)
+ | 5. Transform each example using this index, either into a vector of ints or
+ |    a dense float vector.
+ |
+ | Some notes on passing callables to customize splitting and normalization for
+ | this layer:
+ |
+ | 1. Any callable can be passed to this Layer, but if you want to serialize
+ |    this object you should only pass functions that are registered Keras
+ |    serializables (see `keras.saving.register_keras_serializable`
+ |    for more details).
+ | 2. When using a custom callable for `standardize`, the data received
+ |    by the callable will be exactly as passed to this layer. The callable
+ |    should return a tensor of the same shape as the input.
+ | 3. When using a custom callable for `split`, the data received by the
+ |    callable will have the 1st dimension squeezed out - instead of
+ |    `[["string to split"], ["another string to split"]]`, the callable will
+ |    see `["string to split", "another string to split"]`.
+ |    The callable should return a `tf.Tensor` of dtype `string`
+ |    with the first dimension containing the split tokens -
+ |    in this example, we should see something like `[["string", "to",
+ |    "split"], ["another", "string", "to", "split"]]`.
+ |
+ | **Note:** This layer uses TensorFlow internally. It cannot
+ | be used as part of the compiled computation graph of a model with
+ | any backend other than TensorFlow.
+ | It can however be used with any backend when running eagerly.
+ | It can also always be used as part of an input preprocessing pipeline
+ | with any backend (outside the model itself), which is how we recommend
+ | using this layer.
+ |
+ | **Note:** This layer is safe to use inside a `tf.data` pipeline
+ | (independently of which backend you're using).
+ |
+ | Args:
+ | max_tokens: Maximum size of the vocabulary for this layer. This should
+ |     only be specified when adapting a vocabulary or when setting
+ |     `pad_to_max_tokens=True`. Note that this vocabulary
+ |     contains 1 OOV token, so the effective number of tokens is
+ |     `(max_tokens - 1 - (1 if output_mode == "int" else 0))`.
+ | standardize: Optional specification for standardization to apply to the
+ |     input text. Values can be:
+ |     - `None`: No standardization.
+ |     - `"lower_and_strip_punctuation"`: Text will be lowercased and all
+ |         punctuation removed.
+ |     - `"lower"`: Text will be lowercased.
+ |     - `"strip_punctuation"`: All punctuation will be removed.
+ |     - Callable: Inputs will be passed to the callable function,
+ |         which should be standardized and returned.
+ | split: Optional specification for splitting the input text.
+ |     Values can be:
+ |     - `None`: No splitting.
+ |     - `"whitespace"`: Split on whitespace.
+ |     - `"character"`: Split on each unicode character.
+ |     - Callable: Standardized inputs will be passed to the callable
+ |         function, which should be split and returned.
+ | ngrams: Optional specification for ngrams to create from the
+ |     possibly-split input text. Values can be `None`, an integer
+ |     or tuple of integers; passing an integer will create ngrams
+ |     up to that integer, and passing a tuple of integers will
+ |     create ngrams for the specified values in the tuple.
+ |     Passing `None` means that no ngrams will be created.
+ | output_mode: Optional specification for the output of the layer. + | Values can be `"int"`, `"multi_hot"`, `"count"` or `"tf_idf"`, + | configuring the layer as follows: + | - `"int"`: Outputs integer indices, one integer index per split + | string token. When `output_mode == "int"`, + | 0 is reserved for masked locations; + | this reduces the vocab size to `max_tokens - 2` + | instead of `max_tokens - 1`. + | - `"multi_hot"`: Outputs a single int array per batch, of either + | vocab_size or max_tokens size, containing 1s in all elements + | where the token mapped to that index exists at least + | once in the batch item. + | - `"count"`: Like `"multi_hot"`, but the int array contains + | a count of the number of times the token at that index + | appeared in the batch item. + | - `"tf_idf"`: Like `"multi_hot"`, but the TF-IDF algorithm + | is applied to find the value in each token slot. + | For `"int"` output, any shape of input and output is supported. + | For all other output modes, currently only rank 1 inputs + | (and rank 2 outputs after splitting) are supported. + | output_sequence_length: Only valid in INT mode. If set, the output will + | have its time dimension padded or truncated to exactly + | `output_sequence_length` values, resulting in a tensor of shape + | `(batch_size, output_sequence_length)` regardless of how many tokens + | resulted from the splitting step. Defaults to `None`. If `ragged` + | is `True` then `output_sequence_length` may still truncate the + | output. + | pad_to_max_tokens: Only valid in `"multi_hot"`, `"count"`, + | and `"tf_idf"` modes. If `True`, the output will have + | its feature axis padded to `max_tokens` even if the number + | of unique tokens in the vocabulary is less than `max_tokens`, + | resulting in a tensor of shape `(batch_size, max_tokens)` + | regardless of vocabulary size. Defaults to `False`. + | vocabulary: Optional. Either an array of strings or a string path to a + | text file. If passing an array, can pass a tuple, list, + | 1D NumPy array, or 1D tensor containing the string vocabulary terms. + | If passing a file path, the file should contain one line per term + | in the vocabulary. If this argument is set, + | there is no need to `adapt()` the layer. + | idf_weights: Only valid when `output_mode` is `"tf_idf"`. A tuple, list, + | 1D NumPy array, or 1D tensor of the same length as the vocabulary, + | containing the floating point inverse document frequency weights, + | which will be multiplied by per sample term counts for + | the final `tf_idf` weight. If the `vocabulary` argument is set, + | and `output_mode` is `"tf_idf"`, this argument must be supplied. + | ragged: Boolean. Only applicable to `"int"` output mode. + | Only supported with TensorFlow backend. + | If `True`, returns a `RaggedTensor` instead of a dense `Tensor`, + | where each sequence may have a different length + | after string splitting. Defaults to `False`. + | sparse: Boolean. Only applicable to `"multi_hot"`, `"count"`, and + | `"tf_idf"` output modes. Only supported with TensorFlow + | backend. If `True`, returns a `SparseTensor` + | instead of a dense `Tensor`. Defaults to `False`. + | encoding: Optional. The text encoding to use to interpret the input + | strings. Defaults to `"utf-8"`. + | + | Examples: + | + | This example instantiates a `TextVectorization` layer that lowercases text, + | splits on whitespace, strips punctuation, and outputs integer vocab indices. + | + | >>> max_tokens = 5000 # Maximum vocab size. 
+ | >>> max_len = 4 # Sequence length to pad the outputs to. + | >>> # Create the layer. + | >>> vectorize_layer = TextVectorization( + | ... max_tokens=max_tokens, + | ... output_mode='int', + | ... output_sequence_length=max_len) + | + | >>> # Now that the vocab layer has been created, call `adapt` on the + | >>> # list of strings to create the vocabulary. + | >>> vectorize_layer.adapt(["foo bar", "bar baz", "baz bada boom"]) + | + | >>> # Now, the layer can map strings to integers -- you can use an + | >>> # embedding layer to map these integers to learned embeddings. + | >>> input_data = [["foo qux bar"], ["qux baz"]] + | >>> vectorize_layer(input_data) + | array([[4, 1, 3, 0], + | [1, 2, 0, 0]]) + | + | This example instantiates a `TextVectorization` layer by passing a list + | of vocabulary terms to the layer's `__init__()` method. + | + | >>> vocab_data = ["earth", "wind", "and", "fire"] + | >>> max_len = 4 # Sequence length to pad the outputs to. + | >>> # Create the layer, passing the vocab directly. You can also pass the + | >>> # vocabulary arg a path to a file containing one vocabulary word per + | >>> # line. + | >>> vectorize_layer = keras.layers.TextVectorization( + | ... max_tokens=max_tokens, + | ... output_mode='int', + | ... output_sequence_length=max_len, + | ... vocabulary=vocab_data) + | + | >>> # Because we've passed the vocabulary directly, we don't need to adapt + | >>> # the layer - the vocabulary is already set. The vocabulary contains the + | >>> # padding token ('') and OOV token ('[UNK]') + | >>> # as well as the passed tokens. + | >>> vectorize_layer.get_vocabulary() + | ['', '[UNK]', 'earth', 'wind', 'and', 'fire'] + | + | Method resolution order: + | TextVectorization + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | max_tokens=None, + | standardize='lower_and_strip_punctuation', + | split='whitespace', + | ngrams=None, + | output_mode='int', + | output_sequence_length=None, + | pad_to_max_tokens=False, + | vocabulary=None, + | idf_weights=None, + | sparse=False, + | ragged=False, + | encoding='utf-8', + | name=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | adapt( + | self, + | data, + | batch_size=None, + | steps=None + | ) + | Computes a vocabulary of string terms from tokens in a dataset. + | + | Calling `adapt()` on a `TextVectorization` layer is an alternative to + | passing in a precomputed vocabulary on construction via the `vocabulary` + | argument. A `TextVectorization` layer should always be either adapted + | over a dataset or supplied with a vocabulary. + | + | During `adapt()`, the layer will build a vocabulary of all string tokens + | seen in the dataset, sorted by occurrence count, with ties broken by + | sort order of the tokens (high to low). At the end of `adapt()`, if + | `max_tokens` is set, the vocabulary will be truncated to `max_tokens` + | size. For example, adapting a layer with `max_tokens=1000` will compute + | the 1000 most frequent tokens occurring in the input dataset. If + | `output_mode='tf-idf'`, `adapt()` will also learn the document + | frequencies of each token in the input dataset. + | + | Arguments: + | data: The data to train on. 
It can be passed either as a
+ |         batched `tf.data.Dataset`, as a list of strings,
+ |         or as a NumPy array.
+ |     steps: Integer or `None`.
+ |         Total number of steps (batches of samples) to process.
+ |         If `data` is a `tf.data.Dataset`, and `steps` is `None`,
+ |         `adapt()` will run until the input dataset is exhausted.
+ |         When passing an infinitely
+ |         repeating dataset, you must specify the `steps` argument. This
+ |         argument is not supported with array inputs or list inputs.
+ |
+ | build(self, input_shape=None)
+ |
+ | call(self, inputs)
+ |
+ | compute_output_shape(self, input_shape)
+ |
+ | compute_output_spec(self, inputs)
+ |
+ | finalize_state(self)
+ |
+ | get_config(self)
+ | Returns the config of the object.
+ |
+ | An object config is a Python dictionary (serializable)
+ | containing the information needed to re-instantiate it.
+ |
+ | get_vocabulary(self, include_special_tokens=True)
+ | Returns the current vocabulary of the layer.
+ |
+ | Args:
+ |     include_special_tokens: If `True`, the returned vocabulary
+ |         will include the padding and OOV tokens,
+ |         and a term's index in the vocabulary will equal
+ |         the term's index when calling the layer. If `False`, the
+ |         returned vocabulary will not include any padding
+ |         or OOV tokens.
+ |
+ | load_assets(self, dir_path)
+ |
+ | load_own_variables(self, store)
+ | Loads the state of the layer.
+ |
+ | You can override this method to take full control of how the state of
+ | the layer is loaded upon calling `keras.models.load_model()`.
+ |
+ | Args:
+ |     store: Dict from which the state of the model will be loaded.
+ |
+ | reset_state(self)
+ |
+ | save_assets(self, dir_path)
+ |
+ | save_own_variables(self, store)
+ | Saves the state of the layer.
+ |
+ | You can override this method to take full control of how the state of
+ | the layer is saved upon calling `model.save()`.
+ |
+ | Args:
+ |     store: Dict where the state of the model will be saved.
+ |
+ | set_vocabulary(
+ |     self,
+ |     vocabulary,
+ |     idf_weights=None
+ | )
+ | Sets vocabulary (and optionally document frequency) for this layer.
+ |
+ | This method sets the vocabulary and IDF weights for this layer directly,
+ | instead of analyzing a dataset through `adapt()`. It should be used
+ | whenever the vocab (and optionally document frequency) information is
+ | already known. If vocabulary data is already present in the layer, this
+ | method will replace it.
+ |
+ | Args:
+ |     vocabulary: Either an array or a string path to a text file.
+ |         If passing an array, can pass a tuple, list, 1D NumPy array,
+ |         or 1D tensor containing the vocabulary terms.
+ |         If passing a file path, the file should contain one line
+ |         per term in the vocabulary.
+ |     idf_weights: A tuple, list, 1D NumPy array, or 1D tensor of inverse
+ |         document frequency weights with equal length to vocabulary.
+ |         Must be set if `output_mode` is `"tf_idf"`.
+ |         Should not be set otherwise.
+ |
+ | update_state(self, data)
+ |
+ | vocabulary_size(self)
+ | Gets the current size of the layer's vocabulary.
+ |
+ | Returns:
+ |     The integer size of the vocabulary, including optional
+ |     mask and OOV indices.
+ |
+ | ----------------------------------------------------------------------
+ | Class methods defined here:
+ |
+ | from_config(config) from builtins.type
+ | Creates a layer from its config.
+ |
+ | This method is the reverse of `get_config`,
+ | capable of instantiating the same layer from the config
+ | dictionary.
It does not handle layer connectivity + | (handled by Network), nor weights (handled by `set_weights`). + | + | Args: + | config: A Python dictionary, typically the + | output of get_config. + | + | Returns: + | A layer instance. + | + | ---------------------------------------------------------------------- + | Readonly properties defined here: + | + | compute_dtype + | The dtype of the computations performed by the layer. + | + | variable_dtype + | The dtype of the state (weights) of the layer. + | + diff --git a/.tether/man/layer_tfsm.txt b/.tether/man/layer_tfsm.txt new file mode 100644 index 0000000000..c7ae96274a --- /dev/null +++ b/.tether/man/layer_tfsm.txt @@ -0,0 +1,79 @@ +Help on class TFSMLayer in module keras.src.export.export_lib: + +class TFSMLayer(keras.src.layers.layer.Layer) + | TFSMLayer(filepath, call_endpoint='serve', call_training_endpoint=None, trainable=True, name=None, dtype=None) + | + | Reload a Keras model/layer that was saved via SavedModel / ExportArchive. + | + | Arguments: + | filepath: `str` or `pathlib.Path` object. The path to the SavedModel. + | call_endpoint: Name of the endpoint to use as the `call()` method + | of the reloaded layer. If the SavedModel was created + | via `model.export()`, + | then the default endpoint name is `'serve'`. In other cases + | it may be named `'serving_default'`. + | + | Example: + | + | ```python + | model.export("path/to/artifact") + | reloaded_layer = TFSMLayer("path/to/artifact") + | outputs = reloaded_layer(inputs) + | ``` + | + | The reloaded object can be used like a regular Keras layer, and supports + | training/fine-tuning of its trainable weights. Note that the reloaded + | object retains none of the internal structure or custom methods of the + | original object -- it's a brand new layer created around the saved + | function. + | + | **Limitations:** + | + | * Only call endpoints with a single `inputs` tensor argument + | (which may optionally be a dict/tuple/list of tensors) are supported. + | For endpoints with multiple separate input tensor arguments, consider + | subclassing `TFSMLayer` and implementing a `call()` method with a + | custom signature. + | * If you need training-time behavior to differ from inference-time behavior + | (i.e. if you need the reloaded object to support a `training=True` argument + | in `__call__()`), make sure that the training-time call function is + | saved as a standalone endpoint in the artifact, and provide its name + | to the `TFSMLayer` via the `call_training_endpoint` argument. + | + | Method resolution order: + | TFSMLayer + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | filepath, + | call_endpoint='serve', + | call_training_endpoint=None, + | trainable=True, + | name=None, + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call( + | self, + | inputs, + | training=False, + | **kwargs + | ) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. 
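+ |
+ | As a sketch of the `call_training_endpoint` workflow described in the
+ | limitations above (the endpoint names, the input signature, and the
+ | `model` object below are illustrative assumptions, not fixed by the API):
+ |
+ | ```python
+ | import tensorflow as tf
+ | import keras
+ |
+ | # `model` is an existing Keras model (TensorFlow backend).
+ | archive = keras.export.ExportArchive()
+ | archive.track(model)
+ | archive.add_endpoint(
+ |     name="serve",
+ |     fn=lambda x: model(x, training=False),
+ |     input_signature=[tf.TensorSpec(shape=(None, 16), dtype=tf.float32)],
+ | )
+ | archive.add_endpoint(
+ |     name="serve_training",
+ |     fn=lambda x: model(x, training=True),
+ |     input_signature=[tf.TensorSpec(shape=(None, 16), dtype=tf.float32)],
+ | )
+ | archive.write_out("path/to/artifact")
+ |
+ | # The reloaded layer now honors `training=True` in `__call__()`.
+ | reloaded = keras.layers.TFSMLayer(
+ |     "path/to/artifact",
+ |     call_endpoint="serve",
+ |     call_training_endpoint="serve_training",
+ | )
+ | ```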
+ |
+
diff --git a/.tether/man/layer_time_distributed.txt b/.tether/man/layer_time_distributed.txt
new file mode 100644
index 0000000000..dcc6fabdb2
--- /dev/null
+++ b/.tether/man/layer_time_distributed.txt
@@ -0,0 +1,71 @@
+Help on class TimeDistributed in module keras.src.layers.rnn.time_distributed:
+
+class TimeDistributed(keras.src.layers.core.wrapper.Wrapper)
+ | TimeDistributed(layer, **kwargs)
+ |
+ | This wrapper allows you to apply a layer to every temporal slice of an input.
+ |
+ | Every input should be at least 3D, and the dimension of index one of the
+ | first input will be considered to be the temporal dimension.
+ |
+ | Consider a batch of 32 video samples, where each sample is a 128x128 RGB
+ | image with `channels_last` data format, across 10 timesteps.
+ | The batch input shape is `(32, 10, 128, 128, 3)`.
+ |
+ | You can then use `TimeDistributed` to apply the same `Conv2D` layer to each
+ | of the 10 timesteps, independently:
+ |
+ | >>> inputs = layers.Input(shape=(10, 128, 128, 3), batch_size=32)
+ | >>> conv_2d_layer = layers.Conv2D(64, (3, 3))
+ | >>> outputs = layers.TimeDistributed(conv_2d_layer)(inputs)
+ | >>> outputs.shape
+ | (32, 10, 126, 126, 64)
+ |
+ | Because `TimeDistributed` applies the same instance of `Conv2D` to each of
+ | the timesteps, the same set of weights is used at each timestep.
+ |
+ | Args:
+ | layer: a `keras.layers.Layer` instance.
+ |
+ | Call arguments:
+ | inputs: Input tensor of shape (batch, time, ...) or nested tensors,
+ |     each of which has shape (batch, time, ...).
+ | training: Python boolean indicating whether the layer should behave in
+ |     training mode or in inference mode. This argument is passed to the
+ |     wrapped layer (only if the layer supports this argument).
+ | mask: Binary tensor of shape `(samples, timesteps)` indicating whether
+ |     a given timestep should be masked. This argument is passed to the
+ |     wrapped layer (only if the layer supports this argument).
+ |
+ | Method resolution order:
+ |     TimeDistributed
+ |     keras.src.layers.core.wrapper.Wrapper
+ |     keras.src.layers.layer.Layer
+ |     keras.src.backend.tensorflow.layer.TFLayer
+ |     keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ |     tensorflow.python.trackable.autotrackable.AutoTrackable
+ |     tensorflow.python.trackable.base.Trackable
+ |     keras.src.ops.operation.Operation
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ |     self,
+ |     layer,
+ |     **kwargs
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | build(self, input_shape)
+ |
+ | call(
+ |     self,
+ |     inputs,
+ |     training=None,
+ |     mask=None
+ | )
+ |
+ | compute_output_shape(self, input_shape)
+ |
+
diff --git a/.tether/man/layer_torch_module_wrapper.txt b/.tether/man/layer_torch_module_wrapper.txt
new file mode 100644
index 0000000000..e1a1ffe96d
--- /dev/null
+++ b/.tether/man/layer_torch_module_wrapper.txt
@@ -0,0 +1,131 @@
+Help on class TorchModuleWrapper in module keras.src.utils.torch_utils:
+
+class TorchModuleWrapper(keras.src.layers.layer.Layer)
+ | TorchModuleWrapper(module, name=None, **kwargs)
+ |
+ | Torch module wrapper layer.
+ |
+ | `TorchModuleWrapper` is a wrapper class that can turn any
+ | `torch.nn.Module` into a Keras layer, in particular by making its
+ | parameters trackable by Keras.
+ |
+ | Args:
+ | module: `torch.nn.Module` instance. If it's a `LazyModule`
+ |     instance, then its parameters must be initialized before
+ |     passing the instance to `TorchModuleWrapper` (e.g. by calling
+ |     it once).
+ | name: The name of the layer (string).
+ |
+ | Example:
+ |
+ | Here's an example of how the `TorchModuleWrapper` can be used with vanilla
+ | PyTorch modules.
+ |
+ | ```python
+ | import torch
+ | import torch.nn as nn
+ | import torch.nn.functional as F
+ |
+ | import keras
+ | from keras.src.layers import TorchModuleWrapper
+ |
+ | class Classifier(keras.Model):
+ |     def __init__(self, **kwargs):
+ |         super().__init__(**kwargs)
+ |         # Wrap `torch.nn.Module`s with `TorchModuleWrapper`
+ |         # if they contain parameters
+ |         self.conv1 = TorchModuleWrapper(
+ |             nn.Conv2d(in_channels=1, out_channels=32, kernel_size=(3, 3))
+ |         )
+ |         self.conv2 = TorchModuleWrapper(
+ |             nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3))
+ |         )
+ |         self.pool = nn.MaxPool2d(kernel_size=(2, 2))
+ |         self.flatten = nn.Flatten()
+ |         self.dropout = nn.Dropout(p=0.5)
+ |         self.fc = TorchModuleWrapper(nn.Linear(1600, 10))
+ |
+ |     def call(self, inputs):
+ |         x = F.relu(self.conv1(inputs))
+ |         x = self.pool(x)
+ |         x = F.relu(self.conv2(x))
+ |         x = self.pool(x)
+ |         x = self.flatten(x)
+ |         x = self.dropout(x)
+ |         x = self.fc(x)
+ |         return F.softmax(x, dim=1)
+ |
+ |
+ | model = Classifier()
+ | model.build((1, 28, 28))
+ | print("Output shape:", model(torch.ones(1, 1, 28, 28).to("cuda")).shape)
+ |
+ | model.compile(
+ |     loss="sparse_categorical_crossentropy",
+ |     optimizer="adam",
+ |     metrics=["accuracy"]
+ | )
+ | model.fit(train_loader, epochs=5)
+ | ```
+ |
+ | Method resolution order:
+ |     TorchModuleWrapper
+ |     keras.src.layers.layer.Layer
+ |     keras.src.backend.tensorflow.layer.TFLayer
+ |     keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ |     tensorflow.python.trackable.autotrackable.AutoTrackable
+ |     tensorflow.python.trackable.base.Trackable
+ |     keras.src.ops.operation.Operation
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ |     self,
+ |     module,
+ |     name=None,
+ |     **kwargs
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | call(
+ |     self,
+ |     *args,
+ |     **kwargs
+ | )
+ |
+ | get_config(self)
+ | Returns the config of the object.
+ |
+ | An object config is a Python dictionary (serializable)
+ | containing the information needed to re-instantiate it.
+ |
+ | load_own_variables(self, store)
+ | Loads the model's state via `state_dict`.
+ |
+ | parameters(self, recurse=True)
+ |
+ | save_own_variables(self, store)
+ | Saves the model's state via `state_dict`.
+ | `model.parameters` excludes some of the model's state like
+ | `BatchNorm` mean and variance. So, use `state_dict` to obtain
+ | all of the model's state.
+ |
+ | ----------------------------------------------------------------------
+ | Class methods defined here:
+ |
+ | from_config(config) from builtins.type
+ | Creates a layer from its config.
+ |
+ | This method is the reverse of `get_config`,
+ | capable of instantiating the same layer from the config
+ | dictionary. It does not handle layer connectivity
+ | (handled by Network), nor weights (handled by `set_weights`).
+ |
+ | Args:
+ |     config: A Python dictionary, typically the
+ |         output of get_config.
+ |
+ | Returns:
+ |     A layer instance.
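+ |
+ | A smaller sketch of wrapping a single module outside a full model
+ | (assumes the torch backend is active, i.e. `KERAS_BACKEND=torch`;
+ | the shapes here are made up for illustration):
+ |
+ | ```python
+ | import torch
+ | import torch.nn as nn
+ | import keras
+ |
+ | # Wrapping makes the Linear module's parameters trackable by Keras.
+ | fc = keras.layers.TorchModuleWrapper(nn.Linear(4, 2))
+ | out = fc(torch.ones(8, 4))
+ | print(out.shape)        # (8, 2)
+ | print(len(fc.weights))  # 2 -- the Linear kernel and bias
+ | ```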
+ |
+
diff --git a/.tether/man/layer_unit_normalization.txt b/.tether/man/layer_unit_normalization.txt
new file mode 100644
index 0000000000..e3f1bc982b
--- /dev/null
+++ b/.tether/man/layer_unit_normalization.txt
@@ -0,0 +1,55 @@
+Help on class UnitNormalization in module keras.src.layers.normalization.unit_normalization:
+
+class UnitNormalization(keras.src.layers.layer.Layer)
+ | UnitNormalization(axis=-1, **kwargs)
+ |
+ | Unit normalization layer.
+ |
+ | Normalize a batch of inputs so that each input in the batch has an L2 norm
+ | equal to 1 (across the axes specified in `axis`).
+ |
+ | Example:
+ |
+ | >>> data = np.arange(6).reshape(2, 3)
+ | >>> normalized_data = keras.layers.UnitNormalization()(data)
+ | >>> print(np.sum(normalized_data[0, :] ** 2))
+ | 1.0
+ |
+ | Args:
+ | axis: Integer or list/tuple. The axis or axes to normalize across.
+ |     Typically, this is the features axis or axes. The left-out axes are
+ |     typically the batch axis or axes. `-1` is the last dimension
+ |     in the input. Defaults to `-1`.
+ |
+ | Method resolution order:
+ |     UnitNormalization
+ |     keras.src.layers.layer.Layer
+ |     keras.src.backend.tensorflow.layer.TFLayer
+ |     keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ |     tensorflow.python.trackable.autotrackable.AutoTrackable
+ |     tensorflow.python.trackable.base.Trackable
+ |     keras.src.ops.operation.Operation
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ |     self,
+ |     axis=-1,
+ |     **kwargs
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | build(self, input_shape)
+ |
+ | call(self, inputs)
+ |
+ | compute_output_shape(self, input_shape)
+ |
+ | get_config(self)
+ | Returns the config of the object.
+ |
+ | An object config is a Python dictionary (serializable)
+ | containing the information needed to re-instantiate it.
+ |
+
diff --git a/.tether/man/layer_upsampling_1d.txt b/.tether/man/layer_upsampling_1d.txt
new file mode 100644
index 0000000000..116b46cfa4
--- /dev/null
+++ b/.tether/man/layer_upsampling_1d.txt
@@ -0,0 +1,69 @@
+Help on class UpSampling1D in module keras.src.layers.reshaping.up_sampling1d:
+
+class UpSampling1D(keras.src.layers.layer.Layer)
+ | UpSampling1D(size=2, **kwargs)
+ |
+ | Upsampling layer for 1D inputs.
+ |
+ | Repeats each temporal step `size` times along the time axis.
+ |
+ | Example:
+ |
+ | >>> input_shape = (2, 2, 3)
+ | >>> x = np.arange(np.prod(input_shape)).reshape(input_shape)
+ | >>> x
+ | [[[ 0  1  2]
+ |   [ 3  4  5]]
+ |  [[ 6  7  8]
+ |   [ 9 10 11]]]
+ | >>> y = keras.layers.UpSampling1D(size=2)(x)
+ | >>> y
+ | [[[ 0.  1.  2.]
+ |   [ 0.  1.  2.]
+ |   [ 3.  4.  5.]
+ |   [ 3.  4.  5.]]
+ |
+ |  [[ 6.  7.  8.]
+ |   [ 6.  7.  8.]
+ |   [ 9. 10. 11.]
+ |   [ 9. 10. 11.]]]
+ |
+ | Args:
+ | size: Integer. Upsampling factor.
+ |
+ | Input shape:
+ |     3D tensor with shape: `(batch_size, steps, features)`.
+ |
+ | Output shape:
+ |     3D tensor with shape: `(batch_size, upsampled_steps, features)`.
+ |
+ | Method resolution order:
+ |     UpSampling1D
+ |     keras.src.layers.layer.Layer
+ |     keras.src.backend.tensorflow.layer.TFLayer
+ |     keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ |     tensorflow.python.trackable.autotrackable.AutoTrackable
+ |     tensorflow.python.trackable.base.Trackable
+ |     keras.src.ops.operation.Operation
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ |     self,
+ |     size=2,
+ |     **kwargs
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_upsampling_2d.txt b/.tether/man/layer_upsampling_2d.txt new file mode 100644 index 0000000000..9ecb9a47aa --- /dev/null +++ b/.tether/man/layer_upsampling_2d.txt @@ -0,0 +1,94 @@ +Help on class UpSampling2D in module keras.src.layers.reshaping.up_sampling2d: + +class UpSampling2D(keras.src.layers.layer.Layer) + | UpSampling2D(size=(2, 2), data_format=None, interpolation='nearest', **kwargs) + | + | Upsampling layer for 2D inputs. + | + | The implementation uses interpolative resizing, given the resize method + | (specified by the `interpolation` argument). Use `interpolation=nearest` + | to repeat the rows and columns of the data. + | + | Example: + | + | >>> input_shape = (2, 2, 1, 3) + | >>> x = np.arange(np.prod(input_shape)).reshape(input_shape) + | >>> print(x) + | [[[[ 0 1 2]] + | [[ 3 4 5]]] + | [[[ 6 7 8]] + | [[ 9 10 11]]]] + | >>> y = keras.layers.UpSampling2D(size=(1, 2))(x) + | >>> print(y) + | [[[[ 0 1 2] + | [ 0 1 2]] + | [[ 3 4 5] + | [ 3 4 5]]] + | [[[ 6 7 8] + | [ 6 7 8]] + | [[ 9 10 11] + | [ 9 10 11]]]] + | + | Args: + | size: Int, or tuple of 2 integers. + | The upsampling factors for rows and columns. + | data_format: A string, + | one of `"channels_last"` (default) or `"channels_first"`. + | The ordering of the dimensions in the inputs. + | `"channels_last"` corresponds to inputs with shape + | `(batch_size, height, width, channels)` while `"channels_first"` + | corresponds to inputs with shape + | `(batch_size, channels, height, width)`. + | When unspecified, uses + | `image_data_format` value found in your Keras config file at + | `~/.keras/keras.json` (if exists) else `"channels_last"`. + | Defaults to `"channels_last"`. + | interpolation: A string, one of `"bicubic"`, `"bilinear"`, `"lanczos3"`, + | `"lanczos5"`, `"nearest"`. + | + | Input shape: + | 4D tensor with shape: + | - If `data_format` is `"channels_last"`: + | `(batch_size, rows, cols, channels)` + | - If `data_format` is `"channels_first"`: + | `(batch_size, channels, rows, cols)` + | + | Output shape: + | 4D tensor with shape: + | - If `data_format` is `"channels_last"`: + | `(batch_size, upsampled_rows, upsampled_cols, channels)` + | - If `data_format` is `"channels_first"`: + | `(batch_size, channels, upsampled_rows, upsampled_cols)` + | + | Method resolution order: + | UpSampling2D + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | size=(2, 2), + | data_format=None, + | interpolation='nearest', + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. 
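+ |
+ | A short sketch of interpolative resizing, as opposed to the
+ | repeated-rows behavior of the default `"nearest"` shown above
+ | (the input values here are made up for illustration):
+ |
+ | ```python
+ | import numpy as np
+ | import keras
+ |
+ | x = np.array([[0.0, 1.0], [2.0, 3.0]], dtype="float32").reshape(1, 2, 2, 1)
+ | y = keras.layers.UpSampling2D(size=(2, 2), interpolation="bilinear")(x)
+ | print(y.shape)  # (1, 4, 4, 1) -- values are interpolated, not repeated
+ | ```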
+ | + diff --git a/.tether/man/layer_upsampling_3d.txt b/.tether/man/layer_upsampling_3d.txt new file mode 100644 index 0000000000..12a1f70f57 --- /dev/null +++ b/.tether/man/layer_upsampling_3d.txt @@ -0,0 +1,80 @@ +Help on class UpSampling3D in module keras.src.layers.reshaping.up_sampling3d: + +class UpSampling3D(keras.src.layers.layer.Layer) + | UpSampling3D(size=(2, 2, 2), data_format=None, **kwargs) + | + | Upsampling layer for 3D inputs. + | + | Repeats the 1st, 2nd and 3rd dimensions + | of the data by `size[0]`, `size[1]` and `size[2]` respectively. + | + | Example: + | + | >>> input_shape = (2, 1, 2, 1, 3) + | >>> x = np.ones(input_shape) + | >>> y = keras.layers.UpSampling3D(size=(2, 2, 2))(x) + | >>> y.shape + | (2, 2, 4, 2, 3) + | + | Args: + | size: Int, or tuple of 3 integers. + | The upsampling factors for dim1, dim2 and dim3. + | data_format: A string, + | one of `"channels_last"` (default) or `"channels_first"`. + | The ordering of the dimensions in the inputs. + | `"channels_last"` corresponds to inputs with shape + | `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. + | When unspecified, uses + | `image_data_format` value found in your Keras config file at + | `~/.keras/keras.json` (if exists) else `"channels_last"`. + | Defaults to `"channels_last"`. + | + | Input shape: + | 5D tensor with shape: + | - If `data_format` is `"channels_last"`: + | `(batch_size, dim1, dim2, dim3, channels)` + | - If `data_format` is `"channels_first"`: + | `(batch_size, channels, dim1, dim2, dim3)` + | + | Output shape: + | 5D tensor with shape: + | - If `data_format` is `"channels_last"`: + | `(batch_size, upsampled_dim1, upsampled_dim2, upsampled_dim3, + | channels)` + | - If `data_format` is `"channels_first"`: + | `(batch_size, channels, upsampled_dim1, upsampled_dim2, + | upsampled_dim3)` + | + | Method resolution order: + | UpSampling3D + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | size=(2, 2, 2), + | data_format=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_zero_padding_1d.txt b/.tether/man/layer_zero_padding_1d.txt new file mode 100644 index 0000000000..5e2e695de6 --- /dev/null +++ b/.tether/man/layer_zero_padding_1d.txt @@ -0,0 +1,74 @@ +Help on class ZeroPadding1D in module keras.src.layers.reshaping.zero_padding1d: + +class ZeroPadding1D(keras.src.layers.layer.Layer) + | ZeroPadding1D(padding=1, **kwargs) + | + | Zero-padding layer for 1D input (e.g. temporal sequence). 
+ | + | Example: + | + | >>> input_shape = (2, 2, 3) + | >>> x = np.arange(np.prod(input_shape)).reshape(input_shape) + | >>> x + | [[[ 0 1 2] + | [ 3 4 5]] + | [[ 6 7 8] + | [ 9 10 11]]] + | >>> y = keras.layers.ZeroPadding1D(padding=2)(x) + | >>> y + | [[[ 0 0 0] + | [ 0 0 0] + | [ 0 1 2] + | [ 3 4 5] + | [ 0 0 0] + | [ 0 0 0]] + | [[ 0 0 0] + | [ 0 0 0] + | [ 6 7 8] + | [ 9 10 11] + | [ 0 0 0] + | [ 0 0 0]]] + | + | Args: + | padding: Int, or tuple of int (length 2), or dictionary. + | - If int: how many zeros to add at the beginning and end of + | the padding dimension (axis 1). + | - If tuple of 2 ints: how many zeros to add at the beginning and the + | end of the padding dimension (`(left_pad, right_pad)`). + | + | Input shape: + | 3D tensor with shape `(batch_size, axis_to_pad, features)` + | + | Output shape: + | 3D tensor with shape `(batch_size, padded_axis, features)` + | + | Method resolution order: + | ZeroPadding1D + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | padding=1, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_zero_padding_2d.txt b/.tether/man/layer_zero_padding_2d.txt new file mode 100644 index 0000000000..5a991b1f43 --- /dev/null +++ b/.tether/man/layer_zero_padding_2d.txt @@ -0,0 +1,95 @@ +Help on class ZeroPadding2D in module keras.src.layers.reshaping.zero_padding2d: + +class ZeroPadding2D(keras.src.layers.layer.Layer) + | ZeroPadding2D(padding=(1, 1), data_format=None, **kwargs) + | + | Zero-padding layer for 2D input (e.g. picture). + | + | This layer can add rows and columns of zeros at the top, bottom, left and + | right side of an image tensor. + | + | Example: + | + | >>> input_shape = (1, 1, 2, 2) + | >>> x = np.arange(np.prod(input_shape)).reshape(input_shape) + | >>> x + | [[[[0 1] + | [2 3]]]] + | >>> y = keras.layers.ZeroPadding2D(padding=1)(x) + | >>> y + | [[[[0 0] + | [0 0] + | [0 0] + | [0 0]] + | [[0 0] + | [0 1] + | [2 3] + | [0 0]] + | [[0 0] + | [0 0] + | [0 0] + | [0 0]]]] + | + | Args: + | padding: Int, or tuple of 2 ints, or tuple of 2 tuples of 2 ints. + | - If int: the same symmetric padding is applied to height and width. + | - If tuple of 2 ints: interpreted as two different symmetric padding + | values for height and width: + | `(symmetric_height_pad, symmetric_width_pad)`. + | - If tuple of 2 tuples of 2 ints: interpreted as + | `((top_pad, bottom_pad), (left_pad, right_pad))`. + | data_format: A string, one of `"channels_last"` (default) or + | `"channels_first"`. The ordering of the dimensions in the inputs. + | `"channels_last"` corresponds to inputs with shape + | `(batch_size, height, width, channels)` while `"channels_first"` + | corresponds to inputs with shape + | `(batch_size, channels, height, width)`. + | When unspecified, uses `image_data_format` value found in your Keras + | config file at `~/.keras/keras.json` (if exists). Defaults to + | `"channels_last"`. 
+ | + | Input shape: + | 4D tensor with shape: + | - If `data_format` is `"channels_last"`: + | `(batch_size, height, width, channels)` + | - If `data_format` is `"channels_first"`: + | `(batch_size, channels, height, width)` + | + | Output shape: + | 4D tensor with shape: + | - If `data_format` is `"channels_last"`: + | `(batch_size, padded_height, padded_width, channels)` + | - If `data_format` is `"channels_first"`: + | `(batch_size, channels, padded_height, padded_width)` + | + | Method resolution order: + | ZeroPadding2D + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | padding=(1, 1), + | data_format=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | call(self, inputs) + | + | compute_output_shape(self, input_shape) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + diff --git a/.tether/man/layer_zero_padding_3d.txt b/.tether/man/layer_zero_padding_3d.txt new file mode 100644 index 0000000000..a7a44b49f5 --- /dev/null +++ b/.tether/man/layer_zero_padding_3d.txt @@ -0,0 +1,84 @@ +Help on class ZeroPadding3D in module keras.src.layers.reshaping.zero_padding3d: + +class ZeroPadding3D(keras.src.layers.layer.Layer) + | ZeroPadding3D(padding=((1, 1), (1, 1), (1, 1)), data_format=None, **kwargs) + | + | Zero-padding layer for 3D data (spatial or spatio-temporal). + | + | Example: + | + | >>> input_shape = (1, 1, 2, 2, 3) + | >>> x = np.arange(np.prod(input_shape)).reshape(input_shape) + | >>> y = keras.layers.ZeroPadding3D(padding=2)(x) + | >>> y.shape + | (1, 5, 6, 6, 3) + | + | Args: + | padding: Int, or tuple of 3 ints, or tuple of 3 tuples of 2 ints. + | - If int: the same symmetric padding is applied to depth, height, + | and width. + | - If tuple of 3 ints: interpreted as three different symmetric + | padding values for depth, height, and width: + | `(symmetric_dim1_pad, symmetric_dim2_pad, symmetric_dim3_pad)`. + | - If tuple of 3 tuples of 2 ints: interpreted as + | `((left_dim1_pad, right_dim1_pad), (left_dim2_pad, + | right_dim2_pad), (left_dim3_pad, right_dim3_pad))`. + | data_format: A string, one of `"channels_last"` (default) or + | `"channels_first"`. The ordering of the dimensions in the inputs. + | `"channels_last"` corresponds to inputs with shape + | `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` + | while `"channels_first"` corresponds to inputs with shape + | `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. + | When unspecified, uses `image_data_format` value found in your Keras + | config file at `~/.keras/keras.json` (if exists). Defaults to + | `"channels_last"`. 
+ |
+ | Input shape:
+ |     5D tensor with shape:
+ |     - If `data_format` is `"channels_last"`:
+ |         `(batch_size, first_axis_to_pad, second_axis_to_pad,
+ |         third_axis_to_pad, depth)`
+ |     - If `data_format` is `"channels_first"`:
+ |         `(batch_size, depth, first_axis_to_pad, second_axis_to_pad,
+ |         third_axis_to_pad)`
+ |
+ | Output shape:
+ |     5D tensor with shape:
+ |     - If `data_format` is `"channels_last"`:
+ |         `(batch_size, first_padded_axis, second_padded_axis,
+ |         third_padded_axis, depth)`
+ |     - If `data_format` is `"channels_first"`:
+ |         `(batch_size, depth, first_padded_axis, second_padded_axis,
+ |         third_padded_axis)`
+ |
+ | Method resolution order:
+ |     ZeroPadding3D
+ |     keras.src.layers.layer.Layer
+ |     keras.src.backend.tensorflow.layer.TFLayer
+ |     keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ |     tensorflow.python.trackable.autotrackable.AutoTrackable
+ |     tensorflow.python.trackable.base.Trackable
+ |     keras.src.ops.operation.Operation
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ |     self,
+ |     padding=((1, 1), (1, 1), (1, 1)),
+ |     data_format=None,
+ |     **kwargs
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | call(self, inputs)
+ |
+ | compute_output_shape(self, input_shape)
+ |
+ | get_config(self)
+ | Returns the config of the object.
+ |
+ | An object config is a Python dictionary (serializable)
+ | containing the information needed to re-instantiate it.
+ |
+
diff --git a/.tether/man/learning_rate_schedule_cosine_decay.txt b/.tether/man/learning_rate_schedule_cosine_decay.txt
new file mode 100644
index 0000000000..32480d9c5e
--- /dev/null
+++ b/.tether/man/learning_rate_schedule_cosine_decay.txt
@@ -0,0 +1,122 @@
+Help on class CosineDecay in module keras.src.optimizers.schedules.learning_rate_schedule:
+
+class CosineDecay(LearningRateSchedule)
+ | CosineDecay(initial_learning_rate, decay_steps, alpha=0.0, name='CosineDecay', warmup_target=None, warmup_steps=0)
+ |
+ | A `LearningRateSchedule` that uses a cosine decay with optional warmup.
+ |
+ | See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983),
+ | SGDR: Stochastic Gradient Descent with Warm Restarts.
+ |
+ | For the idea of a linear warmup of our learning rate,
+ | see [Goyal et al.](https://arxiv.org/pdf/1706.02677.pdf).
+ |
+ | When we begin training a model, we often want an initial increase in our
+ | learning rate followed by a decay. If `warmup_target` is not `None`, this
+ | schedule applies a linear increase per optimizer step to our learning rate
+ | from `initial_learning_rate` to `warmup_target` for a duration of
+ | `warmup_steps`. Afterwards, it applies a cosine decay function taking our
+ | learning rate from `warmup_target` to `alpha` for a duration of
+ | `decay_steps`. If `warmup_target` is `None` we skip warmup and our decay
+ | will take our learning rate from `initial_learning_rate` to `alpha`.
+ | It requires a `step` value to compute the learning rate. You can
+ | just pass a backend variable that you increment at each training step.
+ |
+ | The schedule is a 1-arg callable that produces a warmup followed by a
+ | decayed learning rate when passed the current optimizer step. This can be
+ | useful for changing the learning rate value across different invocations of
+ | optimizer functions.
+ |
+ | Our warmup is computed as:
+ |
+ | ```python
+ | def warmup_learning_rate(step):
+ |     completed_fraction = step / warmup_steps
+ |     total_delta = warmup_target - initial_learning_rate
+ |     return initial_learning_rate + completed_fraction * total_delta
+ | ```
+ |
+ | And our decay is computed as:
+ |
+ | ```python
+ | if warmup_target is None:
+ |     initial_decay_lr = initial_learning_rate
+ | else:
+ |     initial_decay_lr = warmup_target
+ |
+ | def decayed_learning_rate(step):
+ |     step = min(step, decay_steps)
+ |     cosine_decay = 0.5 * (1 + cos(pi * step / decay_steps))
+ |     decayed = (1 - alpha) * cosine_decay + alpha
+ |     return initial_decay_lr * decayed
+ | ```
+ |
+ | Example usage without warmup:
+ |
+ | ```python
+ | decay_steps = 1000
+ | initial_learning_rate = 0.1
+ | lr_decayed_fn = keras.optimizers.schedules.CosineDecay(
+ |     initial_learning_rate, decay_steps)
+ | ```
+ |
+ | Example usage with warmup:
+ |
+ | ```python
+ | decay_steps = 1000
+ | initial_learning_rate = 0
+ | warmup_steps = 1000
+ | target_learning_rate = 0.1
+ | lr_warmup_decayed_fn = keras.optimizers.schedules.CosineDecay(
+ |     initial_learning_rate, decay_steps, warmup_target=target_learning_rate,
+ |     warmup_steps=warmup_steps
+ | )
+ | ```
+ |
+ | You can pass this schedule directly into a `keras.optimizers.Optimizer`
+ | as the learning rate. The learning rate schedule is also serializable and
+ | deserializable using `keras.optimizers.schedules.serialize` and
+ | `keras.optimizers.schedules.deserialize`.
+ |
+ | Args:
+ | initial_learning_rate: A Python float. The initial learning rate.
+ | decay_steps: A Python int. Number of steps to decay over.
+ | alpha: A Python float. Minimum learning rate value for decay as a
+ |     fraction of `initial_learning_rate`.
+ | name: String. Optional name of the operation. Defaults to
+ |     `"CosineDecay"`.
+ | warmup_target: A Python float. The target learning rate for our
+ |     warmup phase. Will cast to the `initial_learning_rate` datatype.
+ |     Setting to `None` will skip warmup and begin the decay phase from
+ |     `initial_learning_rate`. Otherwise the schedule will warm up from
+ |     `initial_learning_rate` to `warmup_target`.
+ | warmup_steps: A Python int. Number of steps to warmup over.
+ |
+ | Returns:
+ |     A 1-arg callable learning rate schedule that takes the current optimizer
+ |     step and outputs the decayed learning rate, a scalar tensor of the
+ |     same type as `initial_learning_rate`.
+ |
+ | Method resolution order:
+ |     CosineDecay
+ |     LearningRateSchedule
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __call__(self, step)
+ | Call self as a function.
+ |
+ | __init__(
+ |     self,
+ |     initial_learning_rate,
+ |     decay_steps,
+ |     alpha=0.0,
+ |     name='CosineDecay',
+ |     warmup_target=None,
+ |     warmup_steps=0
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ |
diff --git a/.tether/man/learning_rate_schedule_cosine_decay_restarts.txt b/.tether/man/learning_rate_schedule_cosine_decay_restarts.txt
new file mode 100644
index 0000000000..6a7d32fc40
--- /dev/null
+++ b/.tether/man/learning_rate_schedule_cosine_decay_restarts.txt
@@ -0,0 +1,80 @@
+Help on class CosineDecayRestarts in module keras.src.optimizers.schedules.learning_rate_schedule:
+
+class CosineDecayRestarts(LearningRateSchedule)
+ | CosineDecayRestarts(initial_learning_rate, first_decay_steps, t_mul=2.0, m_mul=1.0, alpha=0.0, name='SGDRDecay')
+ |
+ | A `LearningRateSchedule` that uses a cosine decay schedule with restarts.
+ | + | See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983), + | SGDR: Stochastic Gradient Descent with Warm Restarts. + | + | When training a model, it is often useful to lower the learning rate as + | the training progresses. This schedule applies a cosine decay function with + | restarts to an optimizer step, given a provided initial learning rate. + | It requires a `step` value to compute the decayed learning rate. You can + | just pass a backend variable that you increment at each training step. + | + | The schedule is a 1-arg callable that produces a decayed learning + | rate when passed the current optimizer step. This can be useful for changing + | the learning rate value across different invocations of optimizer functions. + | + | The learning rate multiplier first decays + | from 1 to `alpha` for `first_decay_steps` steps. Then, a warm + | restart is performed. Each new warm restart runs for `t_mul` times more + | steps and with `m_mul` times initial learning rate as the new learning rate. + | + | Example: + | ```python + | first_decay_steps = 1000 + | lr_decayed_fn = ( + | keras.optimizers.schedules.CosineDecayRestarts( + | initial_learning_rate, + | first_decay_steps)) + | ``` + | + | You can pass this schedule directly into a `keras.optimizers.Optimizer` + | as the learning rate. The learning rate schedule is also serializable and + | deserializable using `keras.optimizers.schedules.serialize` and + | `keras.optimizers.schedules.deserialize`. + | + | Args: + | initial_learning_rate: A Python float. The initial learning rate. + | first_decay_steps: A Python integer. Number of steps to decay over. + | t_mul: A Python float. Used to derive the number of iterations in + | the i-th period. + | m_mul: A Python float. Used to derive the initial learning rate of + | the i-th period. + | alpha: A Python float. Minimum learning rate value as a fraction of + | the `initial_learning_rate`. + | name: String. Optional name of the operation. Defaults to + | `"SGDRDecay"`. + | + | Returns: + | A 1-arg callable learning rate schedule that takes the current optimizer + | step and outputs the decayed learning rate, a scalar tensor of the + | same type as `initial_learning_rate`. + | + | Method resolution order: + | CosineDecayRestarts + | LearningRateSchedule + | builtins.object + | + | Methods defined here: + | + | __call__(self, step) + | Call self as a function. + | + | __init__( + | self, + | initial_learning_rate, + | first_decay_steps, + | t_mul=2.0, + | m_mul=1.0, + | alpha=0.0, + | name='SGDRDecay' + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | + diff --git a/.tether/man/learning_rate_schedule_exponential_decay.txt b/.tether/man/learning_rate_schedule_exponential_decay.txt new file mode 100644 index 0000000000..2cb1164a91 --- /dev/null +++ b/.tether/man/learning_rate_schedule_exponential_decay.txt @@ -0,0 +1,86 @@ +Help on class ExponentialDecay in module keras.src.optimizers.schedules.learning_rate_schedule: + +class ExponentialDecay(LearningRateSchedule) + | ExponentialDecay(initial_learning_rate, decay_steps, decay_rate, staircase=False, name='ExponentialDecay') + | + | A `LearningRateSchedule` that uses an exponential decay schedule. + | + | When training a model, it is often useful to lower the learning rate as + | the training progresses. This schedule applies an exponential decay function + | to an optimizer step, given a provided initial learning rate. 
+ |
+ | The schedule is a 1-arg callable that produces a decayed learning
+ | rate when passed the current optimizer step. This can be useful for changing
+ | the learning rate value across different invocations of optimizer functions.
+ | It is computed as:
+ |
+ | ```python
+ | def decayed_learning_rate(step):
+ |     return initial_learning_rate * decay_rate ** (step / decay_steps)
+ | ```
+ |
+ | If the argument `staircase` is `True`, then `step / decay_steps` is
+ | an integer division and the decayed learning rate follows a
+ | staircase function.
+ |
+ | You can pass this schedule directly into a `keras.optimizers.Optimizer`
+ | as the learning rate.
+ | Example: When fitting a Keras model, decay every 100000 steps with a base
+ | of 0.96:
+ |
+ | ```python
+ | initial_learning_rate = 0.1
+ | lr_schedule = keras.optimizers.schedules.ExponentialDecay(
+ |     initial_learning_rate,
+ |     decay_steps=100000,
+ |     decay_rate=0.96,
+ |     staircase=True)
+ |
+ | model.compile(optimizer=keras.optimizers.SGD(learning_rate=lr_schedule),
+ |               loss='sparse_categorical_crossentropy',
+ |               metrics=['accuracy'])
+ |
+ | model.fit(data, labels, epochs=5)
+ | ```
+ |
+ | The learning rate schedule is also serializable and deserializable using
+ | `keras.optimizers.schedules.serialize` and
+ | `keras.optimizers.schedules.deserialize`.
+ |
+ | Args:
+ |     initial_learning_rate: A Python float. The initial learning rate.
+ |     decay_steps: A Python integer. Must be positive. See the decay
+ |         computation above.
+ |     decay_rate: A Python float. The decay rate.
+ |     staircase: Boolean. If `True`, decay the learning rate at discrete
+ |         intervals.
+ |     name: String. Optional name of the operation. Defaults to
+ |         `"ExponentialDecay"`.
+ |
+ | Returns:
+ |     A 1-arg callable learning rate schedule that takes the current optimizer
+ |     step and outputs the decayed learning rate, a scalar tensor of the
+ |     same type as `initial_learning_rate`.
+ |
+ | Method resolution order:
+ |     ExponentialDecay
+ |     LearningRateSchedule
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __call__(self, step)
+ |     Call self as a function.
+ |
+ | __init__(
+ |     self,
+ |     initial_learning_rate,
+ |     decay_steps,
+ |     decay_rate,
+ |     staircase=False,
+ |     name='ExponentialDecay'
+ | )
+ |     Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ |
diff --git a/.tether/man/learning_rate_schedule_inverse_time_decay.txt b/.tether/man/learning_rate_schedule_inverse_time_decay.txt
new file mode 100644
index 0000000000..8ce8642e85
--- /dev/null
+++ b/.tether/man/learning_rate_schedule_inverse_time_decay.txt
@@ -0,0 +1,87 @@
+Help on class InverseTimeDecay in module keras.src.optimizers.schedules.learning_rate_schedule:
+
+class InverseTimeDecay(LearningRateSchedule)
+ | InverseTimeDecay(initial_learning_rate, decay_steps, decay_rate, staircase=False, name='InverseTimeDecay')
+ |
+ | A `LearningRateSchedule` that uses an inverse time decay schedule.
+ |
+ | When training a model, it is often useful to lower the learning rate as
+ | the training progresses. This schedule applies the inverse decay function
+ | to an optimizer step, given a provided initial learning rate.
+ | It requires a `step` value to compute the decayed learning rate. You can
+ | just pass a backend variable that you increment at each training step.
+ |
+ | The schedule is a 1-arg callable that produces a decayed learning
+ | rate when passed the current optimizer step.
+ | This can be useful for changing
+ | the learning rate value across different invocations of optimizer functions.
+ | It is computed as:
+ |
+ | ```python
+ | def decayed_learning_rate(step):
+ |     return initial_learning_rate / (1 + decay_rate * step / decay_steps)
+ | ```
+ |
+ | or, if `staircase` is `True`, as:
+ |
+ | ```python
+ | def decayed_learning_rate(step):
+ |     return initial_learning_rate / (
+ |         1 + decay_rate * floor(step / decay_steps))
+ | ```
+ |
+ | You can pass this schedule directly into a `keras.optimizers.Optimizer`
+ | as the learning rate.
+ | Example: Fit a Keras model when decaying 1/t with a rate of 0.5:
+ |
+ | ```python
+ | ...
+ | initial_learning_rate = 0.1
+ | decay_steps = 1.0
+ | decay_rate = 0.5
+ | learning_rate_fn = keras.optimizers.schedules.InverseTimeDecay(
+ |     initial_learning_rate, decay_steps, decay_rate)
+ |
+ | model.compile(optimizer=keras.optimizers.SGD(
+ |                   learning_rate=learning_rate_fn),
+ |               loss='sparse_categorical_crossentropy',
+ |               metrics=['accuracy'])
+ |
+ | model.fit(data, labels, epochs=5)
+ | ```
+ |
+ | Args:
+ |     initial_learning_rate: A Python float. The initial learning rate.
+ |     decay_steps: How often to apply decay.
+ |     decay_rate: A Python number. The decay rate.
+ |     staircase: Whether to apply decay in a discrete staircase fashion,
+ |         as opposed to continuously.
+ |     name: String. Optional name of the operation. Defaults to
+ |         `"InverseTimeDecay"`.
+ |
+ | Returns:
+ |     A 1-arg callable learning rate schedule that takes the current optimizer
+ |     step and outputs the decayed learning rate, a scalar tensor of the
+ |     same type as `initial_learning_rate`.
+ |
+ | Method resolution order:
+ |     InverseTimeDecay
+ |     LearningRateSchedule
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __call__(self, step)
+ |     Call self as a function.
+ |
+ | __init__(
+ |     self,
+ |     initial_learning_rate,
+ |     decay_steps,
+ |     decay_rate,
+ |     staircase=False,
+ |     name='InverseTimeDecay'
+ | )
+ |     Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ |
diff --git a/.tether/man/learning_rate_schedule_piecewise_constant_decay.txt b/.tether/man/learning_rate_schedule_piecewise_constant_decay.txt
new file mode 100644
index 0000000000..6c052637b4
--- /dev/null
+++ b/.tether/man/learning_rate_schedule_piecewise_constant_decay.txt
@@ -0,0 +1,76 @@
+Help on class PiecewiseConstantDecay in module keras.src.optimizers.schedules.learning_rate_schedule:
+
+class PiecewiseConstantDecay(LearningRateSchedule)
+ | PiecewiseConstantDecay(boundaries, values, name='PiecewiseConstant')
+ |
+ | A `LearningRateSchedule` that uses a piecewise constant decay schedule.
+ |
+ | The function returns a 1-arg callable to compute the piecewise constant
+ | when passed the current optimizer step. This can be useful for changing the
+ | learning rate value across different invocations of optimizer functions.
+ |
+ | Example: use a learning rate that's 1.0 for the first 100001 steps, 0.5
+ | for the next 10000 steps, and 0.1 for any additional steps.
+ |
+ | ```python
+ | step = ops.array(0)
+ | boundaries = [100000, 110000]
+ | values = [1.0, 0.5, 0.1]
+ | learning_rate_fn = keras.optimizers.schedules.PiecewiseConstantDecay(
+ |     boundaries, values)
+ |
+ | # Later, whenever we perform an optimization step, we pass in the step.
+ | learning_rate = learning_rate_fn(step)
+ | ```
+ |
+ | You can pass this schedule directly into a `keras.optimizers.Optimizer`
+ | as the learning rate.
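+ |
+ | For instance (an illustrative sketch; any optimizer that accepts a
+ | schedule works the same way):
+ |
+ | ```python
+ | import keras
+ |
+ | boundaries = [100000, 110000]
+ | values = [1.0, 0.5, 0.1]
+ | lr_schedule = keras.optimizers.schedules.PiecewiseConstantDecay(
+ |     boundaries, values)
+ | # The schedule object goes wherever a float learning rate is accepted.
+ | optimizer = keras.optimizers.SGD(learning_rate=lr_schedule)
+ | ```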
+ | The learning rate schedule is also serializable and
+ | deserializable using `keras.optimizers.schedules.serialize` and
+ | `keras.optimizers.schedules.deserialize`.
+ |
+ | Args:
+ |     boundaries: A list of Python numbers with strictly increasing
+ |         entries, and with all elements having the same type as the
+ |         optimizer step.
+ |     values: A list of Python numbers that specifies the values for the
+ |         intervals defined by `boundaries`. It should have one more
+ |         element than `boundaries`, and all elements should have the same
+ |         type.
+ |     name: A string. Optional name of the operation. Defaults to
+ |         `"PiecewiseConstant"`.
+ |
+ | Returns:
+ |     A 1-arg callable learning rate schedule that takes the current optimizer
+ |     step and outputs the decayed learning rate, a scalar tensor of the
+ |     same type as the boundary tensors.
+ |
+ |     The output of the 1-arg function that takes the `step`
+ |     is `values[0]` when `step <= boundaries[0]`,
+ |     `values[1]` when `step > boundaries[0]` and `step <= boundaries[1]`,
+ |     ..., and `values[-1]` when `step > boundaries[-1]`.
+ |
+ | Raises:
+ |     ValueError: if the number of elements in the `boundaries` and `values`
+ |         lists do not match.
+ |
+ | Method resolution order:
+ |     PiecewiseConstantDecay
+ |     LearningRateSchedule
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __call__(self, step)
+ |     Call self as a function.
+ |
+ | __init__(
+ |     self,
+ |     boundaries,
+ |     values,
+ |     name='PiecewiseConstant'
+ | )
+ |     Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ |
diff --git a/.tether/man/learning_rate_schedule_polynomial_decay.txt b/.tether/man/learning_rate_schedule_polynomial_decay.txt
new file mode 100644
index 0000000000..553bd48aee
--- /dev/null
+++ b/.tether/man/learning_rate_schedule_polynomial_decay.txt
@@ -0,0 +1,108 @@
+Help on class PolynomialDecay in module keras.src.optimizers.schedules.learning_rate_schedule:
+
+class PolynomialDecay(LearningRateSchedule)
+ | PolynomialDecay(initial_learning_rate, decay_steps, end_learning_rate=0.0001, power=1.0, cycle=False, name='PolynomialDecay')
+ |
+ | A `LearningRateSchedule` that uses a polynomial decay schedule.
+ |
+ | It is commonly observed that a monotonically decreasing learning rate, whose
+ | degree of change is carefully chosen, results in a better performing model.
+ | This schedule applies a polynomial decay function to an optimizer step,
+ | given a provided `initial_learning_rate`, to reach an `end_learning_rate`
+ | in the given `decay_steps`.
+ |
+ | It requires a `step` value to compute the decayed learning rate. You
+ | can just pass a backend variable that you increment at each training
+ | step.
+ |
+ | The schedule is a 1-arg callable that produces a decayed learning rate
+ | when passed the current optimizer step. This can be useful for changing the
+ | learning rate value across different invocations of optimizer functions.
+ | It is computed as:
+ |
+ | ```python
+ | def decayed_learning_rate(step):
+ |     step = min(step, decay_steps)
+ |     return ((initial_learning_rate - end_learning_rate) *
+ |             (1 - step / decay_steps) ** power
+ |            ) + end_learning_rate
+ | ```
+ |
+ | If `cycle` is True then a multiple of `decay_steps` is used, the first one
+ | that is bigger than `step`.
+ |
+ | ```python
+ | def decayed_learning_rate(step):
+ |     decay_steps = decay_steps * ceil(step / decay_steps)
+ |     return ((initial_learning_rate - end_learning_rate) *
+ |             (1 - step / decay_steps) ** power
+ |            ) + end_learning_rate
+ | ```
+ |
+ | You can pass this schedule directly into a `keras.optimizers.Optimizer`
+ | as the learning rate.
+ | Example: Fit a model while decaying from 0.1 to 0.01 in 10000 steps using
+ | sqrt (i.e. power=0.5):
+ |
+ | ```python
+ | ...
+ | starter_learning_rate = 0.1
+ | end_learning_rate = 0.01
+ | decay_steps = 10000
+ | learning_rate_fn = keras.optimizers.schedules.PolynomialDecay(
+ |     starter_learning_rate,
+ |     decay_steps,
+ |     end_learning_rate,
+ |     power=0.5)
+ |
+ | model.compile(optimizer=keras.optimizers.SGD(
+ |                   learning_rate=learning_rate_fn),
+ |               loss='sparse_categorical_crossentropy',
+ |               metrics=['accuracy'])
+ |
+ | model.fit(data, labels, epochs=5)
+ | ```
+ |
+ | The learning rate schedule is also serializable and deserializable using
+ | `keras.optimizers.schedules.serialize` and
+ | `keras.optimizers.schedules.deserialize`.
+ |
+ | Args:
+ |     initial_learning_rate: A Python float. The initial learning rate.
+ |     decay_steps: A Python integer. Must be positive. See the decay
+ |         computation above.
+ |     end_learning_rate: A Python float. The minimal end learning rate.
+ |     power: A Python float. The power of the polynomial. Defaults to
+ |         `1.0`.
+ |     cycle: A boolean, whether it should cycle beyond `decay_steps`.
+ |     name: String. Optional name of the operation. Defaults to
+ |         `"PolynomialDecay"`.
+ |
+ | Returns:
+ |     A 1-arg callable learning rate schedule that takes the current optimizer
+ |     step and outputs the decayed learning rate, a scalar tensor of the
+ |     same type as `initial_learning_rate`.
+ |
+ | Method resolution order:
+ |     PolynomialDecay
+ |     LearningRateSchedule
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __call__(self, step)
+ |     Call self as a function.
+ |
+ | __init__(
+ |     self,
+ |     initial_learning_rate,
+ |     decay_steps,
+ |     end_learning_rate=0.0001,
+ |     power=1.0,
+ |     cycle=False,
+ |     name='PolynomialDecay'
+ | )
+ |     Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ |
diff --git a/.tether/man/load_model.txt b/.tether/man/load_model.txt
new file mode 100644
index 0000000000..6111172848
--- /dev/null
+++ b/.tether/man/load_model.txt
@@ -0,0 +1,43 @@
+__signature__
+keras.saving.load_model(
+    filepath,
+    custom_objects=None,
+    compile=True,
+    safe_mode=True
+)
+__doc__
+Loads a model saved via `model.save()`.
+
+Args:
+    filepath: `str` or `pathlib.Path` object, path to the saved model file.
+    custom_objects: Optional dictionary mapping names
+        (strings) to custom classes or functions to be
+        considered during deserialization.
+    compile: Boolean, whether to compile the model after loading.
+    safe_mode: Boolean, whether to disallow unsafe `lambda` deserialization.
+        When `safe_mode=False`, loading an object has the potential to
+        trigger arbitrary code execution. This argument is only
+        applicable to the Keras v3 model format. Defaults to `True`.
+
+Returns:
+    A Keras model instance. If the original model was compiled,
+    and the argument `compile=True` is set, then the returned model
+    will be compiled. Otherwise, the model will be left uncompiled.
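+
+If the saved model defines custom objects, pass them at load time via
+`custom_objects` (a minimal sketch; the `MyLayer` class below is a
+hypothetical example, not part of the API):
+
+```python
+import keras
+
+class MyLayer(keras.layers.Layer):
+    def call(self, x):
+        return x * 2.0
+
+model = keras.Sequential([keras.Input(shape=(3,)), MyLayer()])
+model.save("custom.keras")
+
+# Without `custom_objects`, deserialization cannot resolve `MyLayer`.
+loaded = keras.saving.load_model(
+    "custom.keras", custom_objects={"MyLayer": MyLayer})
+```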
+ +Example: + +```python +model = keras.Sequential([ + keras.layers.Dense(5, input_shape=(3,)), + keras.layers.Softmax()]) +model.save("model.keras") +loaded_model = keras.saving.load_model("model.keras") +x = np.random.random((10, 3)) +assert np.allclose(model.predict(x), loaded_model.predict(x)) +``` + +Note that the model variables may have different name values +(`var.name` property, e.g. `"dense_1/kernel:0"`) after being reloaded. +It is recommended that you use layer attributes to +access specific variables, e.g. `model.get_layer("dense_1").kernel`. + diff --git a/.tether/man/load_model_config.txt b/.tether/man/load_model_config.txt new file mode 100644 index 0000000000..7a934efde2 --- /dev/null +++ b/.tether/man/load_model_config.txt @@ -0,0 +1,22 @@ +__signature__ +keras.models.model_from_json(json_string, custom_objects=None) +__doc__ +Parses a JSON model configuration string and returns a model instance. + +Example: + +>>> model = keras.Sequential([ +... keras.layers.Dense(5, input_shape=(3,)), +... keras.layers.Softmax()]) +>>> config = model.to_json() +>>> loaded_model = keras.models.model_from_json(config) + +Args: + json_string: JSON string encoding a model configuration. + custom_objects: Optional dictionary mapping names + (strings) to custom classes or functions to be + considered during deserialization. + +Returns: + A Keras model instance (uncompiled). + diff --git a/.tether/man/load_model_weights.txt b/.tether/man/load_model_weights.txt new file mode 100644 index 0000000000..9a57d484af --- /dev/null +++ b/.tether/man/load_model_weights.txt @@ -0,0 +1,32 @@ +__signature__ +keras.Model.load_weights( + self, + filepath, + skip_mismatch=False, + **kwargs +) +__doc__ +Load weights from a file saved via `save_weights()`. + +Weights are loaded based on the network's +topology. This means the architecture should be the same as when the +weights were saved. Note that layers that don't have weights are not +taken into account in the topological ordering, so adding or removing +layers is fine as long as they don't have weights. + +**Partial weight loading** + +If you have modified your model, for instance by adding a new layer +(with weights) or by changing the shape of the weights of a layer, +you can choose to ignore errors and continue loading +by setting `skip_mismatch=True`. In this case any layer with +mismatching weights will be skipped. A warning will be displayed +for each skipped layer. + +Args: + filepath: String, path to the weights file to load. + It can either be a `.weights.h5` file + or a legacy `.h5` weights file. + skip_mismatch: Boolean, whether to skip loading of layers where + there is a mismatch in the number of weights, or a mismatch in + the shape of the weights. diff --git a/.tether/man/loss_binary_crossentropy.txt b/.tether/man/loss_binary_crossentropy.txt new file mode 100644 index 0000000000..7dc76321b3 --- /dev/null +++ b/.tether/man/loss_binary_crossentropy.txt @@ -0,0 +1,104 @@ +Help on class BinaryCrossentropy in module keras.src.losses.losses: + +class BinaryCrossentropy(LossFunctionWrapper) + | BinaryCrossentropy(from_logits=False, label_smoothing=0.0, axis=-1, reduction='sum_over_batch_size', name='binary_crossentropy') + | + | Computes the cross-entropy loss between true labels and predicted labels. + | + | Use this cross-entropy loss for binary (0 or 1) classification applications. + | The loss function requires the following inputs: + | + | - `y_true` (true label): This is either 0 or 1. 
+ | - `y_pred` (predicted value): This is the model's prediction, i.e., a single
+ |     floating-point value which either represents a
+ |     [logit](https://en.wikipedia.org/wiki/Logit), (i.e., value in [-inf, inf]
+ |     when `from_logits=True`) or a probability (i.e., value in [0., 1.] when
+ |     `from_logits=False`).
+ |
+ | Args:
+ |     from_logits: Whether to interpret `y_pred` as a tensor of
+ |         [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we
+ |         assume that `y_pred` is probabilities (i.e., values in [0, 1]).
+ |     label_smoothing: Float in range [0, 1]. When 0, no smoothing occurs.
+ |         When > 0, we compute the loss between the predicted labels
+ |         and a smoothed version of the true labels, where the smoothing
+ |         squeezes the labels towards 0.5. Larger values of
+ |         `label_smoothing` correspond to heavier smoothing.
+ |     axis: The axis along which to compute crossentropy (the features axis).
+ |         Defaults to `-1`.
+ |     reduction: Type of reduction to apply to the loss. In almost all cases
+ |         this should be `"sum_over_batch_size"`.
+ |         Supported options are `"sum"`, `"sum_over_batch_size"` or `None`.
+ |     name: Optional name for the loss instance.
+ |
+ | Examples:
+ |
+ | **Recommended Usage:** (set `from_logits=True`)
+ |
+ | With `compile()` API:
+ |
+ | ```python
+ | model.compile(
+ |     loss=keras.losses.BinaryCrossentropy(from_logits=True),
+ |     ...
+ | )
+ | ```
+ |
+ | As a standalone function:
+ |
+ | >>> # Example 1: (batch_size = 1, number of samples = 4)
+ | >>> y_true = [0, 1, 0, 0]
+ | >>> y_pred = [-18.6, 0.51, 2.94, -12.8]
+ | >>> bce = keras.losses.BinaryCrossentropy(from_logits=True)
+ | >>> bce(y_true, y_pred)
+ | 0.865
+ |
+ | >>> # Example 2: (batch_size = 2, number of samples = 4)
+ | >>> y_true = [[0, 1], [0, 0]]
+ | >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]]
+ | >>> # Using default 'auto'/'sum_over_batch_size' reduction type.
+ | >>> bce = keras.losses.BinaryCrossentropy(from_logits=True)
+ | >>> bce(y_true, y_pred)
+ | 0.865
+ | >>> # Using 'sample_weight' attribute
+ | >>> bce(y_true, y_pred, sample_weight=[0.8, 0.2])
+ | 0.243
+ | >>> # Using 'sum' reduction type.
+ | >>> bce = keras.losses.BinaryCrossentropy(from_logits=True,
+ | ...     reduction="sum")
+ | >>> bce(y_true, y_pred)
+ | 1.730
+ | >>> # Using 'none' reduction type.
+ | >>> bce = keras.losses.BinaryCrossentropy(from_logits=True,
+ | ...     reduction=None)
+ | >>> bce(y_true, y_pred)
+ | array([0.235, 1.496], dtype=float32)
+ |
+ | **Default Usage:** (set `from_logits=False`)
+ |
+ | >>> # Make the following updates to the above "Recommended Usage" section
+ | >>> # 1. Set `from_logits=False`
+ | >>> keras.losses.BinaryCrossentropy() # OR ...(from_logits=False)
+ | >>> # 2. Update `y_pred` to use probabilities instead of logits
+ | >>> y_pred = [0.6, 0.3, 0.2, 0.8] # OR [[0.6, 0.3], [0.2, 0.8]]
+ |
+ | Method resolution order:
+ |     BinaryCrossentropy
+ |     LossFunctionWrapper
+ |     keras.src.losses.loss.Loss
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ |     self,
+ |     from_logits=False,
+ |     label_smoothing=0.0,
+ |     axis=-1,
+ |     reduction='sum_over_batch_size',
+ |     name='binary_crossentropy'
+ | )
+ |     Initialize self. See help(type(self)) for accurate signature.
+ | + | get_config(self) + | diff --git a/.tether/man/loss_binary_focal_crossentropy.txt b/.tether/man/loss_binary_focal_crossentropy.txt new file mode 100644 index 0000000000..5c5c7d6af6 --- /dev/null +++ b/.tether/man/loss_binary_focal_crossentropy.txt @@ -0,0 +1,157 @@ +Help on class BinaryFocalCrossentropy in module keras.src.losses.losses: + +class BinaryFocalCrossentropy(LossFunctionWrapper) + | BinaryFocalCrossentropy(apply_class_balancing=False, alpha=0.25, gamma=2.0, from_logits=False, label_smoothing=0.0, axis=-1, reduction='sum_over_batch_size', name='binary_focal_crossentropy') + | + | Computes focal cross-entropy loss between true labels and predictions. + | + | Binary cross-entropy loss is often used for binary (0 or 1) classification + | tasks. The loss function requires the following inputs: + | + | - `y_true` (true label): This is either 0 or 1. + | - `y_pred` (predicted value): This is the model's prediction, i.e, a single + | floating-point value which either represents a + | [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf] + | when `from_logits=True`) or a probability (i.e, value in `[0., 1.]` when + | `from_logits=False`). + | + | According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it + | helps to apply a "focal factor" to down-weight easy examples and focus more + | on hard examples. By default, the focal tensor is computed as follows: + | + | `focal_factor = (1 - output) ** gamma` for class 1 + | `focal_factor = output ** gamma` for class 0 + | where `gamma` is a focusing parameter. When `gamma=0`, this function is + | equivalent to the binary crossentropy loss. + | + | Args: + | apply_class_balancing: A bool, whether to apply weight balancing on the + | binary classes 0 and 1. + | alpha: A weight balancing factor for class 1, default is `0.25` as + | mentioned in reference [Lin et al., 2018]( + | https://arxiv.org/pdf/1708.02002.pdf). The weight for class 0 is + | `1.0 - alpha`. + | gamma: A focusing parameter used to compute the focal factor, default is + | `2.0` as mentioned in the reference + | [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf). + | from_logits: Whether to interpret `y_pred` as a tensor of + | [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we + | assume that `y_pred` are probabilities (i.e., values in `[0, 1]`). + | label_smoothing: Float in `[0, 1]`. When `0`, no smoothing occurs. + | When > `0`, we compute the loss between the predicted labels + | and a smoothed version of the true labels, where the smoothing + | squeezes the labels towards `0.5`. + | Larger values of `label_smoothing` correspond to heavier smoothing. + | axis: The axis along which to compute crossentropy (the features axis). + | Defaults to `-1`. + | reduction: Type of reduction to apply to the loss. In almost all cases + | this should be `"sum_over_batch_size"`. + | Supported options are `"sum"`, `"sum_over_batch_size"` or `None`. + | name: Optional name for the loss instance. + | + | Examples: + | + | With the `compile()` API: + | + | ```python + | model.compile( + | loss=keras.losses.BinaryFocalCrossentropy( + | gamma=2.0, from_logits=True), + | ... + | ) + | ``` + | + | As a standalone function: + | + | >>> # Example 1: (batch_size = 1, number of samples = 4) + | >>> y_true = [0, 1, 0, 0] + | >>> y_pred = [-18.6, 0.51, 2.94, -12.8] + | >>> loss = keras.losses.BinaryFocalCrossentropy( + | ... 
gamma=2, from_logits=True) + | >>> loss(y_true, y_pred) + | 0.691 + | + | >>> # Apply class weight + | >>> loss = keras.losses.BinaryFocalCrossentropy( + | ... apply_class_balancing=True, gamma=2, from_logits=True) + | >>> loss(y_true, y_pred) + | 0.51 + | + | >>> # Example 2: (batch_size = 2, number of samples = 4) + | >>> y_true = [[0, 1], [0, 0]] + | >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]] + | >>> # Using default 'auto'/'sum_over_batch_size' reduction type. + | >>> loss = keras.losses.BinaryFocalCrossentropy( + | ... gamma=3, from_logits=True) + | >>> loss(y_true, y_pred) + | 0.647 + | + | >>> # Apply class weight + | >>> loss = keras.losses.BinaryFocalCrossentropy( + | ... apply_class_balancing=True, gamma=3, from_logits=True) + | >>> loss(y_true, y_pred) + | 0.482 + | + | >>> # Using 'sample_weight' attribute with focal effect + | >>> loss = keras.losses.BinaryFocalCrossentropy( + | ... gamma=3, from_logits=True) + | >>> loss(y_true, y_pred, sample_weight=[0.8, 0.2]) + | 0.133 + | + | >>> # Apply class weight + | >>> loss = keras.losses.BinaryFocalCrossentropy( + | ... apply_class_balancing=True, gamma=3, from_logits=True) + | >>> loss(y_true, y_pred, sample_weight=[0.8, 0.2]) + | 0.097 + | + | >>> # Using 'sum' reduction` type. + | >>> loss = keras.losses.BinaryFocalCrossentropy( + | ... gamma=4, from_logits=True, + | ... reduction="sum") + | >>> loss(y_true, y_pred) + | 1.222 + | + | >>> # Apply class weight + | >>> loss = keras.losses.BinaryFocalCrossentropy( + | ... apply_class_balancing=True, gamma=4, from_logits=True, + | ... reduction="sum") + | >>> loss(y_true, y_pred) + | 0.914 + | + | >>> # Using 'none' reduction type. + | >>> loss = keras.losses.BinaryFocalCrossentropy( + | ... gamma=5, from_logits=True, + | ... reduction=None) + | >>> loss(y_true, y_pred) + | array([0.0017 1.1561], dtype=float32) + | + | >>> # Apply class weight + | >>> loss = keras.losses.BinaryFocalCrossentropy( + | ... apply_class_balancing=True, gamma=5, from_logits=True, + | ... reduction=None) + | >>> loss(y_true, y_pred) + | array([0.0004 0.8670], dtype=float32) + | + | Method resolution order: + | BinaryFocalCrossentropy + | LossFunctionWrapper + | keras.src.losses.loss.Loss + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | apply_class_balancing=False, + | alpha=0.25, + | gamma=2.0, + | from_logits=False, + | label_smoothing=0.0, + | axis=-1, + | reduction='sum_over_batch_size', + | name='binary_focal_crossentropy' + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | diff --git a/.tether/man/loss_categorical_crossentropy.txt b/.tether/man/loss_categorical_crossentropy.txt new file mode 100644 index 0000000000..1db6a69e9d --- /dev/null +++ b/.tether/man/loss_categorical_crossentropy.txt @@ -0,0 +1,82 @@ +Help on class CategoricalCrossentropy in module keras.src.losses.losses: + +class CategoricalCrossentropy(LossFunctionWrapper) + | CategoricalCrossentropy(from_logits=False, label_smoothing=0.0, axis=-1, reduction='sum_over_batch_size', name='categorical_crossentropy') + | + | Computes the crossentropy loss between the labels and predictions. + | + | Use this crossentropy loss function when there are two or more label + | classes. We expect labels to be provided in a `one_hot` representation. If + | you want to provide labels as integers, please use + | `SparseCategoricalCrossentropy` loss. 
There should be `num_classes` floating + | point values per feature, i.e., the shape of both `y_pred` and `y_true` are + | `[batch_size, num_classes]`. + | + | Args: + | from_logits: Whether `y_pred` is expected to be a logits tensor. By + | default, we assume that `y_pred` encodes a probability distribution. + | label_smoothing: Float in [0, 1]. When > 0, label values are smoothed, + | meaning the confidence on label values are relaxed. For example, if + | `0.1`, use `0.1 / num_classes` for non-target labels and + | `0.9 + 0.1 / num_classes` for target labels. + | axis: The axis along which to compute crossentropy (the features + | axis). Defaults to `-1`. + | reduction: Type of reduction to apply to the loss. In almost all cases + | this should be `"sum_over_batch_size"`. + | Supported options are `"sum"`, `"sum_over_batch_size"` or `None`. + | name: Optional name for the loss instance. + | + | Examples: + | + | Standalone usage: + | + | >>> y_true = [[0, 1, 0], [0, 0, 1]] + | >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] + | >>> # Using 'auto'/'sum_over_batch_size' reduction type. + | >>> cce = keras.losses.CategoricalCrossentropy() + | >>> cce(y_true, y_pred) + | 1.177 + | + | >>> # Calling with 'sample_weight'. + | >>> cce(y_true, y_pred, sample_weight=np.array([0.3, 0.7])) + | 0.814 + | + | >>> # Using 'sum' reduction type. + | >>> cce = keras.losses.CategoricalCrossentropy( + | ... reduction="sum") + | >>> cce(y_true, y_pred) + | 2.354 + | + | >>> # Using 'none' reduction type. + | >>> cce = keras.losses.CategoricalCrossentropy( + | ... reduction=None) + | >>> cce(y_true, y_pred) + | array([0.0513, 2.303], dtype=float32) + | + | Usage with the `compile()` API: + | + | ```python + | model.compile(optimizer='sgd', + | loss=keras.losses.CategoricalCrossentropy()) + | ``` + | + | Method resolution order: + | CategoricalCrossentropy + | LossFunctionWrapper + | keras.src.losses.loss.Loss + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | from_logits=False, + | label_smoothing=0.0, + | axis=-1, + | reduction='sum_over_batch_size', + | name='categorical_crossentropy' + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | diff --git a/.tether/man/loss_categorical_focal_crossentropy.txt b/.tether/man/loss_categorical_focal_crossentropy.txt new file mode 100644 index 0000000000..da1113b885 --- /dev/null +++ b/.tether/man/loss_categorical_focal_crossentropy.txt @@ -0,0 +1,125 @@ +Help on class CategoricalFocalCrossentropy in module keras.src.losses.losses: + +class CategoricalFocalCrossentropy(LossFunctionWrapper) + | CategoricalFocalCrossentropy(alpha=0.25, gamma=2.0, from_logits=False, label_smoothing=0.0, axis=-1, reduction='sum_over_batch_size', name='categorical_focal_crossentropy') + | + | Computes the alpha balanced focal crossentropy loss. + | + | Use this crossentropy loss function when there are two or more label + | classes and if you want to handle class imbalance without using + | `class_weights`. We expect labels to be provided in a `one_hot` + | representation. + | + | According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it + | helps to apply a focal factor to down-weight easy examples and focus more on + | hard examples. 
The general formula for the focal loss (FL) + | is as follows: + | + | `FL(p_t) = (1 - p_t) ** gamma * log(p_t)` + | + | where `p_t` is defined as follows: + | `p_t = output if y_true == 1, else 1 - output` + | + | `(1 - p_t) ** gamma` is the `modulating_factor`, where `gamma` is a focusing + | parameter. When `gamma` = 0, there is no focal effect on the cross entropy. + | `gamma` reduces the importance given to simple examples in a smooth manner. + | + | The authors use alpha-balanced variant of focal loss (FL) in the paper: + | `FL(p_t) = -alpha * (1 - p_t) ** gamma * log(p_t)` + | + | where `alpha` is the weight factor for the classes. If `alpha` = 1, the + | loss won't be able to handle class imbalance properly as all + | classes will have the same weight. This can be a constant or a list of + | constants. If alpha is a list, it must have the same length as the number + | of classes. + | + | The formula above can be generalized to: + | `FL(p_t) = alpha * (1 - p_t) ** gamma * CrossEntropy(y_true, y_pred)` + | + | where minus comes from `CrossEntropy(y_true, y_pred)` (CE). + | + | Extending this to multi-class case is straightforward: + | `FL(p_t) = alpha * (1 - p_t) ** gamma * CategoricalCE(y_true, y_pred)` + | + | In the snippet below, there is `num_classes` floating pointing values per + | example. The shape of both `y_pred` and `y_true` are + | `(batch_size, num_classes)`. + | + | Args: + | alpha: A weight balancing factor for all classes, default is `0.25` as + | mentioned in the reference. It can be a list of floats or a scalar. + | In the multi-class case, alpha may be set by inverse class + | frequency by using `compute_class_weight` from `sklearn.utils`. + | gamma: A focusing parameter, default is `2.0` as mentioned in the + | reference. It helps to gradually reduce the importance given to + | simple (easy) examples in a smooth manner. + | from_logits: Whether `output` is expected to be a logits tensor. By + | default, we consider that `output` encodes a probability + | distribution. + | label_smoothing: Float in [0, 1]. When > 0, label values are smoothed, + | meaning the confidence on label values are relaxed. For example, if + | `0.1`, use `0.1 / num_classes` for non-target labels and + | `0.9 + 0.1 / num_classes` for target labels. + | axis: The axis along which to compute crossentropy (the features + | axis). Defaults to `-1`. + | reduction: Type of reduction to apply to the loss. In almost all cases + | this should be `"sum_over_batch_size"`. + | Supported options are `"sum"`, `"sum_over_batch_size"` or `None`. + | name: Optional name for the loss instance. + | + | Examples: + | + | Standalone usage: + | + | >>> y_true = [[0., 1., 0.], [0., 0., 1.]] + | >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] + | >>> # Using 'auto'/'sum_over_batch_size' reduction type. + | >>> cce = keras.losses.CategoricalFocalCrossentropy() + | >>> cce(y_true, y_pred) + | 0.23315276 + | + | >>> # Calling with 'sample_weight'. + | >>> cce(y_true, y_pred, sample_weight=np.array([0.3, 0.7])) + | 0.1632 + | + | >>> # Using 'sum' reduction type. + | >>> cce = keras.losses.CategoricalFocalCrossentropy( + | ... reduction="sum") + | >>> cce(y_true, y_pred) + | 0.46631 + | + | >>> # Using 'none' reduction type. + | >>> cce = keras.losses.CategoricalFocalCrossentropy( + | ... 
reduction=None) + | >>> cce(y_true, y_pred) + | array([3.2058331e-05, 4.6627346e-01], dtype=float32) + | + | Usage with the `compile()` API: + | + | ```python + | model.compile(optimizer='adam', + | loss=keras.losses.CategoricalFocalCrossentropy()) + | ``` + | + | Method resolution order: + | CategoricalFocalCrossentropy + | LossFunctionWrapper + | keras.src.losses.loss.Loss + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | alpha=0.25, + | gamma=2.0, + | from_logits=False, + | label_smoothing=0.0, + | axis=-1, + | reduction='sum_over_batch_size', + | name='categorical_focal_crossentropy' + | ) + | Initializes `CategoricalFocalCrossentropy` instance. + | + | get_config(self) + | diff --git a/.tether/man/loss_categorical_hinge.txt b/.tether/man/loss_categorical_hinge.txt new file mode 100644 index 0000000000..f0881fa96d --- /dev/null +++ b/.tether/man/loss_categorical_hinge.txt @@ -0,0 +1,38 @@ +Help on class CategoricalHinge in module keras.src.losses.losses: + +class CategoricalHinge(LossFunctionWrapper) + | CategoricalHinge(reduction='sum_over_batch_size', name='categorical_hinge') + | + | Computes the categorical hinge loss between `y_true` & `y_pred`. + | + | Formula: + | + | ```python + | loss = maximum(neg - pos + 1, 0) + | ``` + | + | where `neg=maximum((1-y_true)*y_pred)` and `pos=sum(y_true*y_pred)` + | + | Args: + | reduction: Type of reduction to apply to the loss. In almost all cases + | this should be `"sum_over_batch_size"`. + | Supported options are `"sum"`, `"sum_over_batch_size"` or `None`. + | name: Optional name for the loss instance. + | + | Method resolution order: + | CategoricalHinge + | LossFunctionWrapper + | keras.src.losses.loss.Loss + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | reduction='sum_over_batch_size', + | name='categorical_hinge' + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | diff --git a/.tether/man/loss_cosine_similarity.txt b/.tether/man/loss_cosine_similarity.txt new file mode 100644 index 0000000000..d5cfe4c312 --- /dev/null +++ b/.tether/man/loss_cosine_similarity.txt @@ -0,0 +1,46 @@ +Help on class CosineSimilarity in module keras.src.losses.losses: + +class CosineSimilarity(LossFunctionWrapper) + | CosineSimilarity(axis=-1, reduction='sum_over_batch_size', name='cosine_similarity') + | + | Computes the cosine similarity between `y_true` & `y_pred`. + | + | Note that it is a number between -1 and 1. When it is a negative number + | between -1 and 0, 0 indicates orthogonality and values closer to -1 + | indicate greater similarity. This makes it usable as a loss function in a + | setting where you try to maximize the proximity between predictions and + | targets. If either `y_true` or `y_pred` is a zero vector, cosine similarity + | will be 0 regardless of the proximity between predictions and targets. + | + | Formula: + | + | ```python + | loss = -sum(l2_norm(y_true) * l2_norm(y_pred)) + | ``` + | + | Args: + | axis: The axis along which the cosine similarity is computed + | (the features axis). Defaults to `-1`. + | reduction: Type of reduction to apply to the loss. In almost all cases + | this should be `"sum_over_batch_size"`. + | Supported options are `"sum"`, `"sum_over_batch_size"` or `None`. + | name: Optional name for the loss instance. 
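+ |
+ | Example (a standalone sketch; the printed value is approximate):
+ |
+ | ```python
+ | import keras
+ |
+ | y_true = [[0., 1.], [1., 1.]]
+ | y_pred = [[1., 0.], [1., 1.]]
+ | # Per-sample similarities are 0. and ~1., so the mean loss is ~-0.5.
+ | loss = keras.losses.CosineSimilarity(axis=-1)
+ | loss(y_true, y_pred)  # ~-0.5
+ | ```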
+ | + | Method resolution order: + | CosineSimilarity + | LossFunctionWrapper + | keras.src.losses.loss.Loss + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | axis=-1, + | reduction='sum_over_batch_size', + | name='cosine_similarity' + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | diff --git a/.tether/man/loss_ctc.txt b/.tether/man/loss_ctc.txt new file mode 100644 index 0000000000..e7c4621d86 --- /dev/null +++ b/.tether/man/loss_ctc.txt @@ -0,0 +1,33 @@ +Help on class CTC in module keras.src.losses.losses: + +class CTC(LossFunctionWrapper) + | CTC(reduction='sum_over_batch_size', name='sparse_categorical_crossentropy') + | + | CTC (Connectionist Temporal Classification) loss. + | + | Args: + | y_true: A tensor of shape `(batch_size, target_max_length)` containing + | the true labels in integer format. `0` always represents + | the blank/mask index and should not be used for classes. + | y_pred: A tensor of shape `(batch_size, output_max_length, num_classes)` + | containing logits (the output of your model). + | They should *not* be normalized via softmax. + | + | Method resolution order: + | CTC + | LossFunctionWrapper + | keras.src.losses.loss.Loss + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | reduction='sum_over_batch_size', + | name='sparse_categorical_crossentropy' + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | + diff --git a/.tether/man/loss_dice.txt b/.tether/man/loss_dice.txt new file mode 100644 index 0000000000..ab4fb84dc2 --- /dev/null +++ b/.tether/man/loss_dice.txt @@ -0,0 +1,37 @@ +Help on class Dice in module keras.src.losses.losses: + +class Dice(LossFunctionWrapper) + | Dice(reduction='sum_over_batch_size', name='dice') + | + | Computes the Dice loss value between `y_true` and `y_pred`. + | + | Formula: + | ```python + | loss = 1 - (2 * sum(y_true * y_pred)) / (sum(y_true) + sum(y_pred)) + | ``` + | + | Args: + | y_true: tensor of true targets. + | y_pred: tensor of predicted targets. + | + | Returns: + | Dice loss value. + | + | Method resolution order: + | Dice + | LossFunctionWrapper + | keras.src.losses.loss.Loss + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | reduction='sum_over_batch_size', + | name='dice' + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | + diff --git a/.tether/man/loss_hinge.txt b/.tether/man/loss_hinge.txt new file mode 100644 index 0000000000..906132b343 --- /dev/null +++ b/.tether/man/loss_hinge.txt @@ -0,0 +1,39 @@ +Help on class Hinge in module keras.src.losses.losses: + +class Hinge(LossFunctionWrapper) + | Hinge(reduction='sum_over_batch_size', name='hinge') + | + | Computes the hinge loss between `y_true` & `y_pred`. + | + | Formula: + | + | ```python + | loss = maximum(1 - y_true * y_pred, 0) + | ``` + | + | `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are + | provided we will convert them to -1 or 1. + | + | Args: + | reduction: Type of reduction to apply to the loss. In almost all cases + | this should be `"sum_over_batch_size"`. + | Supported options are `"sum"`, `"sum_over_batch_size"` or `None`. + | name: Optional name for the loss instance. 
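+ |
+ | Example (a standalone sketch; binary labels are converted to -1/1):
+ |
+ | ```python
+ | import keras
+ |
+ | y_true = [[0., 1.], [0., 0.]]  # treated as [[-1., 1.], [-1., -1.]]
+ | y_pred = [[0.6, 0.4], [0.4, 0.6]]
+ | loss = keras.losses.Hinge()
+ | loss(y_true, y_pred)  # ~1.3
+ | ```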
+ | + | Method resolution order: + | Hinge + | LossFunctionWrapper + | keras.src.losses.loss.Loss + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | reduction='sum_over_batch_size', + | name='hinge' + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | diff --git a/.tether/man/loss_huber.txt b/.tether/man/loss_huber.txt new file mode 100644 index 0000000000..c5808a4ebc --- /dev/null +++ b/.tether/man/loss_huber.txt @@ -0,0 +1,46 @@ +Help on class Huber in module keras.src.losses.losses: + +class Huber(LossFunctionWrapper) + | Huber(delta=1.0, reduction='sum_over_batch_size', name='huber_loss') + | + | Computes the Huber loss between `y_true` & `y_pred`. + | + | Formula: + | + | ```python + | for x in error: + | if abs(x) <= delta: + | loss.append(0.5 * x^2) + | elif abs(x) > delta: + | loss.append(delta * abs(x) - 0.5 * delta^2) + | + | loss = mean(loss, axis=-1) + | ``` + | See: [Huber loss](https://en.wikipedia.org/wiki/Huber_loss). + | + | Args: + | delta: A float, the point where the Huber loss function changes from a + | quadratic to linear. + | reduction: Type of reduction to apply to loss. Options are `"sum"`, + | `"sum_over_batch_size"` or `None`. Defaults to + | `"sum_over_batch_size"`. + | name: Optional name for the instance. + | + | Method resolution order: + | Huber + | LossFunctionWrapper + | keras.src.losses.loss.Loss + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | delta=1.0, + | reduction='sum_over_batch_size', + | name='huber_loss' + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | diff --git a/.tether/man/loss_kl_divergence.txt b/.tether/man/loss_kl_divergence.txt new file mode 100644 index 0000000000..55eb404696 --- /dev/null +++ b/.tether/man/loss_kl_divergence.txt @@ -0,0 +1,41 @@ +Help on class KLDivergence in module keras.src.losses.losses: + +class KLDivergence(LossFunctionWrapper) + | KLDivergence(reduction='sum_over_batch_size', name='kl_divergence') + | + | Computes Kullback-Leibler divergence loss between `y_true` & `y_pred`. + | + | Formula: + | + | ```python + | loss = y_true * log(y_true / y_pred) + | ``` + | + | `y_true` and `y_pred` are expected to be probability + | distributions, with values between 0 and 1. They will get + | clipped to the `[0, 1]` range. + | + | Args: + | reduction: Type of reduction to apply to the loss. In almost all cases + | this should be `"sum_over_batch_size"`. + | Supported options are `"sum"`, `"sum_over_batch_size"` or `None`. + | name: Optional name for the loss instance. + | + | Method resolution order: + | KLDivergence + | LossFunctionWrapper + | keras.src.losses.loss.Loss + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | reduction='sum_over_batch_size', + | name='kl_divergence' + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | + diff --git a/.tether/man/loss_log_cosh.txt b/.tether/man/loss_log_cosh.txt new file mode 100644 index 0000000000..454e1d1cb9 --- /dev/null +++ b/.tether/man/loss_log_cosh.txt @@ -0,0 +1,38 @@ +Help on class LogCosh in module keras.src.losses.losses: + +class LogCosh(LossFunctionWrapper) + | LogCosh(reduction='sum_over_batch_size', name='log_cosh') + | + | Computes the logarithm of the hyperbolic cosine of the prediction error. 
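+ |
+ | Example (a standalone sketch; see the formula below):
+ |
+ | ```python
+ | import keras
+ |
+ | y_true = [[0., 1.], [0., 0.]]
+ | y_pred = [[1., 1.], [0., 0.]]
+ | # log(cosh(1.)) ~ 0.4338 for the single nonzero error; averaging over
+ | # the four elements gives ~0.1084.
+ | loss = keras.losses.LogCosh()
+ | loss(y_true, y_pred)  # ~0.108
+ | ```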
+ |
+ | Formula:
+ |
+ | ```python
+ | error = y_pred - y_true
+ | logcosh = mean(log((exp(error) + exp(-error))/2), axis=-1)
+ | ```
+ | where `error` is `y_pred - y_true`.
+ |
+ | Args:
+ |     reduction: Type of reduction to apply to loss. Options are `"sum"`,
+ |         `"sum_over_batch_size"` or `None`. Defaults to
+ |         `"sum_over_batch_size"`.
+ |     name: Optional name for the instance.
+ |
+ | Method resolution order:
+ |     LogCosh
+ |     LossFunctionWrapper
+ |     keras.src.losses.loss.Loss
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ |     self,
+ |     reduction='sum_over_batch_size',
+ |     name='log_cosh'
+ | )
+ |     Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ |
diff --git a/.tether/man/loss_mean_absolute_error.txt b/.tether/man/loss_mean_absolute_error.txt
new file mode 100644
index 0000000000..d20733325a
--- /dev/null
+++ b/.tether/man/loss_mean_absolute_error.txt
@@ -0,0 +1,36 @@
+Help on class MeanAbsoluteError in module keras.src.losses.losses:
+
+class MeanAbsoluteError(LossFunctionWrapper)
+ | MeanAbsoluteError(reduction='sum_over_batch_size', name='mean_absolute_error')
+ |
+ | Computes the mean of the absolute difference between labels and predictions.
+ |
+ | Formula:
+ |
+ | ```python
+ | loss = mean(abs(y_true - y_pred))
+ | ```
+ |
+ | Args:
+ |     reduction: Type of reduction to apply to the loss. In almost all cases
+ |         this should be `"sum_over_batch_size"`.
+ |         Supported options are `"sum"`, `"sum_over_batch_size"` or `None`.
+ |     name: Optional name for the loss instance.
+ |
+ | Method resolution order:
+ |     MeanAbsoluteError
+ |     LossFunctionWrapper
+ |     keras.src.losses.loss.Loss
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ |     self,
+ |     reduction='sum_over_batch_size',
+ |     name='mean_absolute_error'
+ | )
+ |     Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ |
diff --git a/.tether/man/loss_mean_absolute_percentage_error.txt b/.tether/man/loss_mean_absolute_percentage_error.txt
new file mode 100644
index 0000000000..4e772dbd8c
--- /dev/null
+++ b/.tether/man/loss_mean_absolute_percentage_error.txt
@@ -0,0 +1,36 @@
+Help on class MeanAbsolutePercentageError in module keras.src.losses.losses:
+
+class MeanAbsolutePercentageError(LossFunctionWrapper)
+ | MeanAbsolutePercentageError(reduction='sum_over_batch_size', name='mean_absolute_percentage_error')
+ |
+ | Computes the mean absolute percentage error between `y_true` & `y_pred`.
+ |
+ | Formula:
+ |
+ | ```python
+ | loss = 100 * mean(abs((y_true - y_pred) / y_true))
+ | ```
+ |
+ | Args:
+ |     reduction: Type of reduction to apply to the loss. In almost all cases
+ |         this should be `"sum_over_batch_size"`.
+ |         Supported options are `"sum"`, `"sum_over_batch_size"` or `None`.
+ |     name: Optional name for the loss instance.
+ |
+ | Method resolution order:
+ |     MeanAbsolutePercentageError
+ |     LossFunctionWrapper
+ |     keras.src.losses.loss.Loss
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ |     self,
+ |     reduction='sum_over_batch_size',
+ |     name='mean_absolute_percentage_error'
+ | )
+ |     Initialize self. See help(type(self)) for accurate signature.
+ | + | get_config(self) + | diff --git a/.tether/man/loss_mean_squared_error.txt b/.tether/man/loss_mean_squared_error.txt new file mode 100644 index 0000000000..06f43f5ef6 --- /dev/null +++ b/.tether/man/loss_mean_squared_error.txt @@ -0,0 +1,36 @@ +Help on class MeanSquaredError in module keras.src.losses.losses: + +class MeanSquaredError(LossFunctionWrapper) + | MeanSquaredError(reduction='sum_over_batch_size', name='mean_squared_error') + | + | Computes the mean of squares of errors between labels and predictions. + | + | Formula: + | + | ```python + | loss = mean(square(y_true - y_pred)) + | ``` + | + | Args: + | reduction: Type of reduction to apply to the loss. In almost all cases + | this should be `"sum_over_batch_size"`. + | Supported options are `"sum"`, `"sum_over_batch_size"` or `None`. + | name: Optional name for the loss instance. + | + | Method resolution order: + | MeanSquaredError + | LossFunctionWrapper + | keras.src.losses.loss.Loss + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | reduction='sum_over_batch_size', + | name='mean_squared_error' + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | diff --git a/.tether/man/loss_mean_squared_logarithmic_error.txt b/.tether/man/loss_mean_squared_logarithmic_error.txt new file mode 100644 index 0000000000..67c8bf60af --- /dev/null +++ b/.tether/man/loss_mean_squared_logarithmic_error.txt @@ -0,0 +1,36 @@ +Help on class MeanSquaredLogarithmicError in module keras.src.losses.losses: + +class MeanSquaredLogarithmicError(LossFunctionWrapper) + | MeanSquaredLogarithmicError(reduction='sum_over_batch_size', name='mean_squared_logarithmic_error') + | + | Computes the mean squared logarithmic error between `y_true` & `y_pred`. + | + | Formula: + | + | ```python + | loss = mean(square(log(y_true + 1) - log(y_pred + 1))) + | ``` + | + | Args: + | reduction: Type of reduction to apply to the loss. In almost all cases + | this should be `"sum_over_batch_size"`. + | Supported options are `"sum"`, `"sum_over_batch_size"` or `None`. + | name: Optional name for the loss instance. + | + | Method resolution order: + | MeanSquaredLogarithmicError + | LossFunctionWrapper + | keras.src.losses.loss.Loss + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | reduction='sum_over_batch_size', + | name='mean_squared_logarithmic_error' + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | diff --git a/.tether/man/loss_poisson.txt b/.tether/man/loss_poisson.txt new file mode 100644 index 0000000000..e20dedccf1 --- /dev/null +++ b/.tether/man/loss_poisson.txt @@ -0,0 +1,36 @@ +Help on class Poisson in module keras.src.losses.losses: + +class Poisson(LossFunctionWrapper) + | Poisson(reduction='sum_over_batch_size', name='poisson') + | + | Computes the Poisson loss between `y_true` & `y_pred`. + | + | Formula: + | + | ```python + | loss = y_pred - y_true * log(y_pred) + | ``` + | + | Args: + | reduction: Type of reduction to apply to the loss. In almost all cases + | this should be `"sum_over_batch_size"`. + | Supported options are `"sum"`, `"sum_over_batch_size"` or `None`. + | name: Optional name for the loss instance. + | + | Method resolution order: + | Poisson + | LossFunctionWrapper + | keras.src.losses.loss.Loss + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | reduction='sum_over_batch_size', + | name='poisson' + | ) + | Initialize self. 
See help(type(self)) for accurate signature. + | + | get_config(self) + | diff --git a/.tether/man/loss_sparse_categorical_crossentropy.txt b/.tether/man/loss_sparse_categorical_crossentropy.txt new file mode 100644 index 0000000000..3a158df1bd --- /dev/null +++ b/.tether/man/loss_sparse_categorical_crossentropy.txt @@ -0,0 +1,78 @@ +Help on class SparseCategoricalCrossentropy in module keras.src.losses.losses: + +class SparseCategoricalCrossentropy(LossFunctionWrapper) + | SparseCategoricalCrossentropy(from_logits=False, ignore_class=None, reduction='sum_over_batch_size', name='sparse_categorical_crossentropy') + | + | Computes the crossentropy loss between the labels and predictions. + | + | Use this crossentropy loss function when there are two or more label + | classes. We expect labels to be provided as integers. If you want to + | provide labels using `one-hot` representation, please use + | `CategoricalCrossentropy` loss. There should be `# classes` floating point + | values per feature for `y_pred` and a single floating point value per + | feature for `y_true`. + | + | In the snippet below, there is a single floating point value per example for + | `y_true` and `num_classes` floating pointing values per example for + | `y_pred`. The shape of `y_true` is `[batch_size]` and the shape of `y_pred` + | is `[batch_size, num_classes]`. + | + | Args: + | from_logits: Whether `y_pred` is expected to be a logits tensor. By + | default, we assume that `y_pred` encodes a probability distribution. + | reduction: Type of reduction to apply to the loss. In almost all cases + | this should be `"sum_over_batch_size"`. + | Supported options are `"sum"`, `"sum_over_batch_size"` or `None`. + | name: Optional name for the loss instance. + | + | Examples: + | + | >>> y_true = [1, 2] + | >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] + | >>> # Using 'auto'/'sum_over_batch_size' reduction type. + | >>> scce = keras.losses.SparseCategoricalCrossentropy() + | >>> scce(y_true, y_pred) + | 1.177 + | + | >>> # Calling with 'sample_weight'. + | >>> scce(y_true, y_pred, sample_weight=np.array([0.3, 0.7])) + | 0.814 + | + | >>> # Using 'sum' reduction type. + | >>> scce = keras.losses.SparseCategoricalCrossentropy( + | ... reduction="sum") + | >>> scce(y_true, y_pred) + | 2.354 + | + | >>> # Using 'none' reduction type. + | >>> scce = keras.losses.SparseCategoricalCrossentropy( + | ... reduction=None) + | >>> scce(y_true, y_pred) + | array([0.0513, 2.303], dtype=float32) + | + | Usage with the `compile()` API: + | + | ```python + | model.compile(optimizer='sgd', + | loss=keras.losses.SparseCategoricalCrossentropy()) + | ``` + | + | Method resolution order: + | SparseCategoricalCrossentropy + | LossFunctionWrapper + | keras.src.losses.loss.Loss + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | from_logits=False, + | ignore_class=None, + | reduction='sum_over_batch_size', + | name='sparse_categorical_crossentropy' + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | diff --git a/.tether/man/loss_squared_hinge.txt b/.tether/man/loss_squared_hinge.txt new file mode 100644 index 0000000000..28a4617fbd --- /dev/null +++ b/.tether/man/loss_squared_hinge.txt @@ -0,0 +1,39 @@ +Help on class SquaredHinge in module keras.src.losses.losses: + +class SquaredHinge(LossFunctionWrapper) + | SquaredHinge(reduction='sum_over_batch_size', name='squared_hinge') + | + | Computes the squared hinge loss between `y_true` & `y_pred`. 
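+ |
+ | Example (a standalone sketch; see the formula below):
+ |
+ | ```python
+ | import keras
+ |
+ | y_true = [[0., 1.], [0., 0.]]  # treated as [[-1., 1.], [-1., -1.]]
+ | y_pred = [[0.6, 0.4], [0.4, 0.6]]
+ | loss = keras.losses.SquaredHinge()
+ | loss(y_true, y_pred)  # ~1.86
+ | ```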
+ |
+ | Formula:
+ |
+ | ```python
+ | loss = square(maximum(1 - y_true * y_pred, 0))
+ | ```
+ |
+ | `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
+ | provided we will convert them to -1 or 1.
+ |
+ | Args:
+ |     reduction: Type of reduction to apply to the loss. In almost all cases
+ |         this should be `"sum_over_batch_size"`.
+ |         Supported options are `"sum"`, `"sum_over_batch_size"` or `None`.
+ |     name: Optional name for the loss instance.
+ |
+ | Method resolution order:
+ |     SquaredHinge
+ |     LossFunctionWrapper
+ |     keras.src.losses.loss.Loss
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ |     self,
+ |     reduction='sum_over_batch_size',
+ |     name='squared_hinge'
+ | )
+ |     Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ |
diff --git a/.tether/man/loss_tversky.txt b/.tether/man/loss_tversky.txt
new file mode 100644
index 0000000000..8687ef6a20
--- /dev/null
+++ b/.tether/man/loss_tversky.txt
@@ -0,0 +1,46 @@
+Help on class Tversky in module keras.src.losses.losses:
+
+class Tversky(LossFunctionWrapper)
+ | Tversky(alpha=0.5, beta=0.5, reduction='sum_over_batch_size', name='tversky')
+ |
+ | Computes the Tversky loss value between `y_true` and `y_pred`.
+ |
+ | This loss function is weighted by the alpha and beta coefficients
+ | that penalize false positives and false negatives.
+ |
+ | With `alpha=0.5` and `beta=0.5`, the loss value becomes equivalent to
+ | Dice Loss.
+ |
+ | Args:
+ |     y_true: tensor of true targets.
+ |     y_pred: tensor of predicted targets.
+ |     alpha: coefficient controlling incidence of false positives.
+ |     beta: coefficient controlling incidence of false negatives.
+ |
+ | Returns:
+ |     Tversky loss value.
+ |
+ | Reference:
+ |
+ | - [Salehi et al., 2017](https://arxiv.org/abs/1706.05721)
+ |
+ | Method resolution order:
+ |     Tversky
+ |     LossFunctionWrapper
+ |     keras.src.losses.loss.Loss
+ |     builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ |     self,
+ |     alpha=0.5,
+ |     beta=0.5,
+ |     reduction='sum_over_batch_size',
+ |     name='tversky'
+ | )
+ |     Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ |
+
diff --git a/.tether/man/metric_auc.txt b/.tether/man/metric_auc.txt
new file mode 100644
index 0000000000..4e1792dbb8
--- /dev/null
+++ b/.tether/man/metric_auc.txt
@@ -0,0 +1,229 @@
+Help on class AUC in module keras.src.metrics.confusion_metrics:
+
+class AUC(keras.src.metrics.metric.Metric)
+ | AUC(num_thresholds=200, curve='ROC', summation_method='interpolation', name=None, dtype=None, thresholds=None, multi_label=False, num_labels=None, label_weights=None, from_logits=False)
+ |
+ | Approximates the AUC (Area under the curve) of the ROC or PR curves.
+ |
+ | The AUC (Area under the curve) of the ROC (Receiver operating
+ | characteristic; default) or PR (Precision Recall) curves are quality
+ | measures of binary classifiers. Unlike the accuracy, and like cross-entropy
+ | losses, ROC-AUC and PR-AUC evaluate all the operational points of a model.
+ |
+ | This class approximates AUCs using a Riemann sum. During the metric
+ | accumulation phase, predictions are accumulated within predefined buckets
+ | by value. The AUC is then computed by interpolating per-bucket averages.
+ | These buckets define the evaluated operational points.
+ |
+ | This metric creates four local variables, `true_positives`,
+ | `true_negatives`, `false_positives` and `false_negatives` that are used to
+ | compute the AUC.
+ | To discretize the AUC curve, a linearly spaced set of
+ | thresholds is used to compute pairs of recall and precision values. The area
+ | under the ROC-curve is therefore computed using the height of the recall
+ | values by the false positive rate, while the area under the PR-curve is
+ | computed using the height of the precision values by the recall.
+ |
+ | This value is ultimately returned as `auc`, an idempotent operation that
+ | computes the area under a discretized curve of precision versus recall
+ | values (computed using the aforementioned variables). The `num_thresholds`
+ | variable controls the degree of discretization with larger numbers of
+ | thresholds more closely approximating the true AUC. The quality of the
+ | approximation may vary dramatically depending on `num_thresholds`. The
+ | `thresholds` parameter can be used to manually specify thresholds which
+ | split the predictions more evenly.
+ |
+ | For a best approximation of the real AUC, `predictions` should be
+ | distributed approximately uniformly in the range `[0, 1]` (if
+ | `from_logits=False`). The quality of the AUC approximation may be poor if
+ | this is not the case. Setting `summation_method` to 'minoring' or 'majoring'
+ | can help quantify the error in the approximation by providing lower or upper
+ | bound estimates of the AUC.
+ |
+ | If `sample_weight` is `None`, weights default to 1.
+ | Use `sample_weight` of 0 to mask values.
+ |
+ | Args:
+ | num_thresholds: (Optional) The number of thresholds to
+ | use when discretizing the roc curve. Values must be > 1.
+ | Defaults to `200`.
+ | curve: (Optional) Specifies the name of the curve to be computed,
+ | `'ROC'` (default) or `'PR'` for the Precision-Recall-curve.
+ | summation_method: (Optional) Specifies the [Riemann summation method](
+ | https://en.wikipedia.org/wiki/Riemann_sum) used.
+ | 'interpolation' (default) applies mid-point summation scheme for
+ | `ROC`. For PR-AUC, interpolates (true/false) positives but not
+ | the ratio that is precision (see Davis & Goadrich 2006 for
+ | details); 'minoring' applies left summation for increasing
+ | intervals and right summation for decreasing intervals; 'majoring'
+ | does the opposite.
+ | name: (Optional) string name of the metric instance.
+ | dtype: (Optional) data type of the metric result.
+ | thresholds: (Optional) A list of floating point values to use as the
+ | thresholds for discretizing the curve. If set, the `num_thresholds`
+ | parameter is ignored. Values should be in `[0, 1]`. Endpoint
+ | thresholds equal to {`-epsilon`, `1+epsilon`} for a small positive
+ | epsilon value will be automatically included with these to correctly
+ | handle predictions equal to exactly 0 or 1.
+ | multi_label: boolean indicating whether multilabel data should be
+ | treated as such, wherein AUC is computed separately for each label
+ | and then averaged across labels, or (when `False`) if the data
+ | should be flattened into a single label before AUC computation. In
+ | the latter case, when multilabel data is passed to AUC, each
+ | label-prediction pair is treated as an individual data point. Should
+ | be set to `False` for multi-class data.
+ | num_labels: (Optional) The number of labels, used when `multi_label` is
+ | True. If `num_labels` is not specified, then state variables get
+ | created on the first call to `update_state`.
+ | label_weights: (Optional) list, array, or tensor of non-negative weights
+ | used to compute AUCs for multilabel data.
When `multi_label` is + | True, the weights are applied to the individual label AUCs when they + | are averaged to produce the multi-label AUC. When it's False, they + | are used to weight the individual label predictions in computing the + | confusion matrix on the flattened data. Note that this is unlike + | `class_weights` in that `class_weights` weights the example + | depending on the value of its label, whereas `label_weights` depends + | only on the index of that label before flattening; therefore + | `label_weights` should not be used for multi-class data. + | from_logits: boolean indicating whether the predictions (`y_pred` in + | `update_state`) are probabilities or sigmoid logits. As a rule of thumb, + | when using a keras loss, the `from_logits` constructor argument of the + | loss should match the AUC `from_logits` constructor argument. + | + | Example: + | + | >>> m = keras.metrics.AUC(num_thresholds=3) + | >>> m.update_state([0, 0, 1, 1], [0, 0.5, 0.3, 0.9]) + | >>> # threshold values are [0 - 1e-7, 0.5, 1 + 1e-7] + | >>> # tp = [2, 1, 0], fp = [2, 0, 0], fn = [0, 1, 2], tn = [0, 2, 2] + | >>> # tp_rate = recall = [1, 0.5, 0], fp_rate = [1, 0, 0] + | >>> # auc = ((((1 + 0.5) / 2) * (1 - 0)) + (((0.5 + 0) / 2) * (0 - 0))) + | >>> # = 0.75 + | >>> m.result() + | 0.75 + | + | >>> m.reset_state() + | >>> m.update_state([0, 0, 1, 1], [0, 0.5, 0.3, 0.9], + | ... sample_weight=[1, 0, 0, 1]) + | >>> m.result() + | 1.0 + | + | Usage with `compile()` API: + | + | ```python + | # Reports the AUC of a model outputting a probability. + | model.compile(optimizer='sgd', + | loss=keras.losses.BinaryCrossentropy(), + | metrics=[keras.metrics.AUC()]) + | + | # Reports the AUC of a model outputting a logit. + | model.compile(optimizer='sgd', + | loss=keras.losses.BinaryCrossentropy(from_logits=True), + | metrics=[keras.metrics.AUC(from_logits=True)]) + | ``` + | + | Method resolution order: + | AUC + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | num_thresholds=200, + | curve='ROC', + | summation_method='interpolation', + | name=None, + | dtype=None, + | thresholds=None, + | multi_label=False, + | num_labels=None, + | label_weights=None, + | from_logits=False + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + | interpolate_pr_auc(self) + | Interpolation formula inspired by section 4 of Davis & Goadrich 2006. 
+ | + | https://www.biostat.wisc.edu/~page/rocpr.pdf + | + | Note here we derive & use a closed formula not present in the paper + | as follows: + | + | Precision = TP / (TP + FP) = TP / P + | + | Modeling all of TP (true positive), FP (false positive) and their sum + | P = TP + FP (predicted positive) as varying linearly within each + | interval [A, B] between successive thresholds, we get + | + | Precision slope = dTP / dP + | = (TP_B - TP_A) / (P_B - P_A) + | = (TP - TP_A) / (P - P_A) + | Precision = (TP_A + slope * (P - P_A)) / P + | + | The area within the interval is (slope / total_pos_weight) times + | + | int_A^B{Precision.dP} = int_A^B{(TP_A + slope * (P - P_A)) * dP / P} + | int_A^B{Precision.dP} = int_A^B{slope * dP + intercept * dP / P} + | + | where intercept = TP_A - slope * P_A = TP_B - slope * P_B, resulting in + | + | int_A^B{Precision.dP} = TP_B - TP_A + intercept * log(P_B / P_A) + | + | Bringing back the factor (slope / total_pos_weight) we'd put aside, we + | get + | + | slope * [dTP + intercept * log(P_B / P_A)] / total_pos_weight + | + | where dTP == TP_B - TP_A. + | + | Note that when P_A == 0 the above calculation simplifies into + | + | int_A^B{Precision.dTP} = int_A^B{slope * dTP} + | = slope * (TP_B - TP_A) + | + | which is really equivalent to imputing constant precision throughout the + | first bucket having >0 true positives. + | + | Returns: + | pr_auc: an approximation of the area under the P-R curve. + | + | reset_state(self) + | Reset all of the metric state variables. + | + | This function is called between epochs/steps, + | when a metric is evaluated during training. + | + | result(self) + | Compute the current metric value. + | + | Returns: + | A scalar tensor, or a dictionary of scalar tensors. + | + | update_state( + | self, + | y_true, + | y_pred, + | sample_weight=None + | ) + | Accumulates confusion matrix statistics. + | + | Args: + | y_true: The ground truth values. + | y_pred: The predicted values. + | sample_weight: Optional weighting of each example. Can + | be a tensor whose rank is either 0, or the same rank as + | `y_true`, and must be broadcastable to `y_true`. Defaults to + | `1`. + | + | ---------------------------------------------------------------------- + | Readonly properties defined here: + | + | thresholds + | The thresholds used for evaluating AUC. + | + diff --git a/.tether/man/metric_binary_accuracy.txt b/.tether/man/metric_binary_accuracy.txt new file mode 100644 index 0000000000..104d71912c --- /dev/null +++ b/.tether/man/metric_binary_accuracy.txt @@ -0,0 +1,63 @@ +Help on class BinaryAccuracy in module keras.src.metrics.accuracy_metrics: + +class BinaryAccuracy(keras.src.metrics.reduction_metrics.MeanMetricWrapper) + | BinaryAccuracy(name='binary_accuracy', dtype=None, threshold=0.5) + | + | Calculates how often predictions match binary labels. + | + | This metric creates two local variables, `total` and `count` that are used + | to compute the frequency with which `y_pred` matches `y_true`. This + | frequency is ultimately returned as `binary accuracy`: an idempotent + | operation that simply divides `total` by `count`. + | + | If `sample_weight` is `None`, weights default to 1. + | Use `sample_weight` of 0 to mask values. + | + | Args: + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | threshold: (Optional) Float representing the threshold for deciding + | whether prediction values are 1 or 0. 
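+ |
+ | A stricter threshold changes which predictions count as class 1. A sketch
+ | reusing the inputs from the example below (with `threshold=0.7`, the 0.6
+ | prediction maps to class 0, so all four predictions match their labels):
+ |
+ | >>> m = keras.metrics.BinaryAccuracy(threshold=0.7)
+ | >>> m.update_state([[1], [1], [0], [0]], [[0.98], [1], [0], [0.6]])
+ | >>> m.result()
+ | 1.0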
+ |
+ | Example:
+ |
+ | >>> m = keras.metrics.BinaryAccuracy()
+ | >>> m.update_state([[1], [1], [0], [0]], [[0.98], [1], [0], [0.6]])
+ | >>> m.result()
+ | 0.75
+ |
+ | >>> m.reset_state()
+ | >>> m.update_state([[1], [1], [0], [0]], [[0.98], [1], [0], [0.6]],
+ | ... sample_weight=[1, 0, 0, 1])
+ | >>> m.result()
+ | 0.5
+ |
+ | Usage with `compile()` API:
+ |
+ | ```python
+ | model.compile(optimizer='sgd',
+ | loss='binary_crossentropy',
+ | metrics=[keras.metrics.BinaryAccuracy()])
+ | ```
+ |
+ | Method resolution order:
+ | BinaryAccuracy
+ | keras.src.metrics.reduction_metrics.MeanMetricWrapper
+ | keras.src.metrics.reduction_metrics.Mean
+ | keras.src.metrics.metric.Metric
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | name='binary_accuracy',
+ | dtype=None,
+ | threshold=0.5
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ | Return the serializable config of the metric.
+ |
+
diff --git a/.tether/man/metric_binary_crossentropy.txt b/.tether/man/metric_binary_crossentropy.txt
new file mode 100644
index 0000000000..5647f487b0
--- /dev/null
+++ b/.tether/man/metric_binary_crossentropy.txt
@@ -0,0 +1,68 @@
+Help on class BinaryCrossentropy in module keras.src.metrics.probabilistic_metrics:
+
+class BinaryCrossentropy(keras.src.metrics.reduction_metrics.MeanMetricWrapper)
+ | BinaryCrossentropy(name='binary_crossentropy', dtype=None, from_logits=False, label_smoothing=0)
+ |
+ | Computes the crossentropy metric between the labels and predictions.
+ |
+ | This is the crossentropy metric class to be used when there are only two
+ | label classes (0 and 1).
+ |
+ | Args:
+ | name: (Optional) string name of the metric instance.
+ | dtype: (Optional) data type of the metric result.
+ | from_logits: (Optional) Whether output is expected
+ | to be a logits tensor. By default, we consider
+ | that output encodes a probability distribution.
+ | label_smoothing: (Optional) Float in `[0, 1]`.
+ | When > 0, label values are smoothed,
+ | meaning the confidence on label values is relaxed.
+ | e.g. `label_smoothing=0.2` means that we will use
+ | a value of 0.1 for label "0" and 0.9 for label "1".
+ |
+ | Example:
+ |
+ | >>> m = keras.metrics.BinaryCrossentropy()
+ | >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]])
+ | >>> m.result()
+ | 0.81492424
+ |
+ | >>> m.reset_state()
+ | >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]],
+ | ... sample_weight=[1, 0])
+ | >>> m.result()
+ | 0.9162905
+ |
+ | Usage with `compile()` API:
+ |
+ | ```python
+ | model.compile(
+ | optimizer='sgd',
+ | loss='mse',
+ | metrics=[keras.metrics.BinaryCrossentropy()])
+ | ```
+ |
+ | Method resolution order:
+ | BinaryCrossentropy
+ | keras.src.metrics.reduction_metrics.MeanMetricWrapper
+ | keras.src.metrics.reduction_metrics.Mean
+ | keras.src.metrics.metric.Metric
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | name='binary_crossentropy',
+ | dtype=None,
+ | from_logits=False,
+ | label_smoothing=0
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ | Return the serializable config of the metric.
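+ |
+ | When a model outputs logits rather than probabilities, keep the loss and
+ | metric `from_logits` flags in sync (a sketch mirroring the guidance given
+ | for `AUC` above):
+ |
+ | ```python
+ | model.compile(
+ | optimizer='sgd',
+ | loss=keras.losses.BinaryCrossentropy(from_logits=True),
+ | metrics=[keras.metrics.BinaryCrossentropy(from_logits=True)])
+ | ```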
+ | + diff --git a/.tether/man/metric_binary_focal_crossentropy.txt b/.tether/man/metric_binary_focal_crossentropy.txt new file mode 100644 index 0000000000..ad3ddb52d5 --- /dev/null +++ b/.tether/man/metric_binary_focal_crossentropy.txt @@ -0,0 +1,60 @@ +__signature__ +keras.metrics.binary_focal_crossentropy( + y_true, + y_pred, + apply_class_balancing=False, + alpha=0.25, + gamma=2.0, + from_logits=False, + label_smoothing=0.0, + axis=-1 +) +__doc__ +Computes the binary focal crossentropy loss. + +According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it +helps to apply a focal factor to down-weight easy examples and focus more on +hard examples. By default, the focal tensor is computed as follows: + +`focal_factor = (1 - output) ** gamma` for class 1 +`focal_factor = output ** gamma` for class 0 +where `gamma` is a focusing parameter. When `gamma` = 0, there is no focal +effect on the binary crossentropy loss. + +If `apply_class_balancing == True`, this function also takes into account a +weight balancing factor for the binary classes 0 and 1 as follows: + +`weight = alpha` for class 1 (`target == 1`) +`weight = 1 - alpha` for class 0 +where `alpha` is a float in the range of `[0, 1]`. + +Args: + y_true: Ground truth values, of shape `(batch_size, d0, .. dN)`. + y_pred: The predicted values, of shape `(batch_size, d0, .. dN)`. + apply_class_balancing: A bool, whether to apply weight balancing on the + binary classes 0 and 1. + alpha: A weight balancing factor for class 1, default is `0.25` as + mentioned in the reference. The weight for class 0 is `1.0 - alpha`. + gamma: A focusing parameter, default is `2.0` as mentioned in the + reference. + from_logits: Whether `y_pred` is expected to be a logits tensor. By + default, we assume that `y_pred` encodes a probability distribution. + label_smoothing: Float in `[0, 1]`. If > `0` then smooth the labels by + squeezing them towards 0.5, that is, + using `1. - 0.5 * label_smoothing` for the target class + and `0.5 * label_smoothing` for the non-target class. + axis: The axis along which the mean is computed. Defaults to `-1`. + +Returns: + Binary focal crossentropy loss value + with shape = `[batch_size, d0, .. dN-1]`. + +Example: + +>>> y_true = [[0, 1], [0, 0]] +>>> y_pred = [[0.6, 0.4], [0.4, 0.6]] +>>> loss = keras.losses.binary_focal_crossentropy( +... y_true, y_pred, gamma=2) +>>> assert loss.shape == (2,) +>>> loss +array([0.330, 0.206], dtype=float32) diff --git a/.tether/man/metric_binary_iou.txt b/.tether/man/metric_binary_iou.txt new file mode 100644 index 0000000000..d0990adf26 --- /dev/null +++ b/.tether/man/metric_binary_iou.txt @@ -0,0 +1,122 @@ +Help on class BinaryIoU in module keras.src.metrics.iou_metrics: + +class BinaryIoU(IoU) + | BinaryIoU(target_class_ids=(0, 1), threshold=0.5, name=None, dtype=None) + | + | Computes the Intersection-Over-Union metric for class 0 and/or 1. + | + | Formula: + | + | ```python + | iou = true_positives / (true_positives + false_positives + false_negatives) + | ``` + | Intersection-Over-Union is a common evaluation metric for semantic image + | segmentation. + | + | To compute IoUs, the predictions are accumulated in a confusion matrix, + | weighted by `sample_weight` and the metric is then calculated from it. + | + | If `sample_weight` is `None`, weights default to 1. + | Use `sample_weight` of 0 to mask values. + | + | This class can be used to compute IoUs for a binary classification task + | where the predictions are provided as logits. 
+ | First a `threshold` is applied
+ | to the predicted values such that those that are below the `threshold` are
+ | converted to class 0 and those that are above the `threshold` are converted
+ | to class 1.
+ |
+ | IoUs for classes 0 and 1 are then computed, and the mean of IoUs for the
+ | classes that are specified by `target_class_ids` is returned.
+ |
+ | Note: with `threshold=0`, this metric has the same behavior as `IoU`.
+ |
+ | Args:
+ | target_class_ids: A tuple or list of target class ids for which the
+ | metric is returned. Options are `[0]`, `[1]`, or `[0, 1]`. With
+ | `[0]` (or `[1]`), the IoU metric for class 0 (or class 1,
+ | respectively) is returned. With `[0, 1]`, the mean of IoUs for the
+ | two classes is returned.
+ | threshold: A threshold that applies to the prediction logits to convert
+ | them to either predicted class 0 if the logit is below `threshold`
+ | or predicted class 1 if the logit is above `threshold`.
+ | name: (Optional) string name of the metric instance.
+ | dtype: (Optional) data type of the metric result.
+ |
+ | Example:
+ |
+ | >>> m = keras.metrics.BinaryIoU(target_class_ids=[0, 1], threshold=0.3)
+ | >>> m.update_state([0, 1, 0, 1], [0.1, 0.2, 0.4, 0.7])
+ | >>> m.result()
+ | 0.33333334
+ |
+ | >>> m.reset_state()
+ | >>> m.update_state([0, 1, 0, 1], [0.1, 0.2, 0.4, 0.7],
+ | ... sample_weight=[0.2, 0.3, 0.4, 0.1])
+ | >>> # cm = [[0.2, 0.4],
+ | >>> # [0.3, 0.1]]
+ | >>> # sum_row = [0.6, 0.4], sum_col = [0.5, 0.5],
+ | >>> # true_positives = [0.2, 0.1]
+ | >>> # iou = [0.222, 0.125]
+ | >>> m.result()
+ | 0.17361112
+ |
+ | Usage with `compile()` API:
+ |
+ | ```python
+ | model.compile(
+ | optimizer='sgd',
+ | loss='mse',
+ | metrics=[keras.metrics.BinaryIoU(
+ | target_class_ids=[0],
+ | threshold=0.5
+ | )]
+ | )
+ | ```
+ |
+ | Method resolution order:
+ | BinaryIoU
+ | IoU
+ | _IoUBase
+ | keras.src.metrics.metric.Metric
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | target_class_ids=(0, 1),
+ | threshold=0.5,
+ | name=None,
+ | dtype=None
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ | Return the serializable config of the metric.
+ |
+ | update_state(
+ | self,
+ | y_true,
+ | y_pred,
+ | sample_weight=None
+ | )
+ | Accumulates the confusion matrix statistics.
+ |
+ | Before the confusion matrix is updated, the predicted values are
+ | thresholded to be:
+ | 0 for values that are smaller than the `threshold`
+ | 1 for values that are larger or equal to the `threshold`
+ |
+ | Args:
+ | y_true: The ground truth values.
+ | y_pred: The predicted values.
+ | sample_weight: Optional weighting of each example. Can
+ | be a `Tensor` whose rank is either 0, or the same as `y_true`,
+ | and must be broadcastable to `y_true`. Defaults to `1`.
+ |
+ | Returns:
+ | Update op.
+ |
+
diff --git a/.tether/man/metric_categorical_accuracy.txt b/.tether/man/metric_categorical_accuracy.txt
new file mode 100644
index 0000000000..88accf2fa8
--- /dev/null
+++ b/.tether/man/metric_categorical_accuracy.txt
@@ -0,0 +1,69 @@
+Help on class CategoricalAccuracy in module keras.src.metrics.accuracy_metrics:
+
+class CategoricalAccuracy(keras.src.metrics.reduction_metrics.MeanMetricWrapper)
+ | CategoricalAccuracy(name='categorical_accuracy', dtype=None)
+ |
+ | Calculates how often predictions match one-hot labels.
+ |
+ | You can provide logits of classes as `y_pred`, since the argmax of
+ | logits and probabilities is the same.
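+ | For instance, the logits `[2.0, 1.0, 0.1]` and their softmax probabilities
+ | share the same argmax (class 0), so the computed accuracy is identical
+ | either way.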
+ |
+ | This metric creates two local variables, `total` and `count` that are used
+ | to compute the frequency with which `y_pred` matches `y_true`. This
+ | frequency is ultimately returned as `categorical accuracy`: an idempotent
+ | operation that simply divides `total` by `count`.
+ |
+ | `y_pred` and `y_true` should be passed in as vectors of probabilities,
+ | rather than as labels. If necessary, use `ops.one_hot` to expand `y_true` as
+ | a vector.
+ |
+ | If `sample_weight` is `None`, weights default to 1.
+ | Use `sample_weight` of 0 to mask values.
+ |
+ | Args:
+ | name: (Optional) string name of the metric instance.
+ | dtype: (Optional) data type of the metric result.
+ |
+ | Example:
+ |
+ | >>> m = keras.metrics.CategoricalAccuracy()
+ | >>> m.update_state([[0, 0, 1], [0, 1, 0]], [[0.1, 0.9, 0.8],
+ | ... [0.05, 0.95, 0]])
+ | >>> m.result()
+ | 0.5
+ |
+ | >>> m.reset_state()
+ | >>> m.update_state([[0, 0, 1], [0, 1, 0]], [[0.1, 0.9, 0.8],
+ | ... [0.05, 0.95, 0]],
+ | ... sample_weight=[0.7, 0.3])
+ | >>> m.result()
+ | 0.3
+ |
+ | Usage with `compile()` API:
+ |
+ | ```python
+ | model.compile(optimizer='sgd',
+ | loss='categorical_crossentropy',
+ | metrics=[keras.metrics.CategoricalAccuracy()])
+ | ```
+ |
+ | Method resolution order:
+ | CategoricalAccuracy
+ | keras.src.metrics.reduction_metrics.MeanMetricWrapper
+ | keras.src.metrics.reduction_metrics.Mean
+ | keras.src.metrics.metric.Metric
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | name='categorical_accuracy',
+ | dtype=None
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ | Return the serializable config of the metric.
+ |
+
diff --git a/.tether/man/metric_categorical_crossentropy.txt b/.tether/man/metric_categorical_crossentropy.txt
new file mode 100644
index 0000000000..228f2aaf1f
--- /dev/null
+++ b/.tether/man/metric_categorical_crossentropy.txt
@@ -0,0 +1,82 @@
+Help on class CategoricalCrossentropy in module keras.src.metrics.probabilistic_metrics:
+
+class CategoricalCrossentropy(keras.src.metrics.reduction_metrics.MeanMetricWrapper)
+ | CategoricalCrossentropy(name='categorical_crossentropy', dtype=None, from_logits=False, label_smoothing=0, axis=-1)
+ |
+ | Computes the crossentropy metric between the labels and predictions.
+ |
+ | This is the crossentropy metric class to be used when there are multiple
+ | label classes (2 or more). It assumes that labels are one-hot encoded,
+ | e.g., when label values are `[2, 0, 1]`, then
+ | `y_true` is `[[0, 0, 1], [1, 0, 0], [0, 1, 0]]`.
+ |
+ | Args:
+ | name: (Optional) string name of the metric instance.
+ | dtype: (Optional) data type of the metric result.
+ | from_logits: (Optional) Whether output is expected to be
+ | a logits tensor. By default, we consider that output
+ | encodes a probability distribution.
+ | label_smoothing: (Optional) Float in `[0, 1]`.
+ | When > 0, label values are smoothed, meaning the confidence
+ | on label values is relaxed. e.g. `label_smoothing=0.2` means
+ | that we will use a value of 0.1 for label
+ | "0" and 0.9 for label "1".
+ | axis: (Optional) Defaults to `-1`.
+ | The dimension along which entropy is computed.
+ |
+ | Example:
+ |
+ | >>> # EPSILON = 1e-7, y = y_true, y` = y_pred
+ | >>> # y` = clip_ops.clip_by_value(output, EPSILON, 1.
- EPSILON) + | >>> # y` = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] + | >>> # xent = -sum(y * log(y'), axis = -1) + | >>> # = -((log 0.95), (log 0.1)) + | >>> # = [0.051, 2.302] + | >>> # Reduced xent = (0.051 + 2.302) / 2 + | >>> m = keras.metrics.CategoricalCrossentropy() + | >>> m.update_state([[0, 1, 0], [0, 0, 1]], + | ... [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]) + | >>> m.result() + | 1.1769392 + | + | >>> m.reset_state() + | >>> m.update_state([[0, 1, 0], [0, 0, 1]], + | ... [[0.05, 0.95, 0], [0.1, 0.8, 0.1]], + | ... sample_weight=np.array([0.3, 0.7])) + | >>> m.result() + | 1.6271976 + | + | Usage with `compile()` API: + | + | ```python + | model.compile( + | optimizer='sgd', + | loss='mse', + | metrics=[keras.metrics.CategoricalCrossentropy()]) + | ``` + | + | Method resolution order: + | CategoricalCrossentropy + | keras.src.metrics.reduction_metrics.MeanMetricWrapper + | keras.src.metrics.reduction_metrics.Mean + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | name='categorical_crossentropy', + | dtype=None, + | from_logits=False, + | label_smoothing=0, + | axis=-1 + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + diff --git a/.tether/man/metric_categorical_focal_crossentropy.txt b/.tether/man/metric_categorical_focal_crossentropy.txt new file mode 100644 index 0000000000..948c93a8ce --- /dev/null +++ b/.tether/man/metric_categorical_focal_crossentropy.txt @@ -0,0 +1,44 @@ +__signature__ +keras.metrics.categorical_focal_crossentropy( + y_true, + y_pred, + alpha=0.25, + gamma=2.0, + from_logits=False, + label_smoothing=0.0, + axis=-1 +) +__doc__ +Computes the categorical focal crossentropy loss. + +Args: + y_true: Tensor of one-hot true targets. + y_pred: Tensor of predicted targets. + alpha: A weight balancing factor for all classes, default is `0.25` as + mentioned in the reference. It can be a list of floats or a scalar. + In the multi-class case, alpha may be set by inverse class + frequency by using `compute_class_weight` from `sklearn.utils`. + gamma: A focusing parameter, default is `2.0` as mentioned in the + reference. It helps to gradually reduce the importance given to + simple examples in a smooth manner. When `gamma` = 0, there is + no focal effect on the categorical crossentropy. + from_logits: Whether `y_pred` is expected to be a logits tensor. By + default, we assume that `y_pred` encodes a probability + distribution. + label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For + example, if `0.1`, use `0.1 / num_classes` for non-target labels + and `0.9 + 0.1 / num_classes` for target labels. + axis: Defaults to `-1`. The dimension along which the entropy is + computed. + +Returns: + Categorical focal crossentropy loss value. 
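+
+Usage with the `compile()` API (a sketch; the bare function can be passed in
+a `metrics` list, assuming one-hot encoded targets):
+
+```python
+model.compile(
+    optimizer='sgd',
+    loss='categorical_crossentropy',
+    metrics=[keras.metrics.categorical_focal_crossentropy])
+```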
+
+Example:
+
+>>> y_true = [[0, 1, 0], [0, 0, 1]]
+>>> y_pred = [[0.05, 0.9, 0.05], [0.1, 0.85, 0.05]]
+>>> loss = keras.losses.categorical_focal_crossentropy(y_true, y_pred)
+>>> assert loss.shape == (2,)
+>>> loss
+array([2.63401289e-04, 6.75912094e-01], dtype=float32)
diff --git a/.tether/man/metric_categorical_hinge.txt b/.tether/man/metric_categorical_hinge.txt
new file mode 100644
index 0000000000..d6fdf8118a
--- /dev/null
+++ b/.tether/man/metric_categorical_hinge.txt
@@ -0,0 +1,42 @@
+Help on class CategoricalHinge in module keras.src.metrics.hinge_metrics:
+
+class CategoricalHinge(keras.src.metrics.reduction_metrics.MeanMetricWrapper)
+ | CategoricalHinge(name='categorical_hinge', dtype=None)
+ |
+ | Computes the categorical hinge metric between `y_true` and `y_pred`.
+ |
+ | Args:
+ | name: (Optional) string name of the metric instance.
+ | dtype: (Optional) data type of the metric result.
+ |
+ | Example:
+ | >>> m = keras.metrics.CategoricalHinge()
+ | >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]])
+ | >>> m.result()
+ | 1.4000001
+ | >>> m.reset_state()
+ | >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]],
+ | ... sample_weight=[1, 0])
+ | >>> m.result()
+ | 1.2
+ |
+ | Method resolution order:
+ | CategoricalHinge
+ | keras.src.metrics.reduction_metrics.MeanMetricWrapper
+ | keras.src.metrics.reduction_metrics.Mean
+ | keras.src.metrics.metric.Metric
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | name='categorical_hinge',
+ | dtype=None
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ | Return the serializable config of the metric.
+ |
+
diff --git a/.tether/man/metric_cosine_similarity.txt b/.tether/man/metric_cosine_similarity.txt
new file mode 100644
index 0000000000..e3164f33af
--- /dev/null
+++ b/.tether/man/metric_cosine_similarity.txt
@@ -0,0 +1,71 @@
+Help on class CosineSimilarity in module keras.src.metrics.regression_metrics:
+
+class CosineSimilarity(keras.src.metrics.reduction_metrics.MeanMetricWrapper)
+ | CosineSimilarity(name='cosine_similarity', dtype=None, axis=-1)
+ |
+ | Computes the cosine similarity between the labels and predictions.
+ |
+ | Formula:
+ |
+ | ```python
+ | loss = sum(l2_norm(y_true) * l2_norm(y_pred))
+ | ```
+ | See: [Cosine Similarity](https://en.wikipedia.org/wiki/Cosine_similarity).
+ | This metric keeps the average cosine similarity between `predictions` and
+ | `labels` over a stream of data.
+ |
+ | Args:
+ | name: (Optional) string name of the metric instance.
+ | dtype: (Optional) data type of the metric result.
+ | axis: (Optional) Defaults to `-1`. The dimension along which the cosine
+ | similarity is computed.
+ |
+ | Example:
+ |
+ | >>> # l2_norm(y_true) = [[0., 1.], [1./1.414, 1./1.414]]
+ | >>> # l2_norm(y_pred) = [[1., 0.], [1./1.414, 1./1.414]]
+ | >>> # l2_norm(y_true) . l2_norm(y_pred) = [[0., 0.], [0.5, 0.5]]
+ | >>> # result = mean(sum(l2_norm(y_true) . l2_norm(y_pred), axis=1))
+ | >>> # = ((0. + 0.) + (0.5 + 0.5)) / 2
+ | >>> m = keras.metrics.CosineSimilarity(axis=1)
+ | >>> m.update_state([[0., 1.], [1., 1.]], [[1., 0.], [1., 1.]])
+ | >>> m.result()
+ | 0.49999997
+ | >>> m.reset_state()
+ | >>> m.update_state([[0., 1.], [1., 1.]], [[1., 0.], [1., 1.]],
+ | ... sample_weight=[0.3, 0.7])
+ | >>> m.result()
+ | 0.6999999
+ |
+ | Usage with `compile()` API:
+ |
+ | ```python
+ | model.compile(
+ | optimizer='sgd',
+ | loss='mse',
+ | metrics=[keras.metrics.CosineSimilarity(axis=1)])
+ | ```
+ |
+ | Method resolution order:
+ | CosineSimilarity
+ | keras.src.metrics.reduction_metrics.MeanMetricWrapper
+ | keras.src.metrics.reduction_metrics.Mean
+ | keras.src.metrics.metric.Metric
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | name='cosine_similarity',
+ | dtype=None,
+ | axis=-1
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ | Return the serializable config of the metric.
+ |
+
diff --git a/.tether/man/metric_f1_score.txt b/.tether/man/metric_f1_score.txt
new file mode 100644
index 0000000000..2bcedccf58
--- /dev/null
+++ b/.tether/man/metric_f1_score.txt
@@ -0,0 +1,74 @@
+Help on class F1Score in module keras.src.metrics.f_score_metrics:
+
+class F1Score(FBetaScore)
+ | F1Score(average=None, threshold=None, name='f1_score', dtype=None)
+ |
+ | Computes F-1 Score.
+ |
+ | Formula:
+ |
+ | ```python
+ | f1_score = 2 * (precision * recall) / (precision + recall)
+ | ```
+ | This is the harmonic mean of precision and recall.
+ | Its output range is `[0, 1]`. It works for both multi-class
+ | and multi-label classification.
+ |
+ | Args:
+ | average: Type of averaging to be performed on data.
+ | Acceptable values are `None`, `"micro"`, `"macro"`
+ | and `"weighted"`. Defaults to `None`.
+ | If `None`, no averaging is performed and `result()` will return
+ | the score for each class.
+ | If `"micro"`, compute metrics globally by counting the total
+ | true positives, false negatives and false positives.
+ | If `"macro"`, compute metrics for each label,
+ | and return their unweighted mean.
+ | This does not take label imbalance into account.
+ | If `"weighted"`, compute metrics for each label,
+ | and return their average weighted by support
+ | (the number of true instances for each label).
+ | This alters `"macro"` to account for label imbalance.
+ | It can result in a score that is not between precision and recall.
+ | threshold: Elements of `y_pred` greater than `threshold` are
+ | converted to be 1, and the rest 0. If `threshold` is
+ | `None`, the argmax of `y_pred` is converted to 1, and the rest to 0.
+ | name: Optional. String name of the metric instance.
+ | dtype: Optional. Data type of the metric result.
+ |
+ | Returns:
+ | F-1 Score: float.
+ |
+ | Example:
+ |
+ | >>> metric = keras.metrics.F1Score(threshold=0.5)
+ | >>> y_true = np.array([[1, 1, 1],
+ | ... [1, 0, 0],
+ | ... [1, 1, 0]], np.int32)
+ | >>> y_pred = np.array([[0.2, 0.6, 0.7],
+ | ... [0.2, 0.6, 0.6],
+ | ... [0.6, 0.8, 0.0]], np.float32)
+ | >>> metric.update_state(y_true, y_pred)
+ | >>> result = metric.result()
+ | >>> result
+ | array([0.5 , 0.8 , 0.6666667], dtype=float32)
+ |
+ | Method resolution order:
+ | F1Score
+ | FBetaScore
+ | keras.src.metrics.metric.Metric
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | average=None,
+ | threshold=None,
+ | name='f1_score',
+ | dtype=None
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ | Returns the serializable config of the metric.
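+ |
+ | With `average="micro"`, counts are pooled across classes before the score
+ | is computed. A sketch reusing the inputs above (here TP=4, FP=2, FN=2, so
+ | precision = recall = 4/6; the exact printed repr may vary by backend):
+ |
+ | >>> metric = keras.metrics.F1Score(average="micro", threshold=0.5)
+ | >>> metric.update_state(y_true, y_pred)
+ | >>> metric.result()
+ | 0.6666667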
+ | diff --git a/.tether/man/metric_false_negatives.txt b/.tether/man/metric_false_negatives.txt new file mode 100644 index 0000000000..74b2f407fe --- /dev/null +++ b/.tether/man/metric_false_negatives.txt @@ -0,0 +1,54 @@ +Help on class FalseNegatives in module keras.src.metrics.confusion_metrics: + +class FalseNegatives(_ConfusionMatrixConditionCount) + | FalseNegatives(thresholds=None, name=None, dtype=None) + | + | Calculates the number of false negatives. + | + | If `sample_weight` is given, calculates the sum of the weights of + | false negatives. This metric creates one local variable, `accumulator` + | that is used to keep track of the number of false negatives. + | + | If `sample_weight` is `None`, weights default to 1. + | Use `sample_weight` of 0 to mask values. + | + | Args: + | thresholds: (Optional) Defaults to `0.5`. A float value, or a Python + | list/tuple of float threshold values in `[0, 1]`. A threshold is + | compared with prediction values to determine the truth value of + | predictions (i.e., above the threshold is `True`, below is `False`). + | If used with a loss function that sets `from_logits=True` (i.e. no + | sigmoid applied to predictions), `thresholds` should be set to 0. + | One metric value is generated for each threshold value. + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Example: + | + | >>> m = keras.metrics.FalseNegatives() + | >>> m.update_state([0, 1, 1, 1], [0, 1, 0, 0]) + | >>> m.result() + | 2.0 + | + | >>> m.reset_state() + | >>> m.update_state([0, 1, 1, 1], [0, 1, 0, 0], sample_weight=[0, 0, 1, 0]) + | >>> m.result() + | 1.0 + | + | Method resolution order: + | FalseNegatives + | _ConfusionMatrixConditionCount + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | thresholds=None, + | name=None, + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + diff --git a/.tether/man/metric_false_positives.txt b/.tether/man/metric_false_positives.txt new file mode 100644 index 0000000000..b576b88cf9 --- /dev/null +++ b/.tether/man/metric_false_positives.txt @@ -0,0 +1,54 @@ +Help on class FalsePositives in module keras.src.metrics.confusion_metrics: + +class FalsePositives(_ConfusionMatrixConditionCount) + | FalsePositives(thresholds=None, name=None, dtype=None) + | + | Calculates the number of false positives. + | + | If `sample_weight` is given, calculates the sum of the weights of + | false positives. This metric creates one local variable, `accumulator` + | that is used to keep track of the number of false positives. + | + | If `sample_weight` is `None`, weights default to 1. + | Use `sample_weight` of 0 to mask values. + | + | Args: + | thresholds: (Optional) Defaults to `0.5`. A float value, or a Python + | list/tuple of float threshold values in `[0, 1]`. A threshold is + | compared with prediction values to determine the truth value of + | predictions (i.e., above the threshold is `True`, below is `False`). + | If used with a loss function that sets `from_logits=True` (i.e. no + | sigmoid applied to predictions), `thresholds` should be set to 0. + | One metric value is generated for each threshold value. + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. 
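+ |
+ | With a list of `thresholds`, one count is returned per threshold. A sketch
+ | reusing the inputs from the examples below (at both 0.25 and 0.75, the two
+ | predictions of 1 land above the threshold while their labels are 0; the
+ | exact printed form may vary):
+ |
+ | >>> m = keras.metrics.FalsePositives(thresholds=[0.25, 0.75])
+ | >>> m.update_state([0, 1, 0, 0], [0, 0, 1, 1])
+ | >>> m.result()
+ | array([2., 2.], dtype=float32)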
+ |
+ | Examples:
+ |
+ | >>> m = keras.metrics.FalsePositives()
+ | >>> m.update_state([0, 1, 0, 0], [0, 0, 1, 1])
+ | >>> m.result()
+ | 2.0
+ |
+ | >>> m.reset_state()
+ | >>> m.update_state([0, 1, 0, 0], [0, 0, 1, 1], sample_weight=[0, 0, 1, 0])
+ | >>> m.result()
+ | 1.0
+ |
+ | Method resolution order:
+ | FalsePositives
+ | _ConfusionMatrixConditionCount
+ | keras.src.metrics.metric.Metric
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | thresholds=None,
+ | name=None,
+ | dtype=None
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+
diff --git a/.tether/man/metric_fbeta_score.txt b/.tether/man/metric_fbeta_score.txt
new file mode 100644
index 0000000000..f1f058cdcb
--- /dev/null
+++ b/.tether/man/metric_fbeta_score.txt
@@ -0,0 +1,100 @@
+Help on class FBetaScore in module keras.src.metrics.f_score_metrics:
+
+class FBetaScore(keras.src.metrics.metric.Metric)
+ | FBetaScore(average=None, beta=1.0, threshold=None, name='fbeta_score', dtype=None)
+ |
+ | Computes F-Beta score.
+ |
+ | Formula:
+ |
+ | ```python
+ | b2 = beta ** 2
+ | f_beta_score = (1 + b2) * (precision * recall) / (precision * b2 + recall)
+ | ```
+ | This is the weighted harmonic mean of precision and recall.
+ | Its output range is `[0, 1]`. It works for both multi-class
+ | and multi-label classification.
+ |
+ | Args:
+ | average: Type of averaging to be performed across per-class results
+ | in the multi-class case.
+ | Acceptable values are `None`, `"micro"`, `"macro"` and
+ | `"weighted"`. Defaults to `None`.
+ | If `None`, no averaging is performed and `result()` will return
+ | the score for each class.
+ | If `"micro"`, compute metrics globally by counting the total
+ | true positives, false negatives and false positives.
+ | If `"macro"`, compute metrics for each label,
+ | and return their unweighted mean.
+ | This does not take label imbalance into account.
+ | If `"weighted"`, compute metrics for each label,
+ | and return their average weighted by support
+ | (the number of true instances for each label).
+ | This alters `"macro"` to account for label imbalance.
+ | It can result in a score that is not between precision and recall.
+ | beta: Determines the weight given to recall
+ | in the harmonic mean between precision and recall (see pseudocode
+ | equation above). Defaults to `1`.
+ | threshold: Elements of `y_pred` greater than `threshold` are
+ | converted to be 1, and the rest 0. If `threshold` is
+ | `None`, the argmax of `y_pred` is converted to 1, and the rest to 0.
+ | name: Optional. String name of the metric instance.
+ | dtype: Optional. Data type of the metric result.
+ |
+ | Returns:
+ | F-Beta Score: float.
+ |
+ | Example:
+ |
+ | >>> metric = keras.metrics.FBetaScore(beta=2.0, threshold=0.5)
+ | >>> y_true = np.array([[1, 1, 1],
+ | ... [1, 0, 0],
+ | ... [1, 1, 0]], np.int32)
+ | >>> y_pred = np.array([[0.2, 0.6, 0.7],
+ | ... [0.2, 0.6, 0.6],
+ | ... [0.6, 0.8, 0.0]], np.float32)
+ | >>> metric.update_state(y_true, y_pred)
+ | >>> result = metric.result()
+ | >>> result
+ | [0.3846154 , 0.90909094, 0.8333334 ]
+ |
+ | Method resolution order:
+ | FBetaScore
+ | keras.src.metrics.metric.Metric
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | average=None,
+ | beta=1.0,
+ | threshold=None,
+ | name='fbeta_score',
+ | dtype=None
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ | Returns the serializable config of the metric.
+ | + | reset_state(self) + | Reset all of the metric state variables. + | + | This function is called between epochs/steps, + | when a metric is evaluated during training. + | + | result(self) + | Compute the current metric value. + | + | Returns: + | A scalar tensor, or a dictionary of scalar tensors. + | + | update_state( + | self, + | y_true, + | y_pred, + | sample_weight=None + | ) + | Accumulate statistics for the metric. + | diff --git a/.tether/man/metric_hinge.txt b/.tether/man/metric_hinge.txt new file mode 100644 index 0000000000..91b2b9da78 --- /dev/null +++ b/.tether/man/metric_hinge.txt @@ -0,0 +1,46 @@ +Help on class Hinge in module keras.src.metrics.hinge_metrics: + +class Hinge(keras.src.metrics.reduction_metrics.MeanMetricWrapper) + | Hinge(name='hinge', dtype=None) + | + | Computes the hinge metric between `y_true` and `y_pred`. + | + | `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are + | provided we will convert them to -1 or 1. + | + | Args: + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Examples: + | + | >>> m = keras.metrics.Hinge() + | >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]]) + | >>> m.result() + | 1.3 + | >>> m.reset_state() + | >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]], + | ... sample_weight=[1, 0]) + | >>> m.result() + | 1.1 + | + | Method resolution order: + | Hinge + | keras.src.metrics.reduction_metrics.MeanMetricWrapper + | keras.src.metrics.reduction_metrics.Mean + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | name='hinge', + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + diff --git a/.tether/man/metric_huber.txt b/.tether/man/metric_huber.txt new file mode 100644 index 0000000000..1407147aa7 --- /dev/null +++ b/.tether/man/metric_huber.txt @@ -0,0 +1,37 @@ +__signature__ +keras.metrics.huber( + y_true, + y_pred, + delta=1.0 +) +__doc__ +Computes Huber loss value. + +Formula: +```python +for x in error: + if abs(x) <= delta: + loss.append(0.5 * x^2) + elif abs(x) > delta: + loss.append(delta * abs(x) - 0.5 * delta^2) + +loss = mean(loss, axis=-1) +``` +See: [Huber loss](https://en.wikipedia.org/wiki/Huber_loss). + +Example: + +>>> y_true = [[0, 1], [0, 0]] +>>> y_pred = [[0.6, 0.4], [0.4, 0.6]] +>>> loss = keras.losses.huber(y_true, y_pred) +0.155 + + +Args: + y_true: tensor of true targets. + y_pred: tensor of predicted targets. + delta: A float, the point where the Huber loss function changes from a + quadratic to linear. Defaults to `1.0`. + +Returns: + Tensor with one scalar loss entry per sample. diff --git a/.tether/man/metric_iou.txt b/.tether/man/metric_iou.txt new file mode 100644 index 0000000000..426104a021 --- /dev/null +++ b/.tether/man/metric_iou.txt @@ -0,0 +1,107 @@ +Help on class IoU in module keras.src.metrics.iou_metrics: + +class IoU(_IoUBase) + | IoU(num_classes, target_class_ids, name=None, dtype=None, ignore_class=None, sparse_y_true=True, sparse_y_pred=True, axis=-1) + | + | Computes the Intersection-Over-Union metric for specific target classes. + | + | Formula: + | + | ```python + | iou = true_positives / (true_positives + false_positives + false_negatives) + | ``` + | Intersection-Over-Union is a common evaluation metric for semantic image + | segmentation. 
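+ | For a single class with one true positive, one false positive and one
+ | false negative, the IoU is 1 / (1 + 1 + 1) ≈ 0.33, the value that appears
+ | in the examples below.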
+ | + | To compute IoUs, the predictions are accumulated in a confusion matrix, + | weighted by `sample_weight` and the metric is then calculated from it. + | + | If `sample_weight` is `None`, weights default to 1. + | Use `sample_weight` of 0 to mask values. + | + | Note, this class first computes IoUs for all individual classes, then + | returns the mean of IoUs for the classes that are specified by + | `target_class_ids`. If `target_class_ids` has only one id value, the IoU of + | that specific class is returned. + | + | Args: + | num_classes: The possible number of labels the prediction task can have. + | target_class_ids: A tuple or list of target class ids for which the + | metric is returned. To compute IoU for a specific class, a list + | (or tuple) of a single id value should be provided. + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | ignore_class: Optional integer. The ID of a class to be ignored during + | metric computation. This is useful, for example, in segmentation + | problems featuring a "void" class (commonly -1 or 255) in + | segmentation maps. By default (`ignore_class=None`), all classes are + | considered. + | sparse_y_true: Whether labels are encoded using integers or + | dense floating point vectors. If `False`, the `argmax` function + | is used to determine each sample's most likely associated label. + | sparse_y_pred: Whether predictions are encoded using integers or + | dense floating point vectors. If `False`, the `argmax` function + | is used to determine each sample's most likely associated label. + | axis: (Optional) -1 is the dimension containing the logits. + | Defaults to `-1`. + | + | Examples: + | + | >>> # cm = [[1, 1], + | >>> # [1, 1]] + | >>> # sum_row = [2, 2], sum_col = [2, 2], true_positives = [1, 1] + | >>> # iou = true_positives / (sum_row + sum_col - true_positives)) + | >>> # iou = [0.33, 0.33] + | >>> m = keras.metrics.IoU(num_classes=2, target_class_ids=[0]) + | >>> m.update_state([0, 0, 1, 1], [0, 1, 0, 1]) + | >>> m.result() + | 0.33333334 + | + | >>> m.reset_state() + | >>> m.update_state([0, 0, 1, 1], [0, 1, 0, 1], + | ... sample_weight=[0.3, 0.3, 0.3, 0.1]) + | >>> # cm = [[0.3, 0.3], + | >>> # [0.3, 0.1]] + | >>> # sum_row = [0.6, 0.4], sum_col = [0.6, 0.4], + | >>> # true_positives = [0.3, 0.1] + | >>> # iou = [0.33, 0.14] + | >>> m.result() + | 0.33333334 + | + | Usage with `compile()` API: + | + | ```python + | model.compile( + | optimizer='sgd', + | loss='mse', + | metrics=[keras.metrics.IoU(num_classes=2, target_class_ids=[0])]) + | ``` + | + | Method resolution order: + | IoU + | _IoUBase + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | num_classes, + | target_class_ids, + | name=None, + | dtype=None, + | ignore_class=None, + | sparse_y_true=True, + | sparse_y_pred=True, + | axis=-1 + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + | result(self) + | Compute the intersection-over-union via the confusion matrix. 
+ | + diff --git a/.tether/man/metric_kl_divergence.txt b/.tether/man/metric_kl_divergence.txt new file mode 100644 index 0000000000..4af570fa0e --- /dev/null +++ b/.tether/man/metric_kl_divergence.txt @@ -0,0 +1,63 @@ +Help on class KLDivergence in module keras.src.metrics.probabilistic_metrics: + +class KLDivergence(keras.src.metrics.reduction_metrics.MeanMetricWrapper) + | KLDivergence(name='kl_divergence', dtype=None) + | + | Computes Kullback-Leibler divergence metric between `y_true` and + | `y_pred`. + | + | Formula: + | + | ```python + | metric = y_true * log(y_true / y_pred) + | ``` + | + | `y_true` and `y_pred` are expected to be probability + | distributions, with values between 0 and 1. They will get + | clipped to the `[0, 1]` range. + | + | Args: + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Examples: + | + | >>> m = keras.metrics.KLDivergence() + | >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]]) + | >>> m.result() + | 0.45814306 + | + | >>> m.reset_state() + | >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]], + | ... sample_weight=[1, 0]) + | >>> m.result() + | 0.9162892 + | + | Usage with `compile()` API: + | + | ```python + | model.compile(optimizer='sgd', + | loss='mse', + | metrics=[keras.metrics.KLDivergence()]) + | ``` + | + | Method resolution order: + | KLDivergence + | keras.src.metrics.reduction_metrics.MeanMetricWrapper + | keras.src.metrics.reduction_metrics.Mean + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | name='kl_divergence', + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + diff --git a/.tether/man/metric_log_cosh.txt b/.tether/man/metric_log_cosh.txt new file mode 100644 index 0000000000..5de1e5daa0 --- /dev/null +++ b/.tether/man/metric_log_cosh.txt @@ -0,0 +1,28 @@ +__signature__ +keras.metrics.log_cosh(y_true, y_pred) +__doc__ +Logarithm of the hyperbolic cosine of the prediction error. + +Formula: +```python +loss = mean(log(cosh(y_pred - y_true)), axis=-1) +``` + +Note that `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small +`x` and to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works +mostly like the mean squared error, but will not be so strongly affected by +the occasional wildly incorrect prediction. + +Example: + +>>> y_true = [[0., 1.], [0., 0.]] +>>> y_pred = [[1., 1.], [0., 0.]] +>>> loss = keras.losses.log_cosh(y_true, y_pred) +0.108 + +Args: + y_true: Ground truth values with shape = `[batch_size, d0, .. dN]`. + y_pred: The predicted values with shape = `[batch_size, d0, .. dN]`. + +Returns: + Logcosh error values with shape = `[batch_size, d0, .. dN-1]`. diff --git a/.tether/man/metric_log_cosh_error.txt b/.tether/man/metric_log_cosh_error.txt new file mode 100644 index 0000000000..ba5072c354 --- /dev/null +++ b/.tether/man/metric_log_cosh_error.txt @@ -0,0 +1,60 @@ +Help on class LogCoshError in module keras.src.metrics.regression_metrics: + +class LogCoshError(keras.src.metrics.reduction_metrics.MeanMetricWrapper) + | LogCoshError(name='logcosh', dtype=None) + | + | Computes the logarithm of the hyperbolic cosine of the prediction error. 
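+ | Like the functional `log_cosh` above, it behaves approximately like the
+ | mean squared error for small errors while being much less affected by the
+ | occasional wildly incorrect prediction.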
+ |
+ | Formula:
+ |
+ | ```python
+ | error = y_pred - y_true
+ | logcosh = mean(log((exp(error) + exp(-error))/2), axis=-1)
+ | ```
+ |
+ | Args:
+ | name: (Optional) string name of the metric instance.
+ | dtype: (Optional) data type of the metric result.
+ |
+ | Example:
+ |
+ | >>> m = keras.metrics.LogCoshError()
+ | >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]])
+ | >>> m.result()
+ | 0.10844523
+ | >>> m.reset_state()
+ | >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]],
+ | ... sample_weight=[1, 0])
+ | >>> m.result()
+ | 0.21689045
+ |
+ | Usage with `compile()` API:
+ |
+ | ```python
+ | model.compile(optimizer='sgd',
+ | loss='mse',
+ | metrics=[keras.metrics.LogCoshError()])
+ | ```
+ |
+ | Method resolution order:
+ | LogCoshError
+ | keras.src.metrics.reduction_metrics.MeanMetricWrapper
+ | keras.src.metrics.reduction_metrics.Mean
+ | keras.src.metrics.metric.Metric
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | name='logcosh',
+ | dtype=None
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ | Return the serializable config of the metric.
+ |
+
diff --git a/.tether/man/metric_mean.txt b/.tether/man/metric_mean.txt
new file mode 100644
index 0000000000..ceda455395
--- /dev/null
+++ b/.tether/man/metric_mean.txt
@@ -0,0 +1,63 @@
+Help on class Mean in module keras.src.metrics.reduction_metrics:
+
+class Mean(keras.src.metrics.metric.Metric)
+ | Mean(name='mean', dtype=None)
+ |
+ | Compute the (weighted) mean of the given values.
+ |
+ | For example, if values is `[1, 3, 5, 7]` then the mean is 4.
+ | If `sample_weight` was specified as `[1, 1, 0, 0]` then the mean would be 2.
+ |
+ | This metric creates two variables, `total` and `count`.
+ | The mean value returned is simply `total` divided by `count`.
+ |
+ | Args:
+ | name: (Optional) string name of the metric instance.
+ | dtype: (Optional) data type of the metric result.
+ |
+ | Example:
+ |
+ | >>> m = Mean()
+ | >>> m.update_state([1, 3, 5, 7])
+ | >>> m.result()
+ | 4.0
+ |
+ | >>> m.reset_state()
+ | >>> m.update_state([1, 3, 5, 7], sample_weight=[1, 1, 0, 0])
+ | >>> m.result()
+ | 2.0
+ |
+ | Method resolution order:
+ | Mean
+ | keras.src.metrics.metric.Metric
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | name='mean',
+ | dtype=None
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | reset_state(self)
+ | Reset all of the metric state variables.
+ |
+ | This function is called between epochs/steps,
+ | when a metric is evaluated during training.
+ |
+ | result(self)
+ | Compute the current metric value.
+ |
+ | Returns:
+ | A scalar tensor, or a dictionary of scalar tensors.
+ |
+ | update_state(
+ | self,
+ | values,
+ | sample_weight=None
+ | )
+ | Accumulate statistics for the metric.
+ |
diff --git a/.tether/man/metric_mean_absolute_error.txt b/.tether/man/metric_mean_absolute_error.txt
new file mode 100644
index 0000000000..178c5d44bc
--- /dev/null
+++ b/.tether/man/metric_mean_absolute_error.txt
@@ -0,0 +1,58 @@
+Help on class MeanAbsoluteError in module keras.src.metrics.regression_metrics:
+
+class MeanAbsoluteError(keras.src.metrics.reduction_metrics.MeanMetricWrapper)
+ | MeanAbsoluteError(name='mean_absolute_error', dtype=None)
+ |
+ | Computes the mean absolute error between the labels and predictions.
+ |
+ | Formula:
+ |
+ | ```python
+ | loss = mean(abs(y_true - y_pred))
+ | ```
+ |
+ | Args:
+ | name: (Optional) string name of the metric instance.
+ | dtype: (Optional) data type of the metric result.
+ |
+ | Examples:
+ |
+ | >>> m = keras.metrics.MeanAbsoluteError()
+ | >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]])
+ | >>> m.result()
+ | 0.25
+ | >>> m.reset_state()
+ | >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]],
+ | ... sample_weight=[1, 0])
+ | >>> m.result()
+ | 0.5
+ |
+ | Usage with `compile()` API:
+ |
+ | ```python
+ | model.compile(
+ | optimizer='sgd',
+ | loss='mse',
+ | metrics=[keras.metrics.MeanAbsoluteError()])
+ | ```
+ |
+ | Method resolution order:
+ | MeanAbsoluteError
+ | keras.src.metrics.reduction_metrics.MeanMetricWrapper
+ | keras.src.metrics.reduction_metrics.Mean
+ | keras.src.metrics.metric.Metric
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | name='mean_absolute_error',
+ | dtype=None
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ | Return the serializable config of the metric.
+ |
+
diff --git a/.tether/man/metric_mean_absolute_percentage_error.txt b/.tether/man/metric_mean_absolute_percentage_error.txt
new file mode 100644
index 0000000000..c968a1ce3a
--- /dev/null
+++ b/.tether/man/metric_mean_absolute_percentage_error.txt
@@ -0,0 +1,60 @@
+Help on class MeanAbsolutePercentageError in module keras.src.metrics.regression_metrics:
+
+class MeanAbsolutePercentageError(keras.src.metrics.reduction_metrics.MeanMetricWrapper)
+ | MeanAbsolutePercentageError(name='mean_absolute_percentage_error', dtype=None)
+ |
+ | Computes mean absolute percentage error between `y_true` and `y_pred`.
+ |
+ | Formula:
+ |
+ | ```python
+ | loss = 100 * mean(abs((y_true - y_pred) / y_true))
+ | ```
+ |
+ | Args:
+ | name: (Optional) string name of the metric instance.
+ | dtype: (Optional) data type of the metric result.
+ |
+ | Example:
+ |
+ | >>> m = keras.metrics.MeanAbsolutePercentageError()
+ | >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]])
+ | >>> m.result()
+ | 250000000.0
+ | >>> m.reset_state()
+ | >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]],
+ | ... sample_weight=[1, 0])
+ | >>> m.result()
+ | 500000000.0
+ |
+ | Usage with `compile()` API:
+ |
+ | ```python
+ | model.compile(
+ | optimizer='sgd',
+ | loss='mse',
+ | metrics=[keras.metrics.MeanAbsolutePercentageError()])
+ | ```
+ |
+ | Method resolution order:
+ | MeanAbsolutePercentageError
+ | keras.src.metrics.reduction_metrics.MeanMetricWrapper
+ | keras.src.metrics.reduction_metrics.Mean
+ | keras.src.metrics.metric.Metric
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | name='mean_absolute_percentage_error',
+ | dtype=None
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ | Return the serializable config of the metric.
+ |
+
diff --git a/.tether/man/metric_mean_iou.txt b/.tether/man/metric_mean_iou.txt
new file mode 100644
index 0000000000..daa7918123
--- /dev/null
+++ b/.tether/man/metric_mean_iou.txt
@@ -0,0 +1,97 @@
+Help on class MeanIoU in module keras.src.metrics.iou_metrics:
+
+class MeanIoU(IoU)
+ | MeanIoU(num_classes, name=None, dtype=None, ignore_class=None, sparse_y_true=True, sparse_y_pred=True, axis=-1)
+ |
+ | Computes the mean Intersection-Over-Union metric.
+ |
+ | Formula:
+ |
+ | ```python
+ | iou = true_positives / (true_positives + false_positives + false_negatives)
+ | ```
+ | Intersection-Over-Union is a common evaluation metric for semantic image
+ | segmentation.
+ |
+ | To compute IoUs, the predictions are accumulated in a confusion matrix,
+ | weighted by `sample_weight` and the metric is then calculated from it.
+ |
+ | If `sample_weight` is `None`, weights default to 1.
+ | Use `sample_weight` of 0 to mask values.
+ |
+ | Note that this class first computes IoUs for all individual classes, then
+ | returns the mean of these values.
+ |
+ | Args:
+ | num_classes: The possible number of labels the prediction task can have.
+ | This value must be provided, since a confusion matrix of dimension =
+ | [num_classes, num_classes] will be allocated.
+ | name: (Optional) string name of the metric instance.
+ | dtype: (Optional) data type of the metric result.
+ | ignore_class: Optional integer. The ID of a class to be ignored during
+ | metric computation. This is useful, for example, in segmentation
+ | problems featuring a "void" class (commonly -1 or 255) in
+ | segmentation maps. By default (`ignore_class=None`), all classes are
+ | considered.
+ | sparse_y_true: Whether labels are encoded using integers or
+ | dense floating point vectors. If `False`, the `argmax` function
+ | is used to determine each sample's most likely associated label.
+ | sparse_y_pred: Whether predictions are encoded using integers or
+ | dense floating point vectors. If `False`, the `argmax` function
+ | is used to determine each sample's most likely associated label.
+ | axis: (Optional) The dimension containing the logits. Defaults to `-1`.
+ |
+ | Example:
+ |
+ | >>> # cm = [[1, 1],
+ | >>> # [1, 1]]
+ | >>> # sum_row = [2, 2], sum_col = [2, 2], true_positives = [1, 1]
+ | >>> # iou = true_positives / (sum_row + sum_col - true_positives)
+ | >>> # result = (1 / (2 + 2 - 1) + 1 / (2 + 2 - 1)) / 2 = 0.33
+ | >>> m = keras.metrics.MeanIoU(num_classes=2)
+ | >>> m.update_state([0, 0, 1, 1], [0, 1, 0, 1])
+ | >>> m.result()
+ | 0.33333334
+ |
+ | >>> m.reset_state()
+ | >>> m.update_state([0, 0, 1, 1], [0, 1, 0, 1],
+ | ... sample_weight=[0.3, 0.3, 0.3, 0.1])
+ | >>> m.result()
+ | 0.23809525
+ |
+ | Usage with `compile()` API:
+ |
+ | ```python
+ | model.compile(
+ | optimizer='sgd',
+ | loss='mse',
+ | metrics=[keras.metrics.MeanIoU(num_classes=2)])
+ | ```
+ |
+ | Method resolution order:
+ | MeanIoU
+ | IoU
+ | _IoUBase
+ | keras.src.metrics.metric.Metric
+ | builtins.object
+ |
+ | Methods defined here:
+ |
+ | __init__(
+ | self,
+ | num_classes,
+ | name=None,
+ | dtype=None,
+ | ignore_class=None,
+ | sparse_y_true=True,
+ | sparse_y_pred=True,
+ | axis=-1
+ | )
+ | Initialize self. See help(type(self)) for accurate signature.
+ |
+ | get_config(self)
+ | Return the serializable config of the metric.
+ |
+
diff --git a/.tether/man/metric_mean_squared_error.txt b/.tether/man/metric_mean_squared_error.txt
new file mode 100644
index 0000000000..9206a0c72e
--- /dev/null
+++ b/.tether/man/metric_mean_squared_error.txt
@@ -0,0 +1,43 @@
+Help on class MeanSquaredError in module keras.src.metrics.regression_metrics:
+
+class MeanSquaredError(keras.src.metrics.reduction_metrics.MeanMetricWrapper)
+ | MeanSquaredError(name='mean_squared_error', dtype=None)
+ |
+ | Computes the mean squared error between `y_true` and `y_pred`.
+ | + | Formula: + | + | ```python + | loss = mean(square(y_true - y_pred)) + | ``` + | + | Args: + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Example: + | + | >>> m = keras.metrics.MeanSquaredError() + | >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]]) + | >>> m.result() + | 0.25 + | + | Method resolution order: + | MeanSquaredError + | keras.src.metrics.reduction_metrics.MeanMetricWrapper + | keras.src.metrics.reduction_metrics.Mean + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | name='mean_squared_error', + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | diff --git a/.tether/man/metric_mean_squared_logarithmic_error.txt b/.tether/man/metric_mean_squared_logarithmic_error.txt new file mode 100644 index 0000000000..a1c5bfdb95 --- /dev/null +++ b/.tether/man/metric_mean_squared_logarithmic_error.txt @@ -0,0 +1,60 @@ +Help on class MeanSquaredLogarithmicError in module keras.src.metrics.regression_metrics: + +class MeanSquaredLogarithmicError(keras.src.metrics.reduction_metrics.MeanMetricWrapper) + | MeanSquaredLogarithmicError(name='mean_squared_logarithmic_error', dtype=None) + | + | Computes mean squared logarithmic error between `y_true` and `y_pred`. + | + | Formula: + | + | ```python + | loss = mean(square(log(y_true + 1) - log(y_pred + 1))) + | ``` + | + | Args: + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Example: + | + | >>> m = keras.metrics.MeanSquaredLogarithmicError() + | >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]]) + | >>> m.result() + | 0.12011322 + | >>> m.reset_state() + | >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]], + | ... sample_weight=[1, 0]) + | >>> m.result() + | 0.24022643 + | + | Usage with `compile()` API: + | + | ```python + | model.compile( + | optimizer='sgd', + | loss='mse', + | metrics=[keras.metrics.MeanSquaredLogarithmicError()]) + | ``` + | + | Method resolution order: + | MeanSquaredLogarithmicError + | keras.src.metrics.reduction_metrics.MeanMetricWrapper + | keras.src.metrics.reduction_metrics.Mean + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | name='mean_squared_logarithmic_error', + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + diff --git a/.tether/man/metric_mean_wrapper.txt b/.tether/man/metric_mean_wrapper.txt new file mode 100644 index 0000000000..07b8dd1970 --- /dev/null +++ b/.tether/man/metric_mean_wrapper.txt @@ -0,0 +1,62 @@ +Help on class MeanMetricWrapper in module keras.src.metrics.reduction_metrics: + +class MeanMetricWrapper(Mean) + | MeanMetricWrapper(fn, name=None, dtype=None, **kwargs) + | + | Wrap a stateless metric function with the `Mean` metric. + | + | You could use this class to quickly build a mean metric from a function. The + | function needs to have the signature `fn(y_true, y_pred)` and return a + | per-sample loss array. `MeanMetricWrapper.result()` will return + | the average metric value across all samples seen so far.
+ | + | For example: + | + | ```python + | def mse(y_true, y_pred): + | return (y_true - y_pred) ** 2 + | + | mse_metric = MeanMetricWrapper(fn=mse) + | ``` + | + | Args: + | fn: The metric function to wrap, with signature + | `fn(y_true, y_pred, **kwargs)`. + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | **kwargs: Keyword arguments to pass on to `fn`. + | + | Method resolution order: + | MeanMetricWrapper + | Mean + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | fn, + | name=None, + | dtype=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + | update_state( + | self, + | y_true, + | y_pred, + | sample_weight=None + | ) + | Accumulate statistics for the metric. + | + | ---------------------------------------------------------------------- + | Class methods defined here: + | + | from_config(config) from builtins.type + | + diff --git a/.tether/man/metric_one_hot_iou.txt b/.tether/man/metric_one_hot_iou.txt new file mode 100644 index 0000000000..7322963d00 --- /dev/null +++ b/.tether/man/metric_one_hot_iou.txt @@ -0,0 +1,111 @@ +Help on class OneHotIoU in module keras.src.metrics.iou_metrics: + +class OneHotIoU(IoU) + | OneHotIoU(num_classes, target_class_ids, name=None, dtype=None, ignore_class=None, sparse_y_pred=False, axis=-1) + | + | Computes the Intersection-Over-Union metric for one-hot encoded labels. + | + | Formula: + | + | ```python + | iou = true_positives / (true_positives + false_positives + false_negatives) + | ``` + | Intersection-Over-Union is a common evaluation metric for semantic image + | segmentation. + | + | To compute IoUs, the predictions are accumulated in a confusion matrix, + | weighted by `sample_weight` and the metric is then calculated from it. + | + | If `sample_weight` is `None`, weights default to 1. + | Use `sample_weight` of 0 to mask values. + | + | This class can be used to compute IoU for multi-class classification tasks + | where the labels are one-hot encoded (the last axis should have one + | dimension per class). Note that the predictions should also have the same + | shape. To compute the IoU, first the labels and predictions are converted + | back into integer format by taking the argmax over the class axis. Then the + | same computation steps as for the base `IoU` class apply. + | + | Note, if there is only one channel in the labels and predictions, this class + | is the same as class `IoU`. In this case, use `IoU` instead. + | + | Also, make sure that `num_classes` is equal to the number of classes in the + | data, to avoid a "labels out of bound" error when the confusion matrix is + | computed. + | + | Args: + | num_classes: The possible number of labels the prediction task can have. + | target_class_ids: A tuple or list of target class ids for which the + | metric is returned. To compute IoU for a specific class, a list + | (or tuple) of a single id value should be provided. + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | ignore_class: Optional integer. The ID of a class to be ignored during + | metric computation. This is useful, for example, in segmentation + | problems featuring a "void" class (commonly -1 or 255) in + | segmentation maps. By default (`ignore_class=None`), all classes are + | considered. 
+ | sparse_y_pred: Whether predictions are encoded using integers or + | dense floating point vectors. If `False`, the `argmax` function + | is used to determine each sample's most likely associated label. + | axis: (Optional) The dimension containing the logits. Defaults to `-1`. + | + | Example: + | + | >>> y_true = np.array([[0, 0, 1], [1, 0, 0], [0, 1, 0], [1, 0, 0]]) + | >>> y_pred = np.array([[0.2, 0.3, 0.5], [0.1, 0.2, 0.7], [0.5, 0.3, 0.1], + | ... [0.1, 0.4, 0.5]]) + | >>> sample_weight = [0.1, 0.2, 0.3, 0.4] + | >>> m = keras.metrics.OneHotIoU(num_classes=3, target_class_ids=[0, 2]) + | >>> m.update_state( + | ... y_true=y_true, y_pred=y_pred, sample_weight=sample_weight) + | >>> # cm = [[0, 0, 0.2+0.4], + | >>> # [0.3, 0, 0], + | >>> # [0, 0, 0.1]] + | >>> # sum_row = [0.3, 0, 0.7], sum_col = [0.6, 0.3, 0.1] + | >>> # true_positives = [0, 0, 0.1] + | >>> # single_iou = true_positives / (sum_row + sum_col - true_positives) + | >>> # mean_iou = (0 / (0.3 + 0.6 - 0) + 0.1 / (0.7 + 0.1 - 0.1)) / 2 + | >>> m.result() + | 0.071 + | + | Usage with `compile()` API: + | + | ```python + | model.compile( + | optimizer='sgd', + | loss='mse', + | metrics=[keras.metrics.OneHotIoU( + | num_classes=3, + | target_class_ids=[1] + | )] + | ) + | ``` + | + | Method resolution order: + | OneHotIoU + | IoU + | _IoUBase + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | num_classes, + | target_class_ids, + | name=None, + | dtype=None, + | ignore_class=None, + | sparse_y_pred=False, + | axis=-1 + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + diff --git a/.tether/man/metric_one_hot_mean_iou.txt b/.tether/man/metric_one_hot_mean_iou.txt new file mode 100644 index 0000000000..8ec82cd3c9 --- /dev/null +++ b/.tether/man/metric_one_hot_mean_iou.txt @@ -0,0 +1,106 @@ +Help on class OneHotMeanIoU in module keras.src.metrics.iou_metrics: + +class OneHotMeanIoU(MeanIoU) + | OneHotMeanIoU(num_classes, name=None, dtype=None, ignore_class=None, sparse_y_pred=False, axis=-1) + | + | Computes mean Intersection-Over-Union metric for one-hot encoded labels. + | + | Formula: + | + | ```python + | iou = true_positives / (true_positives + false_positives + false_negatives) + | ``` + | Intersection-Over-Union is a common evaluation metric for semantic image + | segmentation. + | + | To compute IoUs, the predictions are accumulated in a confusion matrix, + | weighted by `sample_weight` and the metric is then calculated from it. + | + | If `sample_weight` is `None`, weights default to 1. + | Use `sample_weight` of 0 to mask values. + | + | This class can be used to compute the mean IoU for multi-class + | classification tasks where the labels are one-hot encoded (the last axis + | should have one dimension per class). Note that the predictions should also + | have the same shape. To compute the mean IoU, first the labels and + | predictions are converted back into integer format by taking the argmax over + | the class axis. Then the same computation steps as for the base `MeanIoU` + | class apply. + | + | Note, if there is only one channel in the labels and predictions, this class + | is the same as class `MeanIoU`. In this case, use `MeanIoU` instead. + | + | Also, make sure that `num_classes` is equal to the number of classes in the + | data, to avoid a "labels out of bound" error when the confusion matrix is + | computed.
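+ |
+ | As a plain-NumPy sketch of the decoding step described above (not part
+ | of the Keras API; the variable names here are illustrative), one-hot
+ | labels and probability vectors reduce to integer class ids via `argmax`:
+ |
+ | ```python
+ | import numpy as np
+ |
+ | y_true = np.array([[0, 0, 1], [1, 0, 0]])             # one-hot labels
+ | y_pred = np.array([[0.2, 0.3, 0.5], [0.6, 0.3, 0.1]]) # probabilities
+ | # argmax over the class axis recovers the integer format that the
+ | # base `MeanIoU` computation consumes
+ | true_ids = np.argmax(y_true, axis=-1)                 # array([2, 0])
+ | pred_ids = np.argmax(y_pred, axis=-1)                 # array([2, 0])
+ | ```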
+ | + | Args: + | num_classes: The possible number of labels the prediction task can have. + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | ignore_class: Optional integer. The ID of a class to be ignored during + | metric computation. This is useful, for example, in segmentation + | problems featuring a "void" class (commonly -1 or 255) in + | segmentation maps. By default (`ignore_class=None`), all classes are + | considered. + | sparse_y_pred: Whether predictions are encoded using natural numbers or + | probability distribution vectors. If `False`, the `argmax` + | function will be used to determine each sample's most likely + | associated label. + | axis: (Optional) The dimension containing the logits. Defaults to `-1`. + | + | Example: + | + | >>> y_true = np.array([[0, 0, 1], [1, 0, 0], [0, 1, 0], [1, 0, 0]]) + | >>> y_pred = np.array([[0.2, 0.3, 0.5], [0.1, 0.2, 0.7], [0.5, 0.3, 0.1], + | ... [0.1, 0.4, 0.5]]) + | >>> sample_weight = [0.1, 0.2, 0.3, 0.4] + | >>> m = keras.metrics.OneHotMeanIoU(num_classes=3) + | >>> m.update_state( + | ... y_true=y_true, y_pred=y_pred, sample_weight=sample_weight) + | >>> # cm = [[0, 0, 0.2+0.4], + | >>> # [0.3, 0, 0], + | >>> # [0, 0, 0.1]] + | >>> # sum_row = [0.3, 0, 0.7], sum_col = [0.6, 0.3, 0.1] + | >>> # true_positives = [0, 0, 0.1] + | >>> # single_iou = true_positives / (sum_row + sum_col - true_positives) + | >>> # mean_iou = (0 + 0 + 0.1 / (0.7 + 0.1 - 0.1)) / 3 + | >>> m.result() + | 0.048 + | + | Usage with `compile()` API: + | + | ```python + | model.compile( + | optimizer='sgd', + | loss='mse', + | metrics=[keras.metrics.OneHotMeanIoU(num_classes=3)]) + | ``` + | + | Method resolution order: + | OneHotMeanIoU + | MeanIoU + | IoU + | _IoUBase + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | num_classes, + | name=None, + | dtype=None, + | ignore_class=None, + | sparse_y_pred=False, + | axis=-1 + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + diff --git a/.tether/man/metric_poisson.txt b/.tether/man/metric_poisson.txt new file mode 100644 index 0000000000..42ac18bdd6 --- /dev/null +++ b/.tether/man/metric_poisson.txt @@ -0,0 +1,60 @@ +Help on class Poisson in module keras.src.metrics.probabilistic_metrics: + +class Poisson(keras.src.metrics.reduction_metrics.MeanMetricWrapper) + | Poisson(name='poisson', dtype=None) + | + | Computes the Poisson metric between `y_true` and `y_pred`. + | + | Formula: + | + | ```python + | metric = y_pred - y_true * log(y_pred) + | ``` + | + | Args: + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Example: + | + | >>> m = keras.metrics.Poisson() + | >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]]) + | >>> m.result() + | 0.49999997 + | + | >>> m.reset_state() + | >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]], + | ...
sample_weight=[1, 0]) + | >>> m.result() + | 0.99999994 + | + | Usage with `compile()` API: + | + | ```python + | model.compile(optimizer='sgd', + | loss='mse', + | metrics=[keras.metrics.Poisson()]) + | ``` + | + | Method resolution order: + | Poisson + | keras.src.metrics.reduction_metrics.MeanMetricWrapper + | keras.src.metrics.reduction_metrics.Mean + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | name='poisson', + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + diff --git a/.tether/man/metric_precision.txt b/.tether/man/metric_precision.txt new file mode 100644 index 0000000000..98beddf038 --- /dev/null +++ b/.tether/man/metric_precision.txt @@ -0,0 +1,135 @@ +Help on class Precision in module keras.src.metrics.confusion_metrics: + +class Precision(keras.src.metrics.metric.Metric) + | Precision(thresholds=None, top_k=None, class_id=None, name=None, dtype=None) + | + | Computes the precision of the predictions with respect to the labels. + | + | The metric creates two local variables, `true_positives` and + | `false_positives` that are used to compute the precision. This value is + | ultimately returned as `precision`, an idempotent operation that simply + | divides `true_positives` by the sum of `true_positives` and + | `false_positives`. + | + | If `sample_weight` is `None`, weights default to 1. + | Use `sample_weight` of 0 to mask values. + | + | If `top_k` is set, we'll calculate precision as how often on average a class + | among the top-k classes with the highest predicted values of a batch entry + | is correct and can be found in the label for that entry. + | + | If `class_id` is specified, we calculate precision by considering only the + | entries in the batch for which `class_id` is above the threshold and/or in + | the top-k highest predictions, and computing the fraction of them for which + | `class_id` is indeed a correct label. + | + | Args: + | thresholds: (Optional) A float value, or a Python list/tuple of float + | threshold values in `[0, 1]`. A threshold is compared with + | prediction values to determine the truth value of predictions (i.e., + | above the threshold is `True`, below is `False`). If used with a + | loss function that sets `from_logits=True` (i.e. no sigmoid applied + | to predictions), `thresholds` should be set to 0. One metric value + | is generated for each threshold value. If neither `thresholds` nor + | `top_k` are set, the default is to calculate precision with + | `thresholds=0.5`. + | top_k: (Optional) Unset by default. An int value specifying the top-k + | predictions to consider when calculating precision. + | class_id: (Optional) Integer class ID for which we want binary metrics. + | This must be in the half-open interval `[0, num_classes)`, where + | `num_classes` is the last dimension of predictions. + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. 
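+ |
+ | As a plain-NumPy cross-check of the definition above (a sketch, not a
+ | Keras API): with the default `thresholds=0.5`, predictions are
+ | binarized and precision is `tp / (tp + fp)`, matching the first
+ | Example below.
+ |
+ | ```python
+ | import numpy as np
+ |
+ | y_true = np.array([0, 1, 1, 1])
+ | y_pred = np.array([1, 0, 1, 1]) > 0.5   # binarize at 0.5
+ | tp = np.sum(y_pred & (y_true == 1))     # 2
+ | fp = np.sum(y_pred & (y_true == 0))     # 1
+ | print(tp / (tp + fp))                   # 0.666..., as below
+ | ```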
+ | + | Example: + | + | >>> m = keras.metrics.Precision() + | >>> m.update_state([0, 1, 1, 1], [1, 0, 1, 1]) + | >>> m.result() + | 0.6666667 + | + | >>> m.reset_state() + | >>> m.update_state([0, 1, 1, 1], [1, 0, 1, 1], sample_weight=[0, 0, 1, 0]) + | >>> m.result() + | 1.0 + | + | >>> # With top_k=2, it will calculate precision over y_true[:2] + | >>> # and y_pred[:2] + | >>> m = keras.metrics.Precision(top_k=2) + | >>> m.update_state([0, 0, 1, 1], [1, 1, 1, 1]) + | >>> m.result() + | 0.0 + | + | >>> # With top_k=4, it will calculate precision over y_true[:4] + | >>> # and y_pred[:4] + | >>> m = keras.metrics.Precision(top_k=4) + | >>> m.update_state([0, 0, 1, 1], [1, 1, 1, 1]) + | >>> m.result() + | 0.5 + | + | Usage with `compile()` API: + | + | ```python + | model.compile(optimizer='sgd', + | loss='binary_crossentropy', + | metrics=[keras.metrics.Precision()]) + | ``` + | + | Usage with a loss with `from_logits=True`: + | + | ```python + | model.compile(optimizer='adam', + | loss=keras.losses.BinaryCrossentropy(from_logits=True), + | metrics=[keras.metrics.Precision(thresholds=0)]) + | ``` + | + | Method resolution order: + | Precision + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | thresholds=None, + | top_k=None, + | class_id=None, + | name=None, + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + | reset_state(self) + | Reset all of the metric state variables. + | + | This function is called between epochs/steps, + | when a metric is evaluated during training. + | + | result(self) + | Compute the current metric value. + | + | Returns: + | A scalar tensor, or a dictionary of scalar tensors. + | + | update_state( + | self, + | y_true, + | y_pred, + | sample_weight=None + | ) + | Accumulates true positive and false positive statistics. + | + | Args: + | y_true: The ground truth values, with the same dimensions as + | `y_pred`. Will be cast to `bool`. + | y_pred: The predicted values. Each element must be in the range + | `[0, 1]`. + | sample_weight: Optional weighting of each example. Defaults to `1`. + | Can be a tensor whose rank is either 0, or the same rank as + | `y_true`, and must be broadcastable to `y_true`. + | + diff --git a/.tether/man/metric_precision_at_recall.txt b/.tether/man/metric_precision_at_recall.txt new file mode 100644 index 0000000000..590f849d5a --- /dev/null +++ b/.tether/man/metric_precision_at_recall.txt @@ -0,0 +1,80 @@ +Help on class PrecisionAtRecall in module keras.src.metrics.confusion_metrics: + +class PrecisionAtRecall(SensitivitySpecificityBase) + | PrecisionAtRecall(recall, num_thresholds=200, class_id=None, name=None, dtype=None) + | + | Computes best precision where recall is >= specified value. + | + | This metric creates four local variables, `true_positives`, + | `true_negatives`, `false_positives` and `false_negatives` that are used to + | compute the precision at the given recall. The threshold for the given + | recall value is computed and used to evaluate the corresponding precision. + | + | If `sample_weight` is `None`, weights default to 1. + | Use `sample_weight` of 0 to mask values. + | + | If `class_id` is specified, we calculate precision by considering only the + | entries in the batch for which `class_id` is above the threshold + | predictions, and computing the fraction of them for which `class_id` is + | indeed a correct label. 
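+ |
+ | A rough sketch of the threshold search described above (plain NumPy,
+ | illustrative only; the actual implementation tracks the four
+ | confusion-matrix variables over `num_thresholds` evenly spaced
+ | thresholds):
+ |
+ | ```python
+ | import numpy as np
+ |
+ | def precision_at_recall(y_true, scores, recall_target, n=200):
+ |     best = 0.0
+ |     for t in np.linspace(0.0, 1.0, n):
+ |         pred = scores >= t
+ |         tp = np.sum(pred & (y_true == 1))
+ |         fn = np.sum(~pred & (y_true == 1))
+ |         fp = np.sum(pred & (y_true == 0))
+ |         recall = tp / max(tp + fn, 1)
+ |         # keep the best precision among thresholds meeting the target
+ |         if recall >= recall_target:
+ |             best = max(best, tp / max(tp + fp, 1))
+ |     return best
+ | ```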
+ | + | Args: + | recall: A scalar value in range `[0, 1]`. + | num_thresholds: (Optional) Defaults to 200. The number of thresholds to + | use for matching the given recall. + | class_id: (Optional) Integer class ID for which we want binary metrics. + | This must be in the half-open interval `[0, num_classes)`, where + | `num_classes` is the last dimension of predictions. + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Example: + | + | >>> m = keras.metrics.PrecisionAtRecall(0.5) + | >>> m.update_state([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8]) + | >>> m.result() + | 0.5 + | + | >>> m.reset_state() + | >>> m.update_state([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8], + | ... sample_weight=[2, 2, 2, 1, 1]) + | >>> m.result() + | 0.33333333 + | + | Usage with `compile()` API: + | + | ```python + | model.compile( + | optimizer='sgd', + | loss='binary_crossentropy', + | metrics=[keras.metrics.PrecisionAtRecall(recall=0.8)]) + | ``` + | + | Method resolution order: + | PrecisionAtRecall + | SensitivitySpecificityBase + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | recall, + | num_thresholds=200, + | class_id=None, + | name=None, + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + | result(self) + | Compute the current metric value. + | + | Returns: + | A scalar tensor, or a dictionary of scalar tensors. + | + diff --git a/.tether/man/metric_r2_score.txt b/.tether/man/metric_r2_score.txt new file mode 100644 index 0000000000..4b088a70b9 --- /dev/null +++ b/.tether/man/metric_r2_score.txt @@ -0,0 +1,104 @@ +Help on class R2Score in module keras.src.metrics.regression_metrics: + +class R2Score(keras.src.metrics.metric.Metric) + | R2Score(class_aggregation='uniform_average', num_regressors=0, name='r2_score', dtype=None) + | + | Computes R2 score. + | + | Formula: + | + | ```python + | sum_squares_residuals = sum((y_true - y_pred) ** 2) + | sum_squares = sum((y_true - mean(y_true)) ** 2) + | R2 = 1 - sum_squares_residuals / sum_squares + | ``` + | + | This is also called the + | [coefficient of determination]( + | https://en.wikipedia.org/wiki/Coefficient_of_determination). + | + | It indicates how close the fitted regression line + | is to ground-truth data. + | + | - The highest score possible is 1.0. It indicates that the predictors + | perfectly account for variation in the target. + | - A score of 0.0 indicates that the predictors do not + | account for variation in the target. + | - It can also be negative if the model is worse than random. + | + | This metric can also compute the "Adjusted R2" score. + | + | Args: + | class_aggregation: Specifies how to aggregate scores corresponding to + | different output classes (or target dimensions), + | i.e. different dimensions on the last axis of the predictions. + | Equivalent to `multioutput` argument in Scikit-Learn. + | Should be one of + | `None` (no aggregation), `"uniform_average"`, + | `"variance_weighted_average"`. + | num_regressors: Number of independent regressors used + | ("Adjusted R2" score). 0 is the standard R2 score. + | Defaults to `0`. + | name: Optional. string name of the metric instance. + | dtype: Optional. data type of the metric result.
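+ |
+ | The formula above can be checked by hand (a plain-NumPy sketch,
+ | independent of the Keras implementation); it reproduces the Example
+ | below:
+ |
+ | ```python
+ | import numpy as np
+ |
+ | y_true = np.array([1.0, 4.0, 3.0])
+ | y_pred = np.array([2.0, 4.0, 4.0])
+ | ss_res = np.sum((y_true - y_pred) ** 2)         # 2.0
+ | ss_tot = np.sum((y_true - y_true.mean()) ** 2)  # 4.666...
+ | print(1.0 - ss_res / ss_tot)                    # 0.5714...
+ | ```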
+ | + | Example: + | + | >>> y_true = np.array([[1], [4], [3]], dtype=np.float32) + | >>> y_pred = np.array([[2], [4], [4]], dtype=np.float32) + | >>> metric = keras.metrics.R2Score() + | >>> metric.update_state(y_true, y_pred) + | >>> result = metric.result() + | >>> result + | 0.57142854 + | + | Method resolution order: + | R2Score + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | class_aggregation='uniform_average', + | num_regressors=0, + | name='r2_score', + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + | reset_state(self) + | Reset all of the metric state variables. + | + | This function is called between epochs/steps, + | when a metric is evaluated during training. + | + | result(self) + | Compute the current metric value. + | + | Returns: + | A scalar tensor, or a dictionary of scalar tensors. + | + | update_state( + | self, + | y_true, + | y_pred, + | sample_weight=None + | ) + | Accumulates R2 score statistics. + | + | Args: + | y_true: The ground truth values. + | y_pred: The predicted values. + | sample_weight: Optional weighting of each example. Can + | be a `Tensor` whose rank is either 0, or the same rank as + | `y_true`, and must be broadcastable to `y_true`. + | Defaults to `1`. + | + | Returns: + | Update op. + | diff --git a/.tether/man/metric_recall.txt b/.tether/man/metric_recall.txt new file mode 100644 index 0000000000..a944e301f6 --- /dev/null +++ b/.tether/man/metric_recall.txt @@ -0,0 +1,119 @@ +Help on class Recall in module keras.src.metrics.confusion_metrics: + +class Recall(keras.src.metrics.metric.Metric) + | Recall(thresholds=None, top_k=None, class_id=None, name=None, dtype=None) + | + | Computes the recall of the predictions with respect to the labels. + | + | This metric creates two local variables, `true_positives` and + | `false_negatives`, that are used to compute the recall. This value is + | ultimately returned as `recall`, an idempotent operation that simply divides + | `true_positives` by the sum of `true_positives` and `false_negatives`. + | + | If `sample_weight` is `None`, weights default to 1. + | Use `sample_weight` of 0 to mask values. + | + | If `top_k` is set, recall will be computed as how often on average a class + | among the labels of a batch entry is in the top-k predictions. + | + | If `class_id` is specified, we calculate recall by considering only the + | entries in the batch for which `class_id` is in the label, and computing the + | fraction of them for which `class_id` is above the threshold and/or in the + | top-k predictions. + | + | Args: + | thresholds: (Optional) A float value, or a Python list/tuple of float + | threshold values in `[0, 1]`. A threshold is compared with + | prediction values to determine the truth value of predictions (i.e., + | above the threshold is `True`, below is `False`). If used with a + | loss function that sets `from_logits=True` (i.e. no sigmoid + | applied to predictions), `thresholds` should be set to 0. + | One metric value is generated for each threshold value. + | If neither `thresholds` nor `top_k` are set, + | the default is to calculate recall with `thresholds=0.5`. + | top_k: (Optional) Unset by default. An int value specifying the top-k + | predictions to consider when calculating recall. + | class_id: (Optional) Integer class ID for which we want binary metrics.
+ | This must be in the half-open interval `[0, num_classes)`, where + | `num_classes` is the last dimension of predictions. + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Example: + | + | >>> m = keras.metrics.Recall() + | >>> m.update_state([0, 1, 1, 1], [1, 0, 1, 1]) + | >>> m.result() + | 0.6666667 + | + | >>> m.reset_state() + | >>> m.update_state([0, 1, 1, 1], [1, 0, 1, 1], sample_weight=[0, 0, 1, 0]) + | >>> m.result() + | 1.0 + | + | Usage with `compile()` API: + | + | ```python + | model.compile(optimizer='sgd', + | loss='binary_crossentropy', + | metrics=[keras.metrics.Recall()]) + | ``` + | + | Usage with a loss with `from_logits=True`: + | + | ```python + | model.compile(optimizer='adam', + | loss=keras.losses.BinaryCrossentropy(from_logits=True), + | metrics=[keras.metrics.Recall(thresholds=0)]) + | ``` + | + | Method resolution order: + | Recall + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | thresholds=None, + | top_k=None, + | class_id=None, + | name=None, + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + | reset_state(self) + | Reset all of the metric state variables. + | + | This function is called between epochs/steps, + | when a metric is evaluated during training. + | + | result(self) + | Compute the current metric value. + | + | Returns: + | A scalar tensor, or a dictionary of scalar tensors. + | + | update_state( + | self, + | y_true, + | y_pred, + | sample_weight=None + | ) + | Accumulates true positive and false negative statistics. + | + | Args: + | y_true: The ground truth values, with the same dimensions as + | `y_pred`. Will be cast to `bool`. + | y_pred: The predicted values. Each element must be in the range + | `[0, 1]`. + | sample_weight: Optional weighting of each example. Defaults to `1`. + | Can be a tensor whose rank is either 0, or the same rank as + | `y_true`, and must be broadcastable to `y_true`. + | + diff --git a/.tether/man/metric_recall_at_precision.txt b/.tether/man/metric_recall_at_precision.txt new file mode 100644 index 0000000000..952931207a --- /dev/null +++ b/.tether/man/metric_recall_at_precision.txt @@ -0,0 +1,83 @@ +Help on class RecallAtPrecision in module keras.src.metrics.confusion_metrics: + +class RecallAtPrecision(SensitivitySpecificityBase) + | RecallAtPrecision(precision, num_thresholds=200, class_id=None, name=None, dtype=None) + | + | Computes best recall where precision is >= specified value. + | + | For a given score-label distribution, the required precision might not + | be achievable; in that case, 0.0 is returned as recall. + | + | This metric creates four local variables, `true_positives`, + | `true_negatives`, `false_positives` and `false_negatives` that are used to + | compute the recall at the given precision. The threshold for the given + | precision value is computed and used to evaluate the corresponding recall. + | + | If `sample_weight` is `None`, weights default to 1. + | Use `sample_weight` of 0 to mask values. + | + | If `class_id` is specified, we calculate precision by considering only the + | entries in the batch for which `class_id` is above the threshold + | predictions, and computing the fraction of them for which `class_id` is + | indeed a correct label. + | + | Args: + | precision: A scalar value in range `[0, 1]`.
+ | num_thresholds: (Optional) Defaults to 200. The number of thresholds + | to use for matching the given precision. + | class_id: (Optional) Integer class ID for which we want binary metrics. + | This must be in the half-open interval `[0, num_classes)`, where + | `num_classes` is the last dimension of predictions. + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Example: + | + | >>> m = keras.metrics.RecallAtPrecision(0.8) + | >>> m.update_state([0, 0, 1, 1], [0, 0.5, 0.3, 0.9]) + | >>> m.result() + | 0.5 + | + | >>> m.reset_state() + | >>> m.update_state([0, 0, 1, 1], [0, 0.5, 0.3, 0.9], + | ... sample_weight=[1, 0, 0, 1]) + | >>> m.result() + | 1.0 + | + | Usage with `compile()` API: + | + | ```python + | model.compile( + | optimizer='sgd', + | loss='binary_crossentropy', + | metrics=[keras.metrics.RecallAtPrecision(precision=0.8)]) + | ``` + | + | Method resolution order: + | RecallAtPrecision + | SensitivitySpecificityBase + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | precision, + | num_thresholds=200, + | class_id=None, + | name=None, + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + | result(self) + | Compute the current metric value. + | + | Returns: + | A scalar tensor, or a dictionary of scalar tensors. + | + diff --git a/.tether/man/metric_root_mean_squared_error.txt b/.tether/man/metric_root_mean_squared_error.txt new file mode 100644 index 0000000000..4f9dd906c3 --- /dev/null +++ b/.tether/man/metric_root_mean_squared_error.txt @@ -0,0 +1,82 @@ +Help on class RootMeanSquaredError in module keras.src.metrics.regression_metrics: + +class RootMeanSquaredError(keras.src.metrics.reduction_metrics.Mean) + | RootMeanSquaredError(name='root_mean_squared_error', dtype=None) + | + | Computes root mean squared error metric between `y_true` and `y_pred`. + | + | Formula: + | + | ```python + | loss = sqrt(mean((y_pred - y_true) ** 2)) + | ``` + | + | Args: + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Example: + | + | >>> m = keras.metrics.RootMeanSquaredError() + | >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]]) + | >>> m.result() + | 0.5 + | + | >>> m.reset_state() + | >>> m.update_state([[0, 1], [0, 0]], [[1, 1], [0, 0]], + | ... sample_weight=[1, 0]) + | >>> m.result() + | 0.70710677 + | + | Usage with `compile()` API: + | + | ```python + | model.compile( + | optimizer='sgd', + | loss='mse', + | metrics=[keras.metrics.RootMeanSquaredError()]) + | ``` + | + | Method resolution order: + | RootMeanSquaredError + | keras.src.metrics.reduction_metrics.Mean + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | name='root_mean_squared_error', + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | result(self) + | Compute the current metric value. + | + | Returns: + | A scalar tensor, or a dictionary of scalar tensors. + | + | update_state( + | self, + | y_true, + | y_pred, + | sample_weight=None + | ) + | Accumulates root mean squared error statistics. + | + | Args: + | y_true: The ground truth values. + | y_pred: The predicted values. + | sample_weight: Optional weighting of each example.
Can + | be a `Tensor` whose rank is either 0, or the same rank as + | `y_true`, and must be broadcastable to `y_true`. + | Defaults to `1`. + | + | Returns: + | Update op. + | + diff --git a/.tether/man/metric_sensitivity_at_specificity.txt b/.tether/man/metric_sensitivity_at_specificity.txt new file mode 100644 index 0000000000..34fd781beb --- /dev/null +++ b/.tether/man/metric_sensitivity_at_specificity.txt @@ -0,0 +1,89 @@ +Help on class SensitivityAtSpecificity in module keras.src.metrics.confusion_metrics: + +class SensitivityAtSpecificity(SensitivitySpecificityBase) + | SensitivityAtSpecificity(specificity, num_thresholds=200, class_id=None, name=None, dtype=None) + | + | Computes best sensitivity where specificity is >= specified value. + | + | `Sensitivity` measures the proportion of actual positives that are correctly + | identified as such `(tp / (tp + fn))`. + | `Specificity` measures the proportion of actual negatives that are correctly + | identified as such `(tn / (tn + fp))`. + | + | This metric creates four local variables, `true_positives`, + | `true_negatives`, `false_positives` and `false_negatives` that are used to + | compute the sensitivity at the given specificity. The threshold for the + | given specificity value is computed and used to evaluate the corresponding + | sensitivity. + | + | If `sample_weight` is `None`, weights default to 1. + | Use `sample_weight` of 0 to mask values. + | + | If `class_id` is specified, we calculate precision by considering only the + | entries in the batch for which `class_id` is above the threshold + | predictions, and computing the fraction of them for which `class_id` is + | indeed a correct label. + | + | For additional information about specificity and sensitivity, see + | [the following](https://en.wikipedia.org/wiki/Sensitivity_and_specificity). + | + | Args: + | specificity: A scalar value in range `[0, 1]`. + | num_thresholds: (Optional) Defaults to 200. The number of thresholds to + | use for matching the given specificity. + | class_id: (Optional) Integer class ID for which we want binary metrics. + | This must be in the half-open interval `[0, num_classes)`, where + | `num_classes` is the last dimension of predictions. + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Example: + | + | >>> m = keras.metrics.SensitivityAtSpecificity(0.5) + | >>> m.update_state([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8]) + | >>> m.result() + | 0.5 + | + | >>> m.reset_state() + | >>> m.update_state([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8], + | ... sample_weight=[1, 1, 2, 2, 1]) + | >>> m.result() + | 0.333333 + | + | Usage with `compile()` API: + | + | ```python + | model.compile( + | optimizer='sgd', + | loss='binary_crossentropy', + | metrics=[keras.metrics.SensitivityAtSpecificity(specificity=0.5)]) + | ``` + | + | Method resolution order: + | SensitivityAtSpecificity + | SensitivitySpecificityBase + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | specificity, + | num_thresholds=200, + | class_id=None, + | name=None, + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + | result(self) + | Compute the current metric value. + | + | Returns: + | A scalar tensor, or a dictionary of scalar tensors.
+ | + diff --git a/.tether/man/metric_sparse_categorical_accuracy.txt b/.tether/man/metric_sparse_categorical_accuracy.txt new file mode 100644 index 0000000000..1619e5708e --- /dev/null +++ b/.tether/man/metric_sparse_categorical_accuracy.txt @@ -0,0 +1,67 @@ +Help on class SparseCategoricalAccuracy in module keras.src.metrics.accuracy_metrics: + +class SparseCategoricalAccuracy(keras.src.metrics.reduction_metrics.MeanMetricWrapper) + | SparseCategoricalAccuracy(name='sparse_categorical_accuracy', dtype=None) + | + | Calculates how often predictions match integer labels. + | + | ```python + | acc = np.dot(sample_weight, np.equal(y_true, np.argmax(y_pred, axis=1))) + | ``` + | + | You can provide logits of classes as `y_pred`, since argmax of + | logits and probabilities are the same. + | + | This metric creates two local variables, `total` and `count` that are used + | to compute the frequency with which `y_pred` matches `y_true`. This + | frequency is ultimately returned as `sparse categorical accuracy`: an + | idempotent operation that simply divides `total` by `count`. + | + | If `sample_weight` is `None`, weights default to 1. + | Use `sample_weight` of 0 to mask values. + | + | Args: + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Example: + | + | >>> m = keras.metrics.SparseCategoricalAccuracy() + | >>> m.update_state([[2], [1]], [[0.1, 0.6, 0.3], [0.05, 0.95, 0]]) + | >>> m.result() + | 0.5 + | + | >>> m.reset_state() + | >>> m.update_state([[2], [1]], [[0.1, 0.6, 0.3], [0.05, 0.95, 0]], + | ... sample_weight=[0.7, 0.3]) + | >>> m.result() + | 0.3 + | + | Usage with `compile()` API: + | + | ```python + | model.compile(optimizer='sgd', + | loss='sparse_categorical_crossentropy', + | metrics=[keras.metrics.SparseCategoricalAccuracy()]) + | ``` + | + | Method resolution order: + | SparseCategoricalAccuracy + | keras.src.metrics.reduction_metrics.MeanMetricWrapper + | keras.src.metrics.reduction_metrics.Mean + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | name='sparse_categorical_accuracy', + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + diff --git a/.tether/man/metric_sparse_categorical_crossentropy.txt b/.tether/man/metric_sparse_categorical_crossentropy.txt new file mode 100644 index 0000000000..68562009a9 --- /dev/null +++ b/.tether/man/metric_sparse_categorical_crossentropy.txt @@ -0,0 +1,82 @@ +Help on class SparseCategoricalCrossentropy in module keras.src.metrics.probabilistic_metrics: + +class SparseCategoricalCrossentropy(keras.src.metrics.reduction_metrics.MeanMetricWrapper) + | SparseCategoricalCrossentropy(name='sparse_categorical_crossentropy', dtype=None, from_logits=False, axis=-1) + | + | Computes the crossentropy metric between the labels and predictions. + | + | Use this crossentropy metric when there are two or more label classes. + | It expects labels to be provided as integers. If you want to provide labels + | that are one-hot encoded, please use the `CategoricalCrossentropy` + | metric instead. + | + | There should be `num_classes` floating point values per feature for `y_pred` + | and a single floating point value per feature for `y_true`. + | + | Args: + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result.
+ | from_logits: (Optional) Whether output is expected + | to be a logits tensor. By default, we consider that output + | encodes a probability distribution. + | axis: (Optional) Defaults to `-1`. + | The dimension along which entropy is computed. + | + | Example: + | + | >>> # y_true = one_hot(y_true) = [[0, 1, 0], [0, 0, 1]] + | >>> # logits = log(y_pred) + | >>> # softmax = exp(logits) / sum(exp(logits), axis=-1) + | >>> # softmax = [[0.05, 0.95, EPSILON], [0.1, 0.8, 0.1]] + | >>> # xent = -sum(y * log(softmax), 1) + | >>> # log(softmax) = [[-2.9957, -0.0513, -16.1181], + | >>> # [-2.3026, -0.2231, -2.3026]] + | >>> # y_true * log(softmax) = [[0, -0.0513, 0], [0, 0, -2.3026]] + | >>> # xent = [0.0513, 2.3026] + | >>> # Reduced xent = (0.0513 + 2.3026) / 2 + | >>> m = keras.metrics.SparseCategoricalCrossentropy() + | >>> m.update_state([1, 2], + | ... [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]) + | >>> m.result() + | 1.1769392 + | + | >>> m.reset_state() + | >>> m.update_state([1, 2], + | ... [[0.05, 0.95, 0], [0.1, 0.8, 0.1]], + | ... sample_weight=np.array([0.3, 0.7])) + | >>> m.result() + | 1.6271976 + | + | Usage with `compile()` API: + | + | ```python + | model.compile( + | optimizer='sgd', + | loss='mse', + | metrics=[keras.metrics.SparseCategoricalCrossentropy()]) + | ``` + | + | Method resolution order: + | SparseCategoricalCrossentropy + | keras.src.metrics.reduction_metrics.MeanMetricWrapper + | keras.src.metrics.reduction_metrics.Mean + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | name='sparse_categorical_crossentropy', + | dtype=None, + | from_logits=False, + | axis=-1 + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + diff --git a/.tether/man/metric_sparse_top_k_categorical_accuracy.txt b/.tether/man/metric_sparse_top_k_categorical_accuracy.txt new file mode 100644 index 0000000000..e3f0d115d2 --- /dev/null +++ b/.tether/man/metric_sparse_top_k_categorical_accuracy.txt @@ -0,0 +1,55 @@ +Help on class SparseTopKCategoricalAccuracy in module keras.src.metrics.accuracy_metrics: + +class SparseTopKCategoricalAccuracy(keras.src.metrics.reduction_metrics.MeanMetricWrapper) + | SparseTopKCategoricalAccuracy(k=5, name='sparse_top_k_categorical_accuracy', dtype=None) + | + | Computes how often integer targets are in the top `K` predictions. + | + | Args: + | k: (Optional) Number of top elements to look at for computing accuracy. + | Defaults to `5`. + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Example: + | + | >>> m = keras.metrics.SparseTopKCategoricalAccuracy(k=1) + | >>> m.update_state([2, 1], [[0.1, 0.9, 0.8], [0.05, 0.95, 0]]) + | >>> m.result() + | 0.5 + | + | >>> m.reset_state() + | >>> m.update_state([2, 1], [[0.1, 0.9, 0.8], [0.05, 0.95, 0]], + | ...
sample_weight=[0.7, 0.3]) + | >>> m.result() + | 0.3 + | + | Usage with `compile()` API: + | + | ```python + | model.compile(optimizer='sgd', + | loss='sparse_categorical_crossentropy', + | metrics=[keras.metrics.SparseTopKCategoricalAccuracy()]) + | ``` + | + | Method resolution order: + | SparseTopKCategoricalAccuracy + | keras.src.metrics.reduction_metrics.MeanMetricWrapper + | keras.src.metrics.reduction_metrics.Mean + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | k=5, + | name='sparse_top_k_categorical_accuracy', + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + diff --git a/.tether/man/metric_sparse_top_op_categorical_accuracy.txt b/.tether/man/metric_sparse_top_op_categorical_accuracy.txt new file mode 100644 index 0000000000..ec0d081799 --- /dev/null +++ b/.tether/man/metric_sparse_top_op_categorical_accuracy.txt @@ -0,0 +1,54 @@ +Help on class SparseTopKCategoricalAccuracy in module keras.src.metrics.accuracy_metrics: + +class SparseTopKCategoricalAccuracy(keras.src.metrics.reduction_metrics.MeanMetricWrapper) + | SparseTopKCategoricalAccuracy(k=5, name='sparse_top_k_categorical_accuracy', dtype=None) + | + | Computes how often integer targets are in the top `K` predictions. + | + | Args: + | k: (Optional) Number of top elements to look at for computing accuracy. + | Defaults to `5`. + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Standalone usage: + | + | >>> m = keras.metrics.SparseTopKCategoricalAccuracy(k=1) + | >>> m.update_state([2, 1], [[0.1, 0.9, 0.8], [0.05, 0.95, 0]]) + | >>> m.result() + | 0.5 + | + | >>> m.reset_state() + | >>> m.update_state([2, 1], [[0.1, 0.9, 0.8], [0.05, 0.95, 0]], + | ... sample_weight=[0.7, 0.3]) + | >>> m.result() + | 0.3 + | + | Usage with `compile()` API: + | + | ```python + | model.compile(optimizer='sgd', + | loss='sparse_categorical_crossentropy', + | metrics=[keras.metrics.SparseTopKCategoricalAccuracy()]) + | ``` + | + | Method resolution order: + | SparseTopKCategoricalAccuracy + | keras.src.metrics.reduction_metrics.MeanMetricWrapper + | keras.src.metrics.reduction_metrics.Mean + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | k=5, + | name='sparse_top_k_categorical_accuracy', + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | diff --git a/.tether/man/metric_specificity_at_sensitivity.txt b/.tether/man/metric_specificity_at_sensitivity.txt new file mode 100644 index 0000000000..c3ef349099 --- /dev/null +++ b/.tether/man/metric_specificity_at_sensitivity.txt @@ -0,0 +1,89 @@ +Help on class SpecificityAtSensitivity in module keras.src.metrics.confusion_metrics: + +class SpecificityAtSensitivity(SensitivitySpecificityBase) + | SpecificityAtSensitivity(sensitivity, num_thresholds=200, class_id=None, name=None, dtype=None) + | + | Computes best specificity where sensitivity is >= specified value. + | + | `Sensitivity` measures the proportion of actual positives that are correctly + | identified as such `(tp / (tp + fn))`. + | `Specificity` measures the proportion of actual negatives that are correctly + | identified as such `(tn / (tn + fp))`. 
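+ |
+ | Both ratios can be computed directly (a plain-NumPy sketch for
+ | intuition only; it uses the same data as the Example further below,
+ | at a single threshold of 0.5):
+ |
+ | ```python
+ | import numpy as np
+ |
+ | y_true = np.array([0, 0, 0, 1, 1])
+ | y_pred = np.array([0, 0.3, 0.8, 0.3, 0.8]) >= 0.5
+ | tp = np.sum(y_pred & (y_true == 1))   # 1
+ | fn = np.sum(~y_pred & (y_true == 1))  # 1
+ | tn = np.sum(~y_pred & (y_true == 0))  # 2
+ | fp = np.sum(y_pred & (y_true == 0))   # 1
+ | print(tp / (tp + fn))  # sensitivity: 0.5
+ | print(tn / (tn + fp))  # specificity: 0.666...
+ | ```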
+ | + | This metric creates four local variables, `true_positives`, + | `true_negatives`, `false_positives` and `false_negatives` that are used to + | compute the specificity at the given sensitivity. The threshold for the + | given sensitivity value is computed and used to evaluate the corresponding + | specificity. + | + | If `sample_weight` is `None`, weights default to 1. + | Use `sample_weight` of 0 to mask values. + | + | If `class_id` is specified, we calculate precision by considering only the + | entries in the batch for which `class_id` is above the threshold + | predictions, and computing the fraction of them for which `class_id` is + | indeed a correct label. + | + | For additional information about specificity and sensitivity, see + | [the following](https://en.wikipedia.org/wiki/Sensitivity_and_specificity). + | + | Args: + | sensitivity: A scalar value in range `[0, 1]`. + | num_thresholds: (Optional) Defaults to 200. The number of thresholds to + | use for matching the given sensitivity. + | class_id: (Optional) Integer class ID for which we want binary metrics. + | This must be in the half-open interval `[0, num_classes)`, where + | `num_classes` is the last dimension of predictions. + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Example: + | + | >>> m = keras.metrics.SpecificityAtSensitivity(0.5) + | >>> m.update_state([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8]) + | >>> m.result() + | 0.66666667 + | + | >>> m.reset_state() + | >>> m.update_state([0, 0, 0, 1, 1], [0, 0.3, 0.8, 0.3, 0.8], + | ... sample_weight=[1, 1, 2, 2, 2]) + | >>> m.result() + | 0.5 + | + | Usage with `compile()` API: + | + | ```python + | model.compile( + | optimizer='sgd', + | loss='binary_crossentropy', + | metrics=[keras.metrics.SpecificityAtSensitivity(sensitivity=0.5)]) + | ``` + | + | Method resolution order: + | SpecificityAtSensitivity + | SensitivitySpecificityBase + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | sensitivity, + | num_thresholds=200, + | class_id=None, + | name=None, + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + | result(self) + | Compute the current metric value. + | + | Returns: + | A scalar tensor, or a dictionary of scalar tensors. + | + diff --git a/.tether/man/metric_squared_hinge.txt b/.tether/man/metric_squared_hinge.txt new file mode 100644 index 0000000000..825293c6cc --- /dev/null +++ b/.tether/man/metric_squared_hinge.txt @@ -0,0 +1,46 @@ +Help on class SquaredHinge in module keras.src.metrics.hinge_metrics: + +class SquaredHinge(keras.src.metrics.reduction_metrics.MeanMetricWrapper) + | SquaredHinge(name='squared_hinge', dtype=None) + | + | Computes the squared hinge metric between `y_true` and `y_pred`. + | + | `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are + | provided we will convert them to -1 or 1. + | + | Args: + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Example: + | + | >>> m = keras.metrics.SquaredHinge() + | >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]]) + | >>> m.result() + | 1.86 + | >>> m.reset_state() + | >>> m.update_state([[0, 1], [0, 0]], [[0.6, 0.4], [0.4, 0.6]], + | ...
sample_weight=[1, 0]) + | >>> m.result() + | 1.46 + | + | Method resolution order: + | SquaredHinge + | keras.src.metrics.reduction_metrics.MeanMetricWrapper + | keras.src.metrics.reduction_metrics.Mean + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | name='squared_hinge', + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + diff --git a/.tether/man/metric_sum.txt b/.tether/man/metric_sum.txt new file mode 100644 index 0000000000..959b40f85b --- /dev/null +++ b/.tether/man/metric_sum.txt @@ -0,0 +1,62 @@ +Help on class Sum in module keras.src.metrics.reduction_metrics: + +class Sum(keras.src.metrics.metric.Metric) + | Sum(name='sum', dtype=None) + | + | Compute the (weighted) sum of the given values. + | + | For example, if `values` is `[1, 3, 5, 7]` then their sum is 16. + | If `sample_weight` was specified as `[1, 1, 0, 0]` then the sum would be 4. + | + | This metric creates one variable, `total`. + | This is ultimately returned as the sum value. + | + | Args: + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Example: + | + | >>> m = keras.metrics.Sum() + | >>> m.update_state([1, 3, 5, 7]) + | >>> m.result() + | 16.0 + | + | >>> m = keras.metrics.Sum() + | >>> m.update_state([1, 3, 5, 7], sample_weight=[1, 1, 0, 0]) + | >>> m.result() + | 4.0 + | + | Method resolution order: + | Sum + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | name='sum', + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | reset_state(self) + | Reset all of the metric state variables. + | + | This function is called between epochs/steps, + | when a metric is evaluated during training. + | + | result(self) + | Compute the current metric value. + | + | Returns: + | A scalar tensor, or a dictionary of scalar tensors. + | + | update_state( + | self, + | values, + | sample_weight=None + | ) + | Accumulate statistics for the metric. + | diff --git a/.tether/man/metric_top_k_categorical_accuracy.txt b/.tether/man/metric_top_k_categorical_accuracy.txt new file mode 100644 index 0000000000..417b62a85d --- /dev/null +++ b/.tether/man/metric_top_k_categorical_accuracy.txt @@ -0,0 +1,57 @@ +Help on class TopKCategoricalAccuracy in module keras.src.metrics.accuracy_metrics: + +class TopKCategoricalAccuracy(keras.src.metrics.reduction_metrics.MeanMetricWrapper) + | TopKCategoricalAccuracy(k=5, name='top_k_categorical_accuracy', dtype=None) + | + | Computes how often targets are in the top `K` predictions. + | + | Args: + | k: (Optional) Number of top elements to look at for computing accuracy. + | Defaults to `5`. + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Example: + | + | >>> m = keras.metrics.TopKCategoricalAccuracy(k=1) + | >>> m.update_state([[0, 0, 1], [0, 1, 0]], + | ... [[0.1, 0.9, 0.8], [0.05, 0.95, 0]]) + | >>> m.result() + | 0.5 + | + | >>> m.reset_state() + | >>> m.update_state([[0, 0, 1], [0, 1, 0]], + | ... [[0.1, 0.9, 0.8], [0.05, 0.95, 0]], + | ...
sample_weight=[0.7, 0.3]) + | >>> m.result() + | 0.3 + | + | Usage with `compile()` API: + | + | ```python + | model.compile(optimizer='sgd', + | loss='categorical_crossentropy', + | metrics=[keras.metrics.TopKCategoricalAccuracy()]) + | ``` + | + | Method resolution order: + | TopKCategoricalAccuracy + | keras.src.metrics.reduction_metrics.MeanMetricWrapper + | keras.src.metrics.reduction_metrics.Mean + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | k=5, + | name='top_k_categorical_accuracy', + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | + diff --git a/.tether/man/metric_top_op_categorical_accuracy.txt b/.tether/man/metric_top_op_categorical_accuracy.txt new file mode 100644 index 0000000000..b489a44cfe --- /dev/null +++ b/.tether/man/metric_top_op_categorical_accuracy.txt @@ -0,0 +1,56 @@ +Help on class TopKCategoricalAccuracy in module keras.src.metrics.accuracy_metrics: + +class TopKCategoricalAccuracy(keras.src.metrics.reduction_metrics.MeanMetricWrapper) + | TopKCategoricalAccuracy(k=5, name='top_k_categorical_accuracy', dtype=None) + | + | Computes how often targets are in the top `K` predictions. + | + | Args: + | k: (Optional) Number of top elements to look at for computing accuracy. + | Defaults to `5`. + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Standalone usage: + | + | >>> m = keras.metrics.TopKCategoricalAccuracy(k=1) + | >>> m.update_state([[0, 0, 1], [0, 1, 0]], + | ... [[0.1, 0.9, 0.8], [0.05, 0.95, 0]]) + | >>> m.result() + | 0.5 + | + | >>> m.reset_state() + | >>> m.update_state([[0, 0, 1], [0, 1, 0]], + | ... [[0.1, 0.9, 0.8], [0.05, 0.95, 0]], + | ... sample_weight=[0.7, 0.3]) + | >>> m.result() + | 0.3 + | + | Usage with `compile()` API: + | + | ```python + | model.compile(optimizer='sgd', + | loss='categorical_crossentropy', + | metrics=[keras.metrics.TopKCategoricalAccuracy()]) + | ``` + | + | Method resolution order: + | TopKCategoricalAccuracy + | keras.src.metrics.reduction_metrics.MeanMetricWrapper + | keras.src.metrics.reduction_metrics.Mean + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | k=5, + | name='top_k_categorical_accuracy', + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | get_config(self) + | Return the serializable config of the metric. + | diff --git a/.tether/man/metric_true_negatives.txt b/.tether/man/metric_true_negatives.txt new file mode 100644 index 0000000000..210fe61160 --- /dev/null +++ b/.tether/man/metric_true_negatives.txt @@ -0,0 +1,54 @@ +Help on class TrueNegatives in module keras.src.metrics.confusion_metrics: + +class TrueNegatives(_ConfusionMatrixConditionCount) + | TrueNegatives(thresholds=None, name=None, dtype=None) + | + | Calculates the number of true negatives. + | + | If `sample_weight` is given, calculates the sum of the weights of + | true negatives. This metric creates one local variable, `accumulator` + | that is used to keep track of the number of true negatives. + | + | If `sample_weight` is `None`, weights default to 1. + | Use `sample_weight` of 0 to mask values. + | + | Args: + | thresholds: (Optional) Defaults to `0.5`. A float value, or a Python + | list/tuple of float threshold values in `[0, 1]`. 
A threshold is + | compared with prediction values to determine the truth value of + | predictions (i.e., above the threshold is `True`, below is `False`). + | If used with a loss function that sets `from_logits=True` (i.e. no + | sigmoid applied to predictions), `thresholds` should be set to 0. + | One metric value is generated for each threshold value. + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Example: + | + | >>> m = keras.metrics.TrueNegatives() + | >>> m.update_state([0, 1, 0, 0], [1, 1, 0, 0]) + | >>> m.result() + | 2.0 + | + | >>> m.reset_state() + | >>> m.update_state([0, 1, 0, 0], [1, 1, 0, 0], sample_weight=[0, 0, 1, 0]) + | >>> m.result() + | 1.0 + | + | Method resolution order: + | TrueNegatives + | _ConfusionMatrixConditionCount + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | thresholds=None, + | name=None, + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + diff --git a/.tether/man/metric_true_positives.txt b/.tether/man/metric_true_positives.txt new file mode 100644 index 0000000000..4dee3533ba --- /dev/null +++ b/.tether/man/metric_true_positives.txt @@ -0,0 +1,54 @@ +Help on class TruePositives in module keras.src.metrics.confusion_metrics: + +class TruePositives(_ConfusionMatrixConditionCount) + | TruePositives(thresholds=None, name=None, dtype=None) + | + | Calculates the number of true positives. + | + | If `sample_weight` is given, calculates the sum of the weights of + | true positives. This metric creates one local variable, `true_positives` + | that is used to keep track of the number of true positives. + | + | If `sample_weight` is `None`, weights default to 1. + | Use `sample_weight` of 0 to mask values. + | + | Args: + | thresholds: (Optional) Defaults to `0.5`. A float value, or a Python + | list/tuple of float threshold values in `[0, 1]`. A threshold is + | compared with prediction values to determine the truth value of + | predictions (i.e., above the threshold is `True`, below is `False`). + | If used with a loss function that sets `from_logits=True` (i.e. no + | sigmoid applied to predictions), `thresholds` should be set to 0. + | One metric value is generated for each threshold value. + | name: (Optional) string name of the metric instance. + | dtype: (Optional) data type of the metric result. + | + | Example: + | + | >>> m = keras.metrics.TruePositives() + | >>> m.update_state([0, 1, 1, 1], [1, 0, 1, 1]) + | >>> m.result() + | 2.0 + | + | >>> m.reset_state() + | >>> m.update_state([0, 1, 1, 1], [1, 0, 1, 1], sample_weight=[0, 0, 1, 0]) + | >>> m.result() + | 1.0 + | + | Method resolution order: + | TruePositives + | _ConfusionMatrixConditionCount + | keras.src.metrics.metric.Metric + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | thresholds=None, + | name=None, + | dtype=None + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + diff --git a/.tether/man/model_to_dot.txt b/.tether/man/model_to_dot.txt new file mode 100644 index 0000000000..ad270a61a1 --- /dev/null +++ b/.tether/man/model_to_dot.txt @@ -0,0 +1,37 @@ +__signature__ +keras.utils.model_to_dot( + model, + show_shapes=False, + show_dtype=False, + show_layer_names=True, + rankdir='TB', + expand_nested=False, + dpi=200, + subgraph=False, + show_layer_activations=False, + show_trainable=False, + **kwargs +) +__doc__ +Convert a Keras model to dot format. 
+ +Args: + model: A Keras model instance. + show_shapes: whether to display shape information. + show_dtype: whether to display layer dtypes. + show_layer_names: whether to display layer names. + rankdir: `rankdir` argument passed to PyDot, + a string specifying the format of the plot: `"TB"` + creates a vertical plot; `"LR"` creates a horizontal plot. + expand_nested: whether to expand nested Functional models + into clusters. + dpi: Image resolution in dots per inch. + subgraph: whether to return a `pydot.Cluster` instance. + show_layer_activations: Display layer activations (only for layers that + have an `activation` property). + show_trainable: whether to display if a layer is trainable. + +Returns: + A `pydot.Dot` instance representing the Keras model or + a `pydot.Cluster` instance representing the nested model if + `subgraph=True`. diff --git a/.tether/man/normalize.txt b/.tether/man/normalize.txt new file mode 100644 index 0000000000..e2fc686668 --- /dev/null +++ b/.tether/man/normalize.txt @@ -0,0 +1,19 @@ +__signature__ +keras.utils.normalize( + x, + axis=-1, + order=2 +) +__doc__ +Normalizes an array. + +If the input is a NumPy array, a NumPy array will be returned. +If it's a backend tensor, a backend tensor will be returned. + +Args: + x: Array to normalize. + axis: axis along which to normalize. + order: Normalization order (e.g. `order=2` for L2 norm). + +Returns: + A normalized copy of the array. diff --git a/.tether/man/op_abs.txt b/.tether/man/op_abs.txt new file mode 100644 index 0000000000..dd629b5a1e --- /dev/null +++ b/.tether/man/op_abs.txt @@ -0,0 +1,19 @@ +__signature__ +keras.ops.absolute(x) +__doc__ +Compute the absolute value element-wise. + +`keras.ops.abs` is a shorthand for this function. + +Args: + x: Input tensor. + +Returns: + An array containing the absolute value of each element in `x`. + +Example: + +>>> x = keras.ops.convert_to_tensor([-1.2, 1.2]) +>>> keras.ops.absolute(x) +array([1.2, 1.2], dtype=float32) + diff --git a/.tether/man/op_add.txt b/.tether/man/op_add.txt new file mode 100644 index 0000000000..e6df8d75dd --- /dev/null +++ b/.tether/man/op_add.txt @@ -0,0 +1,27 @@ +__signature__ +keras.ops.add(x1, x2) +__doc__ +Add arguments element-wise. + +Args: + x1: First input tensor. + x2: Second input tensor. + +Returns: + The tensor containing the element-wise sum of `x1` and `x2`. + +Examples: +>>> x1 = keras.ops.convert_to_tensor([1, 4]) +>>> x2 = keras.ops.convert_to_tensor([5, 6]) +>>> keras.ops.add(x1, x2) +array([6, 10], dtype=int32) + +`keras.ops.add` also broadcasts shapes: +>>> x1 = keras.ops.convert_to_tensor( +... [[5, 4], +... [5, 6]] +... ) +>>> x2 = keras.ops.convert_to_tensor([5, 6]) +>>> keras.ops.add(x1, x2) +array([[10 10] + [10 12]], shape=(2, 2), dtype=int32) diff --git a/.tether/man/op_all.txt b/.tether/man/op_all.txt new file mode 100644 index 0000000000..e4bf75d30c --- /dev/null +++ b/.tether/man/op_all.txt @@ -0,0 +1,37 @@ +__signature__ +keras.ops.all( + x, + axis=None, + keepdims=False +) +__doc__ +Test whether all array elements along a given axis evaluate to `True`. + +Args: + x: Input tensor. + axis: An integer or tuple of integers that represent the axis along + which a logical AND reduction is performed. The default + (`axis=None`) is to perform a logical AND over all the dimensions + of the input array. `axis` may be negative, in which case it counts + from the last to the first axis. + keepdims: If `True`, axes which are reduced are left in the result as + dimensions with size one. 
With this option, the result will + broadcast correctly against the input array. Defaults to `False`. + +Returns: + The tensor containing the logical AND reduction over the `axis`. + +Examples: +>>> x = keras.ops.convert_to_tensor([True, False]) +>>> keras.ops.all(x) +array(False, shape=(), dtype=bool) + +>>> x = keras.ops.convert_to_tensor([[True, False], [True, True]]) +>>> keras.ops.all(x, axis=0) +array([ True False], shape=(2,), dtype=bool) + +`keepdims=True` outputs a tensor with dimensions reduced to one. +>>> x = keras.ops.convert_to_tensor([[True, False], [True, True]]) +>>> keras.ops.all(x, keepdims=True) +array([[False]], shape=(1, 1), dtype=bool) + diff --git a/.tether/man/op_amax.txt b/.tether/man/op_amax.txt new file mode 100644 index 0000000000..b8c31a9c0e --- /dev/null +++ b/.tether/man/op_amax.txt @@ -0,0 +1,36 @@ +__signature__ +keras.ops.amax( + x, + axis=None, + keepdims=False +) +__doc__ +Returns the maximum of an array or maximum value along an axis. + +Args: + x: Input tensor. + axis: Axis along which to compute the maximum. + By default (`axis=None`), find the maximum value in all the + dimensions of the input array. + keepdims: If `True`, axes which are reduced are left in the result as + dimensions that are broadcast to the size of the original + input tensor. Defaults to `False`. + +Returns: + An array with the maximum value. If `axis=None`, the result is a scalar + value representing the maximum element in the entire array. If `axis` is + given, the result is an array with the maximum values along + the specified axis. + +Examples: +>>> x = keras.ops.convert_to_tensor([[1, 3, 5], [2, 3, 6]]) +>>> keras.ops.amax(x) +array(6, dtype=int32) + +>>> x = keras.ops.convert_to_tensor([[1, 6, 8], [1, 5, 2]]) +>>> keras.ops.amax(x, axis=0) +array([1, 6, 8], dtype=int32) + +>>> x = keras.ops.convert_to_tensor([[1, 6, 8], [1, 5, 2]]) +>>> keras.ops.amax(x, axis=1, keepdims=True) +array([[8], [5]], dtype=int32) diff --git a/.tether/man/op_amin.txt b/.tether/man/op_amin.txt new file mode 100644 index 0000000000..d3ab9d9ec6 --- /dev/null +++ b/.tether/man/op_amin.txt @@ -0,0 +1,36 @@ +__signature__ +keras.ops.amin( + x, + axis=None, + keepdims=False +) +__doc__ +Returns the minimum of an array or minimum value along an axis. + +Args: + x: Input tensor. + axis: Axis along which to compute the minimum. + By default (`axis=None`), find the minimum value in all the + dimensions of the input array. + keepdims: If `True`, axes which are reduced are left in the result as + dimensions that are broadcast to the size of the original + input tensor. Defaults to `False`. + +Returns: + An array with the minimum value. If `axis=None`, the result is a scalar + value representing the minimum element in the entire array. If `axis` is + given, the result is an array with the minimum values along + the specified axis. 
+ +Examples: +>>> x = keras.ops.convert_to_tensor([1, 3, 5, 2, 3, 6]) +>>> keras.ops.amin(x) +array(1, dtype=int32) + +>>> x = keras.ops.convert_to_tensor([[1, 6, 8], [7, 5, 3]]) +>>> keras.ops.amin(x, axis=0) +array([1, 5, 3], dtype=int32) + +>>> x = keras.ops.convert_to_tensor([[1, 6, 8], [7, 5, 3]]) +>>> keras.ops.amin(x, axis=1, keepdims=True) +array([[1], [3]], dtype=int32) diff --git a/.tether/man/op_any.txt b/.tether/man/op_any.txt new file mode 100644 index 0000000000..9850a9ce0b --- /dev/null +++ b/.tether/man/op_any.txt @@ -0,0 +1,37 @@ +__signature__ +keras.ops.any( + x, + axis=None, + keepdims=False +) +__doc__ +Test whether any array element along a given axis evaluates to `True`. + +Args: + x: Input tensor. + axis: An integer or tuple of integers that represent the axis along + which a logical OR reduction is performed. The default + (`axis=None`) is to perform a logical OR over all the dimensions + of the input array. `axis` may be negative, in which case it counts + from the last to the first axis. + keepdims: If `True`, axes which are reduced are left in the result as + dimensions with size one. With this option, the result will + broadcast correctly against the input array. Defaults to `False`. + +Returns: + The tensor containing the logical OR reduction over the `axis`. + +Examples: +>>> x = keras.ops.convert_to_tensor([True, False]) +>>> keras.ops.any(x) +array(True, shape=(), dtype=bool) + +>>> x = keras.ops.convert_to_tensor([[True, False], [True, True]]) +>>> keras.ops.any(x, axis=0) +array([ True True], shape=(2,), dtype=bool) + +`keepdims=True` outputs a tensor with dimensions reduced to one. +>>> x = keras.ops.convert_to_tensor([[True, False], [True, True]]) +>>> keras.ops.any(x, keepdims=True) +array([[ True]], shape=(1, 1), dtype=bool) + diff --git a/.tether/man/op_append.txt b/.tether/man/op_append.txt new file mode 100644 index 0000000000..6c8c9eccb2 --- /dev/null +++ b/.tether/man/op_append.txt @@ -0,0 +1,37 @@ +__signature__ +keras.ops.append( + x1, + x2, + axis=None +) +__doc__ +Append tensor `x2` to the end of tensor `x1`. + +Args: + x1: First input tensor. + x2: Second input tensor. + axis: Axis along which tensor `x2` is appended to tensor `x1`. + If `None`, both tensors are flattened before use. + +Returns: + A tensor with the values of `x2` appended to `x1`. + +Examples: +>>> x1 = keras.ops.convert_to_tensor([1, 2, 3]) +>>> x2 = keras.ops.convert_to_tensor([[4, 5, 6], [7, 8, 9]]) +>>> keras.ops.append(x1, x2) +array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int32) + +When `axis` is specified, `x1` and `x2` must have compatible shapes. +>>> x1 = keras.ops.convert_to_tensor([[1, 2, 3], [4, 5, 6]]) +>>> x2 = keras.ops.convert_to_tensor([[7, 8, 9]]) +>>> keras.ops.append(x1, x2, axis=0) +array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]], dtype=int32) +>>> x3 = keras.ops.convert_to_tensor([7, 8, 9]) +>>> keras.ops.append(x1, x3, axis=0) +Traceback (most recent call last): + ... +TypeError: Cannot concatenate arrays with different numbers of +dimensions: got (2, 3), (3,). diff --git a/.tether/man/op_arange.txt b/.tether/man/op_arange.txt new file mode 100644 index 0000000000..72d7e67b6b --- /dev/null +++ b/.tether/man/op_arange.txt @@ -0,0 +1,52 @@ +__signature__ +keras.ops.arange( + start, + stop=None, + step=1, + dtype=None +) +__doc__ +Return evenly spaced values within a given interval. 
+ +`arange` can be called with a varying number of positional arguments: +* `arange(stop)`: Values are generated within the half-open interval + `[0, stop)` (in other words, the interval including start but excluding + stop). +* `arange(start, stop)`: Values are generated within the half-open interval + `[start, stop)`. +* `arange(start, stop, step)`: Values are generated within the half-open + interval `[start, stop)`, with spacing between values given by step. + +Args: + start: Integer or real, representing the start of the interval. The + interval includes this value. + stop: Integer or real, representing the end of the interval. The + interval does not include this value, except in some cases where + `step` is not an integer and floating point round-off affects the + length of `out`. Defaults to `None`. + step: Integer or real, representing the spacing between values. For any + output `out`, this is the distance between two adjacent values, + `out[i+1] - out[i]`. The default step size is 1. If `step` is + specified as a positional argument, `start` must also be given. + dtype: The type of the output array. If `dtype` is not given, infer the + data type from the other input arguments. + +Returns: + Tensor of evenly spaced values. + For floating point arguments, the length of the result is + `ceil((stop - start)/step)`. Because of floating point overflow, this + rule may result in the last element of `out` being greater than `stop`. + +Examples: +>>> keras.ops.arange(3) +array([0, 1, 2], dtype=int32) + +>>> keras.ops.arange(3.0) +array([0., 1., 2.], dtype=float32) + +>>> keras.ops.arange(3, 7) +array([3, 4, 5, 6], dtype=int32) + +>>> keras.ops.arange(3, 7, 2) +array([3, 5], dtype=int32) + diff --git a/.tether/man/op_arccos.txt b/.tether/man/op_arccos.txt new file mode 100644 index 0000000000..8c7619ab9d --- /dev/null +++ b/.tether/man/op_arccos.txt @@ -0,0 +1,18 @@ +__signature__ +keras.ops.arccos(x) +__doc__ +Trigonometric inverse cosine, element-wise. + +The inverse of `cos` so that, if `y = cos(x)`, then `x = arccos(y)`. + +Args: + x: Input tensor. + +Returns: + Tensor of the angle of the ray intersecting the unit circle at the given + x-coordinate in radians `[0, pi]`. + +Example: +>>> x = keras.ops.convert_to_tensor([1, -1]) +>>> keras.ops.arccos(x) +array([0.0, 3.1415927], dtype=float32) diff --git a/.tether/man/op_arccosh.txt b/.tether/man/op_arccosh.txt new file mode 100644 index 0000000000..9b856f1c51 --- /dev/null +++ b/.tether/man/op_arccosh.txt @@ -0,0 +1,15 @@ +__signature__ +keras.ops.arccosh(x) +__doc__ +Inverse hyperbolic cosine, element-wise. + +Arguments: + x: Input tensor. + +Returns: + Output tensor of same shape as `x`. + +Example: +>>> x = keras.ops.convert_to_tensor([10, 100]) +>>> keras.ops.arccosh(x) +array([2.993223, 5.298292], dtype=float32) diff --git a/.tether/man/op_arcsin.txt b/.tether/man/op_arcsin.txt new file mode 100644 index 0000000000..7d147aff22 --- /dev/null +++ b/.tether/man/op_arcsin.txt @@ -0,0 +1,16 @@ +__signature__ +keras.ops.arcsin(x) +__doc__ +Inverse sine, element-wise. + +Args: + x: Input tensor. + +Returns: + Tensor of the inverse sine of each element in `x`, in radians and in + the closed interval `[-pi/2, pi/2]`. 
+ +Example: +>>> x = keras.ops.convert_to_tensor([1, -1, 0]) +>>> keras.ops.arcsin(x) +array([ 1.5707964, -1.5707964, 0.], dtype=float32) diff --git a/.tether/man/op_arcsinh.txt b/.tether/man/op_arcsinh.txt new file mode 100644 index 0000000000..dfc63f22bf --- /dev/null +++ b/.tether/man/op_arcsinh.txt @@ -0,0 +1,15 @@ +__signature__ +keras.ops.arcsinh(x) +__doc__ +Inverse hyperbolic sine, element-wise. + +Arguments: + x: Input tensor. + +Returns: + Output tensor of same shape as `x`. + +Example: +>>> x = keras.ops.convert_to_tensor([1, -1, 0]) +>>> keras.ops.arcsinh(x) +array([0.88137364, -0.88137364, 0.0], dtype=float32) diff --git a/.tether/man/op_arctan.txt b/.tether/man/op_arctan.txt new file mode 100644 index 0000000000..48f9d04cd7 --- /dev/null +++ b/.tether/man/op_arctan.txt @@ -0,0 +1,16 @@ +__signature__ +keras.ops.arctan(x) +__doc__ +Trigonometric inverse tangent, element-wise. + +Args: + x: Input tensor. + +Returns: + Tensor of the inverse tangent of each element in `x`, in the interval + `[-pi/2, pi/2]`. + +Example: +>>> x = keras.ops.convert_to_tensor([0, 1]) +>>> keras.ops.arctan(x) +array([0., 0.7853982], dtype=float32) diff --git a/.tether/man/op_arctan2.txt b/.tether/man/op_arctan2.txt new file mode 100644 index 0000000000..801122fa7b --- /dev/null +++ b/.tether/man/op_arctan2.txt @@ -0,0 +1,39 @@ +__signature__ +keras.ops.arctan2(x1, x2) +__doc__ +Element-wise arc tangent of `x1/x2` choosing the quadrant correctly. + +The quadrant (i.e., branch) is chosen so that `arctan2(x1, x2)` is the +signed angle in radians between the ray ending at the origin and passing +through the point `(1, 0)`, and the ray ending at the origin and passing +through the point `(x2, x1)`. (Note the role reversal: the "y-coordinate" +is the first function parameter, the "x-coordinate" is the second.) By IEEE +convention, this function is defined for `x2 = +/-0` and for either or both +of `x1` and `x2` `= +/-inf`. + +Args: + x1: First input tensor. + x2: Second input tensor. + +Returns: + Tensor of angles in radians, in the range `[-pi, pi]`. + +Examples: +Consider four points in different quadrants: +>>> x = keras.ops.convert_to_tensor([-1, +1, +1, -1]) +>>> y = keras.ops.convert_to_tensor([-1, -1, +1, +1]) +>>> keras.ops.arctan2(y, x) * 180 / numpy.pi +array([-135., -45., 45., 135.], dtype=float32) + +Note the order of the parameters. `arctan2` is defined also when x2=0 and +at several other points, obtaining values in the range `[-pi, pi]`: +>>> keras.ops.arctan2( +... keras.ops.array([1., -1.]), +... keras.ops.array([0., 0.]), +... ) +array([ 1.5707964, -1.5707964], dtype=float32) +>>> keras.ops.arctan2( +... keras.ops.array([0., 0., numpy.inf]), +... keras.ops.array([+0., -0., numpy.inf]), +... ) +array([0., 3.1415925, 0.7853982], dtype=float32) diff --git a/.tether/man/op_arctanh.txt b/.tether/man/op_arctanh.txt new file mode 100644 index 0000000000..5975ac374a --- /dev/null +++ b/.tether/man/op_arctanh.txt @@ -0,0 +1,10 @@ +__signature__ +keras.ops.arctanh(x) +__doc__ +Inverse hyperbolic tangent, element-wise. + +Arguments: + x: Input tensor. + +Returns: + Output tensor of same shape as `x`. diff --git a/.tether/man/op_argmax.txt b/.tether/man/op_argmax.txt new file mode 100644 index 0000000000..85d8cbff0a --- /dev/null +++ b/.tether/man/op_argmax.txt @@ -0,0 +1,32 @@ +__signature__ +keras.ops.argmax( + x, + axis=None, + keepdims=False +) +__doc__ +Returns the indices of the maximum values along an axis. + +Args: + x: Input tensor. 
+ axis: By default, the index is into the flattened tensor, otherwise + along the specified axis. + keepdims: If this is set to `True`, the axes which are reduced are left + in the result as dimensions with size one. Defaults to `False`. + +Returns: + Tensor of indices. It has the same shape as `x`, with the dimension + along `axis` removed. + +Example: +>>> x = keras.ops.arange(6).reshape(2, 3) + 10 +>>> x +array([[10, 11, 12], + [13, 14, 15]], dtype=int32) +>>> keras.ops.argmax(x) +array(5, dtype=int32) +>>> keras.ops.argmax(x, axis=0) +array([1, 1, 1], dtype=int32) +>>> keras.ops.argmax(x, axis=1) +array([2, 2], dtype=int32) + diff --git a/.tether/man/op_argmin.txt b/.tether/man/op_argmin.txt new file mode 100644 index 0000000000..89e18f5341 --- /dev/null +++ b/.tether/man/op_argmin.txt @@ -0,0 +1,32 @@ +__signature__ +keras.ops.argmin( + x, + axis=None, + keepdims=False +) +__doc__ +Returns the indices of the minimum values along an axis. + +Args: + x: Input tensor. + axis: By default, the index is into the flattened tensor, otherwise + along the specified axis. + keepdims: If this is set to `True`, the axes which are reduced are left + in the result as dimensions with size one. Defaults to `False`. + +Returns: + Tensor of indices. It has the same shape as `x`, with the dimension + along `axis` removed. + +Example: +>>> x = keras.ops.arange(6).reshape(2, 3) + 10 +>>> x +array([[10, 11, 12], + [13, 14, 15]], dtype=int32) +>>> keras.ops.argmin(x) +array(0, dtype=int32) +>>> keras.ops.argmin(x, axis=0) +array([0, 0, 0], dtype=int32) +>>> keras.ops.argmin(x, axis=1) +array([0, 0], dtype=int32) + diff --git a/.tether/man/op_argsort.txt b/.tether/man/op_argsort.txt new file mode 100644 index 0000000000..328ecc9a08 --- /dev/null +++ b/.tether/man/op_argsort.txt @@ -0,0 +1,33 @@ +__signature__ +keras.ops.argsort(x, axis=-1) +__doc__ +Returns the indices that would sort a tensor. + +Args: + x: Input tensor. + axis: Axis along which to sort. Defaults to `-1` (the last axis). If + `None`, the flattened tensor is used. + +Returns: + Tensor of indices that sort `x` along the specified `axis`. + +Examples: +One-dimensional array: +>>> x = keras.ops.array([3, 1, 2]) +>>> keras.ops.argsort(x) +array([1, 2, 0], dtype=int32) + +Two-dimensional array: +>>> x = keras.ops.array([[0, 3], [3, 2], [4, 5]]) +>>> x +array([[0, 3], + [3, 2], + [4, 5]], dtype=int32) +>>> keras.ops.argsort(x, axis=0) +array([[0, 1], + [1, 0], + [2, 2]], dtype=int32) +>>> keras.ops.argsort(x, axis=1) +array([[0, 1], + [1, 0], + [0, 1]], dtype=int32) diff --git a/.tether/man/op_array.txt b/.tether/man/op_array.txt new file mode 100644 index 0000000000..edc0712df6 --- /dev/null +++ b/.tether/man/op_array.txt @@ -0,0 +1,18 @@ +__signature__ +keras.ops.array(x, dtype=None) +__doc__ +Create a tensor. + +Args: + x: Input tensor. + dtype: The desired data-type for the tensor. + +Returns: + A tensor. + +Examples: +>>> keras.ops.array([1, 2, 3]) +array([1, 2, 3], dtype=int32) + +>>> keras.ops.array([1, 2, 3], dtype="float32") +array([1., 2., 3.], dtype=float32) diff --git a/.tether/man/op_average.txt b/.tether/man/op_average.txt new file mode 100644 index 0000000000..39868ad5a1 --- /dev/null +++ b/.tether/man/op_average.txt @@ -0,0 +1,57 @@ +__signature__ +keras.ops.average( + x, + axis=None, + weights=None +) +__doc__ +Compute the weighted average along the specified axis. + +Args: + x: Input tensor. + axis: Integer along which to average `x`. The default, `axis=None`, + will average over all of the elements of the input tensor. 
If axis + is negative it counts from the last to the first axis. + weights: Tensor of weights associated with the values in `x`. Each + value in `x` contributes to the average according to its + associated weight. The weights array can either be 1-D (in which + case its length must be the size of `x` along the given axis) or of + the same shape as `x`. If `weights=None` (default), then all data + in `x` are assumed to have a weight equal to one. + + The 1-D calculation is: `avg = sum(x * weights) / sum(weights)`. + The only constraint on `weights` is that `sum(weights)` must not be 0. + +Returns: + The average along the specified axis. + +Examples: +>>> data = keras.ops.arange(1, 5) +>>> data +array([1, 2, 3, 4], dtype=int32) +>>> keras.ops.average(data) +array(2.5, dtype=float32) +>>> keras.ops.average( +... keras.ops.arange(1, 11), +... weights=keras.ops.arange(10, 0, -1) +... ) +array(4., dtype=float32) + +>>> data = keras.ops.arange(6).reshape((3, 2)) +>>> data +array([[0, 1], + [2, 3], + [4, 5]], dtype=int32) +>>> keras.ops.average( +... data, +... axis=1, +... weights=keras.ops.array([1./4, 3./4]) +... ) +array([0.75, 2.75, 4.75], dtype=float32) +>>> keras.ops.average( +... data, +... weights=keras.ops.array([1./4, 3./4]) +... ) +Traceback (most recent call last): + ... +ValueError: Axis must be specified when shapes of a and weights differ. diff --git a/.tether/man/op_average_pool.txt b/.tether/man/op_average_pool.txt new file mode 100644 index 0000000000..269dc6d9c2 --- /dev/null +++ b/.tether/man/op_average_pool.txt @@ -0,0 +1,40 @@ +__signature__ +keras.ops.average_pool( + inputs, + pool_size, + strides=None, + padding='valid', + data_format=None +) +__doc__ +Average pooling operation. + +Args: + inputs: Tensor of rank N+2. `inputs` has shape + `(batch_size,) + inputs_spatial_shape + (num_channels,)` if + `data_format="channels_last"`, or + `(batch_size, num_channels) + inputs_spatial_shape` if + `data_format="channels_first"`. Pooling happens over the spatial + dimensions only. + pool_size: int or tuple/list of integers of size + `len(inputs_spatial_shape)`, specifying the size of the pooling + window for each spatial dimension of the input tensor. If + `pool_size` is int, then every spatial dimension shares the same + `pool_size`. + strides: int or tuple/list of integers of size + `len(inputs_spatial_shape)`. The stride of the sliding window for + each spatial dimension of the input tensor. If `strides` is int, + then every spatial dimension shares the same `strides`. + padding: string, either `"valid"` or `"same"`. `"valid"` means no + padding is applied, and `"same"` results in padding evenly to the + left/right or up/down of the input such that output has the + same height/width dimension as the input when `strides=1`. + data_format: A string, either `"channels_last"` or `"channels_first"`. + `data_format` determines the ordering of the dimensions in the + inputs. If `data_format="channels_last"`, `inputs` is of shape + `(batch_size, ..., channels)` while if + `data_format="channels_first"`, `inputs` is of shape + `(batch_size, channels, ...)`. + +Returns: + A tensor of rank N+2, the result of the average pooling operation. 
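A quick standalone sketch, not part of the captured help text, of `keras.ops.average_pool` under the default `channels_last` layout; the printed values assume any installed Keras 3 backend:

```python
import numpy as np
import keras

# One 4x4 single-channel image with a leading batch dimension (channels_last).
x = np.arange(16, dtype="float32").reshape(1, 4, 4, 1)

# Non-overlapping 2x2 windows: each output value is the mean of one window.
y = keras.ops.average_pool(x, pool_size=2, strides=2, padding="valid")

print(keras.ops.convert_to_numpy(y).squeeze())
# [[ 2.5  4.5]
#  [10.5 12.5]]
```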
diff --git a/.tether/man/op_batch_normalization.txt b/.tether/man/op_batch_normalization.txt new file mode 100644 index 0000000000..293cd3919c --- /dev/null +++ b/.tether/man/op_batch_normalization.txt @@ -0,0 +1,50 @@ +__signature__ +keras.ops.batch_normalization( + x, + mean, + variance, + axis, + offset=None, + scale=None, + epsilon=0.001 +) +__doc__ +Normalizes `x` by `mean` and `variance`. + +This op is typically used by the batch normalization step in a neural +network. It normalizes the input tensor along the given axis. + +Args: + x: Input tensor. + mean: A mean vector of the same length as the `axis` dimension of the + input tensor. + variance: A variance vector of the same length as the `axis` dimension + of the input tensor. + axis: Integer, the axis that should be normalized. + offset: An offset vector of the same length as the `axis` dimension of + the input tensor. If not `None`, `offset` is added to the normalized + tensor. Defaults to `None`. + scale: A scale vector of the same length as the `axis` dimension of the + input tensor. If not `None`, the normalized tensor is multiplied by + `scale`. Defaults to `None`. + epsilon: Small float added to variance to avoid dividing by zero. + Defaults to 1e-3. + +Returns: + The normalized tensor. + +Example: + +>>> x = keras.ops.convert_to_tensor( +... [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]] +... ) +>>> keras.ops.batch_normalization( +... x, +... mean=[0.4, 0.5, 0.6], +... variance=[0.67, 0.67, 0.67], +... axis=-1 +... ) +array([[-3.6624e-01, -3.6624e-01, -3.6624e-01], + [-4.6445e-09, 0.0000e+00, -1.8578e-08], + [ 3.6624e-01, 3.6624e-01, 3.6624e-01]]) + diff --git a/.tether/man/op_binary_crossentropy.txt b/.tether/man/op_binary_crossentropy.txt new file mode 100644 index 0000000000..732f5b028a --- /dev/null +++ b/.tether/man/op_binary_crossentropy.txt @@ -0,0 +1,37 @@ +__signature__ +keras.ops.binary_crossentropy( + target, + output, + from_logits=False +) +__doc__ +Computes binary cross-entropy loss between target and output tensor. + +The binary cross-entropy loss is commonly used in binary +classification tasks where each input sample belongs to one +of the two classes. It measures the dissimilarity between the +target and output probabilities or logits. + +Args: + target: The target tensor representing the true binary labels. + Its shape should match the shape of the `output` tensor. + output: The output tensor representing the predicted probabilities + or logits. Its shape should match the shape of the + `target` tensor. + from_logits: (optional) Whether `output` is a tensor of logits or + probabilities. + Set it to `True` if `output` represents logits; otherwise, + set it to `False` if `output` represents probabilities. + Defaults to `False`. + +Returns: + A tensor with the computed binary cross-entropy loss between + `target` and `output`. + +Example: + +>>> target = keras.ops.convert_to_tensor([0, 1, 1, 0]) +>>> output = keras.ops.convert_to_tensor([0.1, 0.9, 0.8, 0.2]) +>>> binary_crossentropy(target, output) +array([0.10536054 0.10536054 0.22314355 0.22314355], + shape=(4,), dtype=float32) diff --git a/.tether/man/op_bincount.txt b/.tether/man/op_bincount.txt new file mode 100644 index 0000000000..4bc0d9c214 --- /dev/null +++ b/.tether/man/op_bincount.txt @@ -0,0 +1,44 @@ +__signature__ +keras.ops.bincount( + x, + weights=None, + minlength=0, + sparse=False +) +__doc__ +Count the number of occurrences of each value in a tensor of integers. + +Args: + x: Input tensor. 
+ It must be of dimension 1, and it must only contain non-negative + integer(s). + weights: Weight tensor. + It must have the same length as `x`. The default value is `None`. + If specified, `x` is weighted by it, i.e. if `n = x[i]`, + `out[n] += weight[i]` instead of the default behavior `out[n] += 1`. + minlength: An integer. + The default value is 0. If specified, there will be at least + this number of bins in the output tensor. If greater than + `max(x) + 1`, each value of the output at an index higher than + `max(x)` is set to 0. + sparse: Whether to return a sparse tensor; for backends that support + sparse tensors. + +Returns: + 1D tensor where each element gives the number of occurrence(s) of its + index value in `x`. Its length is the maximum between `max(x) + 1` and + `minlength`. + +Examples: +>>> x = keras.ops.array([1, 2, 2, 3], dtype="uint8") +>>> keras.ops.bincount(x) +array([0, 1, 2, 1], dtype=int32) +>>> weights = x / 2 +>>> weights +array([0.5, 1., 1., 1.5], dtype=float64) +>>> keras.ops.bincount(x, weights=weights) +array([0., 0.5, 2., 1.5], dtype=float64) +>>> minlength = (keras.ops.max(x).numpy() + 1) + 2 # 6 +>>> keras.ops.bincount(x, minlength=minlength) +array([0, 1, 2, 1, 0, 0], dtype=int32) + diff --git a/.tether/man/op_broadcast_to.txt b/.tether/man/op_broadcast_to.txt new file mode 100644 index 0000000000..6b034e7d00 --- /dev/null +++ b/.tether/man/op_broadcast_to.txt @@ -0,0 +1,19 @@ +__signature__ +keras.ops.broadcast_to(x, shape) +__doc__ +Broadcast a tensor to a new shape. + +Args: + x: The tensor to broadcast. + shape: The shape of the desired tensor. A single integer `i` is + interpreted as `(i,)`. + +Returns: + A tensor with the desired shape. + +Examples: +>>> x = keras.ops.array([1, 2, 3]) +>>> keras.ops.broadcast_to(x, (3, 3)) +array([[1, 2, 3], + [1, 2, 3], + [1, 2, 3]]) diff --git a/.tether/man/op_cast.txt b/.tether/man/op_cast.txt new file mode 100644 index 0000000000..309520f5b2 --- /dev/null +++ b/.tether/man/op_cast.txt @@ -0,0 +1,16 @@ +__signature__ +keras.ops.cast(x, dtype) +__doc__ +Cast a tensor to the desired dtype. + +Args: + x: A tensor or variable. + dtype: The target type. + +Returns: + A tensor of the specified `dtype`. + +Example: + +>>> x = keras.ops.arange(4) +>>> x = keras.ops.cast(x, dtype="float16") diff --git a/.tether/man/op_categorical_crossentropy.txt b/.tether/man/op_categorical_crossentropy.txt new file mode 100644 index 0000000000..4cdb638fd4 --- /dev/null +++ b/.tether/man/op_categorical_crossentropy.txt @@ -0,0 +1,48 @@ +__signature__ +keras.ops.categorical_crossentropy( + target, + output, + from_logits=False, + axis=-1 +) +__doc__ +Computes categorical cross-entropy loss between target and output tensor. + +The categorical cross-entropy loss is commonly used in multi-class +classification tasks where each input sample can belong to one of +multiple classes. It measures the dissimilarity +between the target and output probabilities or logits. + +Args: + target: The target tensor representing the true categorical labels. + Its shape should match the shape of the `output` tensor + except for the last dimension. + output: The output tensor representing the predicted probabilities + or logits. Its shape should match the shape of the `target` + tensor except for the last dimension. + from_logits: (optional) Whether `output` is a tensor of logits or + probabilities. + Set it to `True` if `output` represents logits; otherwise, + set it to `False` if `output` represents probabilities. + Defaults to `False`. 
+ axis: (optional) The axis along which the categorical cross-entropy + is computed. + Defaults to `-1`, which corresponds to the last dimension of + the tensors. + +Returns: + A tensor with the computed categorical cross-entropy loss between + `target` and `output`. + +Example: + +>>> target = keras.ops.convert_to_tensor( +... [[1, 0, 0], +... [0, 1, 0], +... [0, 0, 1]]) +>>> output = keras.ops.convert_to_tensor( +... [[0.9, 0.05, 0.05], +... [0.1, 0.8, 0.1], +... [0.2, 0.3, 0.5]]) +>>> categorical_crossentropy(target, output) +array([0.10536054 0.22314355 0.6931472 ], shape=(3,), dtype=float32) diff --git a/.tether/man/op_ceil.txt b/.tether/man/op_ceil.txt new file mode 100644 index 0000000000..12d0bdf6be --- /dev/null +++ b/.tether/man/op_ceil.txt @@ -0,0 +1,13 @@ +__signature__ +keras.ops.ceil(x) +__doc__ +Return the ceiling of the input, element-wise. + +The ceil of the scalar `x` is the smallest integer `i`, such that +`i >= x`. + +Args: + x: Input tensor. + +Returns: + The ceiling of each element in `x`, with float dtype. diff --git a/.tether/man/op_cholesky.txt b/.tether/man/op_cholesky.txt new file mode 100644 index 0000000000..f5f95b6f6c --- /dev/null +++ b/.tether/man/op_cholesky.txt @@ -0,0 +1,12 @@ +__signature__ +keras.ops.cholesky(x) +__doc__ +Computes the Cholesky decomposition of a positive semi-definite matrix. + +Args: + x: Input tensor of shape `(..., M, M)`. + +Returns: + A tensor of shape `(..., M, M)` representing the lower triangular + Cholesky factor of `x`. + diff --git a/.tether/man/op_clip.txt b/.tether/man/op_clip.txt new file mode 100644 index 0000000000..d36f1ca3d0 --- /dev/null +++ b/.tether/man/op_clip.txt @@ -0,0 +1,19 @@ +__signature__ +keras.ops.clip( + x, + x_min, + x_max +) +__doc__ +Clip (limit) the values in a tensor. + +Given an interval, values outside the interval are clipped to the +interval edges. For example, if an interval of `[0, 1]` is specified, +values smaller than 0 become 0, and values larger than 1 become 1. + +Args: + x: Input tensor. + x_min: Minimum value. + x_max: Maximum value. + +Returns: + The clipped tensor. diff --git a/.tether/man/op_concatenate.txt b/.tether/man/op_concatenate.txt new file mode 100644 index 0000000000..0c19fc6d0b --- /dev/null +++ b/.tether/man/op_concatenate.txt @@ -0,0 +1,11 @@ +__signature__ +keras.ops.concatenate(xs, axis=0) +__doc__ +Join a sequence of tensors along an existing axis. + +Args: + xs: The sequence of tensors to concatenate. + axis: The axis along which the tensors will be joined. Defaults to `0`. + +Returns: + The concatenated tensor. diff --git a/.tether/man/op_cond.txt b/.tether/man/op_cond.txt new file mode 100644 index 0000000000..b18c71ced1 --- /dev/null +++ b/.tether/man/op_cond.txt @@ -0,0 +1,16 @@ +__signature__ +keras.ops.cond( + pred, + true_fn, + false_fn +) +__doc__ +Conditionally applies `true_fn` or `false_fn`. + +Args: + pred: Boolean scalar type. + true_fn: Callable returning the output for the `pred == True` case. + false_fn: Callable returning the output for the `pred == False` case. + +Returns: + The output of either `true_fn` or `false_fn` depending on `pred`. diff --git a/.tether/man/op_conj.txt b/.tether/man/op_conj.txt new file mode 100644 index 0000000000..d19a1551f1 --- /dev/null +++ b/.tether/man/op_conj.txt @@ -0,0 +1,16 @@ +__signature__ +keras.ops.conjugate(x) +__doc__ +Returns the complex conjugate, element-wise. + +The complex conjugate of a complex number is obtained by changing the sign +of its imaginary part. 
+ +`keras.ops.conj` is a shorthand for this function. + +Args: + x: Input tensor. + +Returns: + The complex conjugate of each element in `x`. + diff --git a/.tether/man/op_conjugate.txt b/.tether/man/op_conjugate.txt new file mode 100644 index 0000000000..06989fd287 --- /dev/null +++ b/.tether/man/op_conjugate.txt @@ -0,0 +1,15 @@ +__signature__ +keras.ops.conjugate(x) +__doc__ +Returns the complex conjugate, element-wise. + +The complex conjugate of a complex number is obtained by changing the sign +of its imaginary part. + +`keras.ops.conj` is a shorthand for this function. + +Args: + x: Input tensor. + +Returns: + The complex conjugate of each element in `x`. diff --git a/.tether/man/op_conv.txt b/.tether/man/op_conv.txt new file mode 100644 index 0000000000..cb851b3197 --- /dev/null +++ b/.tether/man/op_conv.txt @@ -0,0 +1,45 @@ +__signature__ +keras.ops.conv( + inputs, + kernel, + strides=1, + padding='valid', + data_format=None, + dilation_rate=1 +) +__doc__ +General N-D convolution. + +This op supports 1D, 2D and 3D convolution. + +Args: + inputs: Tensor of rank N+2. `inputs` has shape + `(batch_size,) + inputs_spatial_shape + (num_channels,)` if + `data_format="channels_last"`, or + `(batch_size, num_channels) + inputs_spatial_shape` if + `data_format="channels_first"`. + kernel: Tensor of rank N+2. `kernel` has shape + `(kernel_spatial_shape, num_input_channels, num_output_channels)`. + `num_input_channels` should match the number of channels in + `inputs`. + strides: int or int tuple/list of `len(inputs_spatial_shape)`, + specifying the strides of the convolution along each spatial + dimension. If `strides` is int, then every spatial dimension shares + the same `strides`. + padding: string, either `"valid"` or `"same"`. `"valid"` means no + padding is applied, and `"same"` results in padding evenly to the + left/right or up/down of the input such that output has the + same height/width dimension as the input when `strides=1`. + data_format: A string, either `"channels_last"` or `"channels_first"`. + `data_format` determines the ordering of the dimensions in the + inputs. If `data_format="channels_last"`, `inputs` is of shape + `(batch_size, ..., channels)` while if + `data_format="channels_first"`, `inputs` is of shape + `(batch_size, channels, ...)`. + dilation_rate: int or int tuple/list of `len(inputs_spatial_shape)`, + specifying the dilation rate to use for dilated convolution. If + `dilation_rate` is int, then every spatial dimension shares + the same `dilation_rate`. + +Returns: + A tensor of rank N+2, the result of the conv operation. diff --git a/.tether/man/op_conv_transpose.txt b/.tether/man/op_conv_transpose.txt new file mode 100644 index 0000000000..dab311e058 --- /dev/null +++ b/.tether/man/op_conv_transpose.txt @@ -0,0 +1,53 @@ +__signature__ +keras.ops.conv_transpose( + inputs, + kernel, + strides, + padding='valid', + output_padding=None, + data_format=None, + dilation_rate=1 +) +__doc__ +General N-D convolution transpose. + +Also known as de-convolution. This op supports 1D, 2D and 3D convolution. + +Args: + inputs: Tensor of rank N+2. `inputs` has shape + `(batch_size,) + inputs_spatial_shape + (num_channels,)` if + `data_format="channels_last"`, or + `(batch_size, num_channels) + inputs_spatial_shape` if + `data_format="channels_first"`. + kernel: Tensor of rank N+2. `kernel` has shape + [kernel_spatial_shape, num_output_channels, num_input_channels], + `num_input_channels` should match the number of channels in + `inputs`. 
+ strides: int or int tuple/list of `len(inputs_spatial_shape)`, + specifying the strides of the convolution along each spatial + dimension. If `strides` is int, then every spatial dimension shares + the same `strides`. + padding: string, either `"valid"` or `"same"`. `"valid"` means no + padding is applied, and `"same"` results in padding evenly to the + left/right or up/down of the input such that output has the + same height/width dimension as the input when `strides=1`. + output_padding: int or int tuple/list of `len(inputs_spatial_shape)`, + specifying the amount of padding along the height and width of + the output tensor. Can be a single integer to specify the same + value for all spatial dimensions. The amount of output padding + along a given dimension must be lower than the stride along that + same dimension. If set to `None` (default), the output shape is + inferred. + data_format: A string, either `"channels_last"` or `"channels_first"`. + `data_format` determines the ordering of the dimensions in the + inputs. If `data_format="channels_last"`, `inputs` is of shape + `(batch_size, ..., channels)` while if + `data_format="channels_first"`, `inputs` is of shape + `(batch_size, channels, ...)`. + dilation_rate: int or int tuple/list of `len(inputs_spatial_shape)`, + specifying the dilation rate to use for dilated convolution. If + `dilation_rate` is int, then every spatial dimension shares + the same `dilation_rate`. + +Returns: + A tensor of rank N+2, the result of the conv operation. diff --git a/.tether/man/op_convert_to_numpy.txt b/.tether/man/op_convert_to_numpy.txt new file mode 100644 index 0000000000..c3ab6e3391 --- /dev/null +++ b/.tether/man/op_convert_to_numpy.txt @@ -0,0 +1,10 @@ +__signature__ +keras.ops.convert_to_numpy(x) +__doc__ +Convert a tensor to a NumPy array. + +Args: + x: A tensor. + +Returns: + A NumPy array. diff --git a/.tether/man/op_convert_to_tensor.txt b/.tether/man/op_convert_to_tensor.txt new file mode 100644 index 0000000000..de11dbfd4b --- /dev/null +++ b/.tether/man/op_convert_to_tensor.txt @@ -0,0 +1,23 @@ +__signature__ +keras.ops.convert_to_tensor( + x, + dtype=None, + sparse=None +) +__doc__ +Convert a NumPy array to a tensor. + +Args: + x: A NumPy array. + dtype: The target type. + sparse: Whether to keep sparse tensors. `False` will cause sparse + tensors to be densified. The default value of `None` means that + sparse tensors are kept only if the backend supports them. + +Returns: + A tensor of the specified `dtype`. + +Example: + +>>> x = np.array([1, 2, 3]) +>>> y = keras.ops.convert_to_tensor(x) diff --git a/.tether/man/op_copy.txt b/.tether/man/op_copy.txt new file mode 100644 index 0000000000..8f567035b2 --- /dev/null +++ b/.tether/man/op_copy.txt @@ -0,0 +1,10 @@ +__signature__ +keras.ops.copy(x) +__doc__ +Returns a copy of `x`. + +Args: + x: Input tensor. + +Returns: + A copy of `x`. diff --git a/.tether/man/op_correlate.txt b/.tether/man/op_correlate.txt new file mode 100644 index 0000000000..bfc329b45a --- /dev/null +++ b/.tether/man/op_correlate.txt @@ -0,0 +1,22 @@ +__signature__ +keras.ops.correlate( + x1, + x2, + mode='valid' +) +__doc__ +Compute the cross-correlation of two 1-dimensional tensors. + +Args: + x1: First 1-dimensional input tensor of length M. + x2: Second 1-dimensional input tensor of length N. + mode: Either `valid`, `same` or `full`. + By default the mode is set to `valid`, which returns + an output of length max(M, N) - min(M, N) + 1. + `same` returns an output of length max(M, N). 
+ `full` mode returns the convolution at each point of + overlap, with an output length of `N + M - 1`. + +Returns: + Output tensor, cross-correlation of `x1` and `x2`. + diff --git a/.tether/man/op_cos.txt b/.tether/man/op_cos.txt new file mode 100644 index 0000000000..940b93b709 --- /dev/null +++ b/.tether/man/op_cos.txt @@ -0,0 +1,10 @@ +__signature__ +keras.ops.cos(x) +__doc__ +Cosine, element-wise. + +Args: + x: Input tensor. + +Returns: + The corresponding cosine values. diff --git a/.tether/man/op_cosh.txt b/.tether/man/op_cosh.txt new file mode 100644 index 0000000000..322f5298cc --- /dev/null +++ b/.tether/man/op_cosh.txt @@ -0,0 +1,10 @@ +__signature__ +keras.ops.cosh(x) +__doc__ +Hyperbolic cosine, element-wise. + +Arguments: + x: Input tensor. + +Returns: + Output tensor of same shape as `x`. diff --git a/.tether/man/op_count_nonzero.txt b/.tether/man/op_count_nonzero.txt new file mode 100644 index 0000000000..e08c4c6c09 --- /dev/null +++ b/.tether/man/op_count_nonzero.txt @@ -0,0 +1,23 @@ +__signature__ +keras.ops.count_nonzero(x, axis=None) +__doc__ +Counts the number of non-zero values in `x` along the given `axis`. + +If no axis is specified then all non-zeros in the tensor are counted. + +Args: + x: Input tensor. + axis: Axis or tuple of axes along which to count the number of + non-zeros. Defaults to `None`. + +Returns: + int or tensor of ints. + +Examples: +>>> x = keras.ops.array([[0, 1, 7, 0], [3, 0, 2, 19]]) +>>> keras.ops.count_nonzero(x) +5 +>>> keras.ops.count_nonzero(x, axis=0) +array([1, 1, 2, 1], dtype=int64) +>>> keras.ops.count_nonzero(x, axis=1) +array([2, 3], dtype=int64) diff --git a/.tether/man/op_cross.txt b/.tether/man/op_cross.txt new file mode 100644 index 0000000000..ee7eb37213 --- /dev/null +++ b/.tether/man/op_cross.txt @@ -0,0 +1,42 @@ +__signature__ +keras.ops.cross( + x1, + x2, + axisa=-1, + axisb=-1, + axisc=-1, + axis=None +) +__doc__ +Returns the cross product of two (arrays of) vectors. + +The cross product of `x1` and `x2` in R^3 is a vector +perpendicular to both `x1` and `x2`. If `x1` and `x2` are arrays of +vectors, the vectors are defined by the last axis of `x1` and `x2` +by default, and these axes can have dimensions 2 or 3. + +Where the dimension of either `x1` or `x2` is 2, the third component of +the input vector is assumed to be zero and the cross product calculated +accordingly. + +In cases where both input vectors have dimension 2, the z-component of +the cross product is returned. + +Args: + x1: Components of the first vector(s). + x2: Components of the second vector(s). + axisa: Axis of `x1` that defines the vector(s). Defaults to `-1`. + axisb: Axis of `x2` that defines the vector(s). Defaults to `-1`. + axisc: Axis of the result containing the cross product vector(s). + Ignored if both input vectors have dimension 2, as the return is + scalar. By default, the last axis. + axis: If defined, the axis of `x1`, `x2` and the result that + defines the vector(s) and cross product(s). Overrides `axisa`, + `axisb` and `axisc`. + +Note: + Torch backend does not support two dimensional vectors, or the + arguments `axisa`, `axisb` and `axisc`. Use `axis` instead. + +Returns: + Vector cross product(s). 
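A minimal sketch, not from the help dump, of `keras.ops.cross` on two standard basis vectors; the cross product of the x and y unit vectors is the z unit vector:

```python
import keras

e1 = keras.ops.array([1.0, 0.0, 0.0])
e2 = keras.ops.array([0.0, 1.0, 0.0])

# e1 x e2 = e3, perpendicular to both inputs.
print(keras.ops.convert_to_numpy(keras.ops.cross(e1, e2)))
# [0. 0. 1.]
```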
diff --git a/.tether/man/op_ctc_decode.txt b/.tether/man/op_ctc_decode.txt new file mode 100644 index 0000000000..53d6959e75 --- /dev/null +++ b/.tether/man/op_ctc_decode.txt @@ -0,0 +1,37 @@ +__signature__ +keras.ops.ctc_decode( + inputs, + sequence_lengths, + strategy, + beam_width=100, + top_paths=1, + merge_repeated=True, + mask_index=None +) +__doc__ +Decodes the output of a CTC model. + +Args: + inputs: A tensor of shape `(batch_size, max_length, num_classes)` + containing the logits (output of the model). + sequence_lengths: A tensor of shape `(batch_size,)` containing the + sequence lengths for the batch. + strategy: A string for the decoding strategy. Supported values are + `"greedy"` and `"beam_search"`. + beam_width: An integer scalar beam width used in beam search. + Defaults to 100. + top_paths: An integer scalar, the number of top paths to return. + Defaults to 1. + merge_repeated: A boolean scalar, whether to merge repeated + labels in the output. Defaults to `True`. + mask_index: An integer scalar, the index of the mask character in + the vocabulary. Defaults to `None`. + +Returns: + A tuple containing: + + - A list of decoded sequences. + - A list of the negative of the sum of the probability logits + (if strategy is `"greedy"`) or the log probability (if strategy is + `"beam_search"`) for each sequence. + diff --git a/.tether/man/op_ctc_loss.txt b/.tether/man/op_ctc_loss.txt new file mode 100644 index 0000000000..32b7f36d01 --- /dev/null +++ b/.tether/man/op_ctc_loss.txt @@ -0,0 +1,23 @@ +__signature__ +keras.ops.ctc_loss( + target, + output, + target_length, + output_length, + mask_index=0 +) +__doc__ +CTC (Connectionist Temporal Classification) loss. + +Args: + target: A tensor of shape `(batch_size, max_length)` containing + the true labels in integer format. + output: A tensor of shape `(batch_size, max_length, num_classes)` + containing logits (the output of your model). + target_length: A tensor of shape `(batch_size,)` containing the + true label lengths. + output_length: A tensor of shape `(batch_size,)` containing the + output lengths. + mask_index: The index of the mask character in the vocabulary. + Defaults to `0`. + diff --git a/.tether/man/op_cumprod.txt b/.tether/man/op_cumprod.txt new file mode 100644 index 0000000000..94c0f1cab9 --- /dev/null +++ b/.tether/man/op_cumprod.txt @@ -0,0 +1,17 @@ +__signature__ +keras.ops.cumprod( + x, + axis=None, + dtype=None +) +__doc__ +Return the cumulative product of elements along a given axis. + +Args: + x: Input tensor. + axis: Axis along which the cumulative product is computed. + By default the input is flattened. + dtype: dtype of returned tensor. Defaults to x.dtype. + +Returns: + Output tensor. diff --git a/.tether/man/op_cumsum.txt b/.tether/man/op_cumsum.txt new file mode 100644 index 0000000000..e526865d6f --- /dev/null +++ b/.tether/man/op_cumsum.txt @@ -0,0 +1,17 @@ +__signature__ +keras.ops.cumsum( + x, + axis=None, + dtype=None +) +__doc__ +Returns the cumulative sum of elements along a given axis. + +Args: + x: Input tensor. + axis: Axis along which the cumulative sum is computed. + By default the input is flattened. + dtype: dtype of returned tensor. Defaults to x.dtype. + +Returns: + Output tensor. 
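A short sketch, not part of the captured help text, contrasting `keras.ops.cumsum` and `keras.ops.cumprod` on the same input:

```python
import keras

x = keras.ops.array([1, 2, 3, 4])

# Running sum vs. running product over the flattened input.
print(keras.ops.convert_to_numpy(keras.ops.cumsum(x)))   # [ 1  3  6 10]
print(keras.ops.convert_to_numpy(keras.ops.cumprod(x)))  # [ 1  2  6 24]
```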
diff --git a/.tether/man/op_custom_gradient.txt b/.tether/man/op_custom_gradient.txt new file mode 100644 index 0000000000..c357df36a1 --- /dev/null +++ b/.tether/man/op_custom_gradient.txt @@ -0,0 +1,82 @@ +__signature__ +keras.ops.custom_gradient(f) +__doc__ +Decorator to define a function with a custom gradient. + +This decorator allows fine-grained control over the gradients of a sequence +of operations. This may be useful for multiple reasons, including providing +a more efficient or numerically stable gradient for a sequence of +operations. + +Args: + f: Function `f(*args)` that returns a tuple + `(output, grad_fn)`, where: + - `args` is a sequence of (nested structures of) tensor inputs to + the function. + - `output` is a (nested structure of) tensor outputs of applying + operations in `forward_fn` to `args`. + - `grad_fn` is a function with the signature `grad_fn(*args, + upstream)` which returns a tuple of tensors the same size as + (flattened) `args`: the derivatives of tensors in `output` with + respect to the tensors in `args`. `upstream` is a tensor or + sequence of tensors holding the initial value gradients for each + tensor in `output`. + +Returns: + A function `h(*args)` which returns the same value as + `f(*args)[0]` and whose gradient is determined by + `f(*args)[1]`. + + +Examples: + +1. Backend-agnostic example. + +```python +@ops.custom_gradient +def log1pexp(x): + e = ops.exp(x) + + def grad(*args, upstream=None): + if upstream is None: + (upstream,) = args + return ops.multiply(upstream, 1.0 - 1.0 / ops.add(1, e)) + + return ops.log(1 + e), grad +``` + +Note that the `grad` function that returns the gradient computation +requires `args` as well as an `upstream` keyword argument, depending +on the backend being set. With the JAX and TensorFlow backends, +it requires only one argument, whereas it might use the `upstream` +argument in the case of the PyTorch backend. + +When working with TensorFlow/JAX backend, `grad(upstream)` +is sufficient. With PyTorch, the `grad` function requires +`*args` as well as `upstream`, e.g. `def grad(*args, upstream)`. +Follow the previous example to use `@ops.custom_gradient` in +a way that is compatible with all backends. + +2. Here's a JAX & TensorFlow-specific example: + +```python +@ops.custom_gradient +def log1pexp(x): + e = ops.exp(x) + def grad(upstream): + return ops.multiply(upstream, 1.0 - 1.0 / ops.add(1, e)) + return ops.log(1 + e), grad +``` + +3. Lastly, here's a PyTorch-specific example, +using `*args` & `upstream`: + +```python +@ops.custom_gradient +def log1pexp(x): + e = ops.exp(x) + def grad(*args, upstream): + return ops.multiply(upstream, 1.0 - 1.0 / ops.add(1, e)) + return ops.log(1 + e), grad +``` + diff --git a/.tether/man/op_depthwise_conv.txt b/.tether/man/op_depthwise_conv.txt new file mode 100644 index 0000000000..2e33d57c47 --- /dev/null +++ b/.tether/man/op_depthwise_conv.txt @@ -0,0 +1,45 @@ +__signature__ +keras.ops.depthwise_conv( + inputs, + kernel, + strides=1, + padding='valid', + data_format=None, + dilation_rate=1 +) +__doc__ +General N-D depthwise convolution. + +This op supports 1D and 2D depthwise convolution. + +Args: + inputs: Tensor of rank N+2. `inputs` has shape + `(batch_size,) + inputs_spatial_shape + (num_channels,)` if + `data_format="channels_last"`, or + `(batch_size, num_channels) + inputs_spatial_shape` if + `data_format="channels_first"`. + kernel: Tensor of rank N+2. 
`kernel` has shape + [kernel_spatial_shape, num_input_channels, num_channels_multiplier], + `num_input_channels` should match the number of channels in + `inputs`. + strides: int or int tuple/list of `len(inputs_spatial_shape)`, + specifying the strides of the convolution along each spatial + dimension. If `strides` is int, then every spatial dimension shares + the same `strides`. + padding: string, either `"valid"` or `"same"`. `"valid"` means no + padding is applied, and `"same"` results in padding evenly to the + left/right or up/down of the input such that output has the + same height/width dimension as the input when `strides=1`. + data_format: A string, either `"channels_last"` or `"channels_first"`. + `data_format` determines the ordering of the dimensions in the + inputs. If `data_format="channels_last"`, `inputs` is of shape + `(batch_size, ..., channels)` while if + `data_format="channels_first"`, `inputs` is of shape + `(batch_size, channels, ...)`. + dilation_rate: int or int tuple/list of `len(inputs_spatial_shape)`, + specifying the dilation rate to use for dilated convolution. If + `dilation_rate` is int, then every spatial dimension shares + the same `dilation_rate`. + +Returns: + A tensor of rank N+2, the result of the depthwise conv operation. diff --git a/.tether/man/op_det.txt b/.tether/man/op_det.txt new file mode 100644 index 0000000000..549f1cd79f --- /dev/null +++ b/.tether/man/op_det.txt @@ -0,0 +1,11 @@ +__signature__ +keras.ops.det(x) +__doc__ +Computes the determinant of a square tensor. + +Args: + x: Input tensor of shape `(..., M, M)`. + +Returns: + A tensor of shape `(...,)` representing the determinant of `x`. + diff --git a/.tether/man/op_diag.txt b/.tether/man/op_diag.txt new file mode 100644 index 0000000000..875976e09a --- /dev/null +++ b/.tether/man/op_diag.txt @@ -0,0 +1,35 @@ +__signature__ +keras.ops.diag(x, k=0) +__doc__ +Extract a diagonal or construct a diagonal array. + +Args: + x: Input tensor. If `x` is 2-D, returns the k-th diagonal of `x`. + If `x` is 1-D, return a 2-D tensor with `x` on the k-th diagonal. + k: The diagonal to consider. Defaults to `0`. Use `k > 0` for diagonals + above the main diagonal, and `k < 0` for diagonals below + the main diagonal. + +Returns: + The extracted diagonal or constructed diagonal tensor. + +Examples: +>>> from keras.src import ops +>>> x = ops.arange(9).reshape((3, 3)) +>>> x +array([[0, 1, 2], + [3, 4, 5], + [6, 7, 8]]) + +>>> ops.diag(x) +array([0, 4, 8]) +>>> ops.diag(x, k=1) +array([1, 5]) +>>> ops.diag(x, k=-1) +array([3, 7]) + +>>> ops.diag(ops.diag(x)) +array([[0, 0, 0], + [0, 4, 0], + [0, 0, 8]]) + diff --git a/.tether/man/op_diagonal.txt b/.tether/man/op_diagonal.txt new file mode 100644 index 0000000000..6aa941ac52 --- /dev/null +++ b/.tether/man/op_diagonal.txt @@ -0,0 +1,54 @@ +__signature__ +keras.ops.diagonal( + x, + offset=0, + axis1=0, + axis2=1 +) +__doc__ +Return specified diagonals. + +If `x` is 2-D, returns the diagonal of `x` with the given offset, i.e., the +collection of elements of the form `x[i, i+offset]`. + +If `x` has more than two dimensions, the axes specified by `axis1` +and `axis2` are used to determine the 2-D sub-array whose diagonal +is returned. + +The shape of the resulting array can be determined by removing `axis1` +and `axis2` and appending an index to the right equal to the size of +the resulting diagonals. + +Args: + x: Input tensor. + offset: Offset of the diagonal from the main diagonal. + Can be positive or negative. Defaults to `0` (main diagonal). 
+    axis1: Axis to be used as the first axis of the 2-D sub-arrays.
+        Defaults to `0` (first axis).
+    axis2: Axis to be used as the second axis of the 2-D sub-arrays.
+        Defaults to `1` (second axis).
+
+Returns:
+    Tensor of diagonals.
+
+Examples:
+>>> from keras.src import ops
+>>> x = ops.arange(4).reshape((2, 2))
+>>> x
+array([[0, 1],
+       [2, 3]])
+>>> x.diagonal()
+array([0, 3])
+>>> x.diagonal(1)
+array([1])
+
+>>> x = ops.arange(8).reshape((2, 2, 2))
+>>> x
+array([[[0, 1],
+        [2, 3]],
+       [[4, 5],
+        [6, 7]]])
+>>> x.diagonal(0, 0, 1)
+array([[0, 6],
+       [1, 7]])
+ diff --git a/.tether/man/op_diff.txt b/.tether/man/op_diff.txt new file mode 100644 index 0000000000..37f439e675 --- /dev/null +++ b/.tether/man/op_diff.txt @@ -0,0 +1,37 @@ +__signature__
+keras.ops.diff(
+    a,
+    n=1,
+    axis=-1
+)
+__doc__
+Calculate the n-th discrete difference along the given axis.
+
+The first difference is given by `out[i] = a[i+1] - a[i]` along
+the given axis; higher differences are calculated by using `diff`
+recursively.
+
+Args:
+    a: Input tensor.
+    n: The number of times values are differenced. Defaults to `1`.
+    axis: Axis to compute discrete difference(s) along.
+        Defaults to `-1` (last axis).
+
+Returns:
+    Tensor of the n-th discrete differences.
+
+Examples:
+>>> from keras.src import ops
+>>> x = ops.convert_to_tensor([1, 2, 4, 7, 0])
+>>> ops.diff(x)
+array([ 1, 2, 3, -7])
+>>> ops.diff(x, n=2)
+array([ 1, 1, -10])
+
+>>> x = ops.convert_to_tensor([[1, 3, 6, 10], [0, 5, 6, 8]])
+>>> ops.diff(x)
+array([[2, 3, 4],
+       [5, 1, 2]])
+>>> ops.diff(x, axis=0)
+array([[-1, 2, 0, -2]])
+ diff --git a/.tether/man/op_digitize.txt b/.tether/man/op_digitize.txt new file mode 100644 index 0000000000..f0bf098c01 --- /dev/null +++ b/.tether/man/op_digitize.txt @@ -0,0 +1,18 @@ +__signature__
+keras.ops.digitize(x, bins)
+__doc__
+Returns the indices of the bins to which each value in `x` belongs.
+
+Args:
+    x: Input array to be binned.
+    bins: Array of bins. It has to be one-dimensional and monotonically
+        increasing.
+
+Returns:
+    Output array of indices, of same shape as `x`.
+
+Example:
+>>> x = np.array([0.0, 1.0, 3.0, 1.6])
+>>> bins = np.array([0.0, 3.0, 4.5, 7.0])
+>>> keras.ops.digitize(x, bins)
+array([1, 1, 2, 1]) diff --git a/.tether/man/op_divide.txt b/.tether/man/op_divide.txt new file mode 100644 index 0000000000..8c5c57787e --- /dev/null +++ b/.tether/man/op_divide.txt @@ -0,0 +1,13 @@ +__signature__
+keras.ops.divide(x1, x2)
+__doc__
+Divide arguments element-wise.
+
+`keras.ops.true_divide` is an alias for this function.
+
+Args:
+    x1: First input tensor.
+    x2: Second input tensor.
+
+Returns:
+    Output tensor, the quotient `x1/x2`, element-wise. diff --git a/.tether/man/op_divide_no_nan.txt b/.tether/man/op_divide_no_nan.txt new file mode 100644 index 0000000000..f536cdcab3 --- /dev/null +++ b/.tether/man/op_divide_no_nan.txt @@ -0,0 +1,12 @@ +__signature__
+keras.ops.divide_no_nan(x1, x2)
+__doc__
+Safe element-wise division which returns 0 where the denominator is 0.
+
+Args:
+    x1: First input tensor.
+    x2: Second input tensor.
+
+Returns:
+    The quotient `x1/x2`, element-wise, with zero where `x2` is zero.
+ diff --git a/.tether/man/op_dot.txt b/.tether/man/op_dot.txt new file mode 100644 index 0000000000..0f69402f27 --- /dev/null +++ b/.tether/man/op_dot.txt @@ -0,0 +1,24 @@ +__signature__
+keras.ops.dot(x1, x2)
+__doc__
+Dot product of two tensors.
+
+- If both `x1` and `x2` are 1-D tensors, it is the inner product of vectors
+    (without complex conjugation).
+- If both `x1` and `x2` are 2-D tensors, it is matrix multiplication.
+- If either `x1` or `x2` is 0-D (scalar), it is equivalent to `x1 * x2`.
+- If `x1` is an N-D tensor and `x2` is a 1-D tensor, it is a sum product
+    over the last axis of `x1` and `x2`.
+- If `x1` is an N-D tensor and `x2` is an M-D tensor (where `M>=2`),
+    it is a sum product over the last axis of `x1` and the second-to-last
+    axis of `x2`: `dot(x1, x2)[i,j,k,m] = sum(x1[i,j,:] * x2[k,:,m])`.
+
+Args:
+    x1: First argument.
+    x2: Second argument.
+
+Note:
+    Torch backend does not accept 0-D tensors as arguments.
+
+Returns:
+    Dot product of `x1` and `x2`. diff --git a/.tether/man/op_eig.txt b/.tether/man/op_eig.txt new file mode 100644 index 0000000000..b56bbaf46c --- /dev/null +++ b/.tether/man/op_eig.txt @@ -0,0 +1,12 @@ +__signature__
+keras.ops.eig(x)
+__doc__
+Computes the eigenvalues and eigenvectors of a square matrix.
+
+Args:
+    x: Input tensor of shape `(..., M, M)`.
+
+Returns:
+    A tuple of two tensors: a tensor of shape `(..., M)` containing
+    eigenvalues and a tensor of shape `(..., M, M)` containing eigenvectors.
+ diff --git a/.tether/man/op_eigh.txt b/.tether/man/op_eigh.txt new file mode 100644 index 0000000000..1b9729d993 --- /dev/null +++ b/.tether/man/op_eigh.txt @@ -0,0 +1,12 @@ +__signature__
+keras.ops.eigh(x)
+__doc__
+Computes the eigenvalues and eigenvectors of a complex Hermitian
+(conjugate symmetric) matrix.
+
+Args:
+    x: Input tensor of shape `(..., M, M)`.
+
+Returns:
+    A tuple of two tensors: a tensor of shape `(..., M)` containing
+    eigenvalues and a tensor of shape `(..., M, M)` containing eigenvectors.
+ diff --git a/.tether/man/op_einsum.txt b/.tether/man/op_einsum.txt new file mode 100644 index 0000000000..9591c8a542 --- /dev/null +++ b/.tether/man/op_einsum.txt @@ -0,0 +1,85 @@ +__signature__
+keras.ops.einsum(subscripts, *operands)
+__doc__
+Evaluates the Einstein summation convention on the operands.
+
+Args:
+    subscripts: Specifies the subscripts for summation as a comma-separated
+        list of subscript labels. An implicit (classical Einstein
+        summation) calculation is performed unless the explicit indicator
+        `->` is included as well as subscript labels of the precise
+        output form.
+    operands: The operands to compute the Einstein sum of.
+
+Returns:
+    The calculation based on the Einstein summation convention.
+
+Example:
+>>> from keras.src import ops
+>>> a = ops.arange(25).reshape(5, 5)
+>>> b = ops.arange(5)
+>>> c = ops.arange(6).reshape(2, 3)
+
+Trace of a matrix:
+
+>>> ops.einsum("ii", a)
+60
+>>> ops.einsum(a, [0, 0])
+60
+>>> ops.trace(a)
+60
+
+Extract the diagonal:
+
+>>> ops.einsum("ii -> i", a)
+array([ 0, 6, 12, 18, 24])
+>>> ops.einsum(a, [0, 0], [0])
+array([ 0, 6, 12, 18, 24])
+>>> ops.diag(a)
+array([ 0, 6, 12, 18, 24])
+
+Sum over an axis:
+
+>>> ops.einsum("ij -> i", a)
+array([ 10, 35, 60, 85, 110])
+>>> ops.einsum(a, [0, 1], [0])
+array([ 10, 35, 60, 85, 110])
+>>> ops.sum(a, axis=1)
+array([ 10, 35, 60, 85, 110])
+
+For higher dimensional tensors summing a single axis can be done
+with ellipsis:
+
+>>> ops.einsum("...j -> ...", a)
+array([ 10, 35, 60, 85, 110])
+>>> ops.einsum(a, [..., 1], [...])
+array([ 10, 35, 60, 85, 110])
+
+Compute a matrix transpose or reorder any number of axes:
+
+>>> ops.einsum("ji", c)
+array([[0, 3],
+       [1, 4],
+       [2, 5]])
+>>> ops.einsum("ij -> ji", c)
+array([[0, 3],
+       [1, 4],
+       [2, 5]])
+>>> ops.einsum(c, [1, 0])
+array([[0, 3],
+       [1, 4],
+       [2, 5]])
+>>> ops.transpose(c)
+array([[0, 3],
+       [1, 4],
+       [2, 5]])
+
+Matrix-vector multiplication:
+
+>>> ops.einsum("ij, j", a, b)
+array([ 30, 80, 130, 180, 230])
+>>> ops.einsum(a, [0, 1], b, [1])
+array([ 30, 80, 130, 180, 230])
+>>> ops.einsum("...j, j", a, b)
+array([ 30, 80, 130, 180, 230])
+ diff --git a/.tether/man/op_elu.txt b/.tether/man/op_elu.txt new file mode 100644 index 0000000000..22032ba76a --- /dev/null +++ b/.tether/man/op_elu.txt @@ -0,0 +1,22 @@ +__signature__
+keras.ops.elu(x, alpha=1.0)
+__doc__
+Exponential Linear Unit activation function.
+
+It is defined as:
+
+`f(x) = alpha * (exp(x) - 1.) for x < 0`, `f(x) = x for x >= 0`.
+
+Args:
+    x: Input tensor.
+    alpha: A scalar, slope of the negative section. Defaults to `1.0`.
+
+Returns:
+    A tensor with the same shape as `x`.
+
+Example:
+
+>>> x = np.array([-1., 0., 1.])
+>>> x_elu = keras.ops.elu(x)
+>>> print(x_elu)
+array([-0.63212055, 0., 1.], shape=(3,), dtype=float64) diff --git a/.tether/man/op_empty.txt b/.tether/man/op_empty.txt new file mode 100644 index 0000000000..d32fa88e93 --- /dev/null +++ b/.tether/man/op_empty.txt @@ -0,0 +1,11 @@ +__signature__
+keras.ops.empty(shape, dtype=None)
+__doc__
+Return a tensor of given shape and type filled with uninitialized data.
+
+Args:
+    shape: Shape of the empty tensor.
+    dtype: Desired data type of the empty tensor.
+
+Returns:
+    The empty tensor. diff --git a/.tether/man/op_equal.txt b/.tether/man/op_equal.txt new file mode 100644 index 0000000000..cc4b00300e --- /dev/null +++ b/.tether/man/op_equal.txt @@ -0,0 +1,11 @@ +__signature__
+keras.ops.equal(x1, x2)
+__doc__
+Returns `(x1 == x2)` element-wise.
+
+Args:
+    x1: Tensor to compare.
+    x2: Tensor to compare.
+
+Returns:
+    Output tensor, element-wise comparison of `x1` and `x2`. diff --git a/.tether/man/op_erf.txt b/.tether/man/op_erf.txt new file mode 100644 index 0000000000..bbca02ca78 --- /dev/null +++ b/.tether/man/op_erf.txt @@ -0,0 +1,16 @@ +__signature__
+keras.ops.erf(x)
+__doc__
+Computes the error function of `x`, element-wise.
+
+Args:
+    x: Input tensor.
+
+Returns:
+    A tensor with the same dtype as `x`.
+
+Example:
+
+>>> x = np.array([-3.0, -2.0, -1.0, 0.0, 1.0])
+>>> keras.ops.erf(x)
+array([-0.99998 , -0.99532, -0.842701, 0., 0.842701], dtype=float32) diff --git a/.tether/man/op_erfinv.txt b/.tether/man/op_erfinv.txt new file mode 100644 index 0000000000..63a2ab5b0b --- /dev/null +++ b/.tether/man/op_erfinv.txt @@ -0,0 +1,17 @@ +__signature__
+keras.ops.erfinv(x)
+__doc__
+Computes the inverse error function of `x`, element-wise.
+
+Args:
+    x: Input tensor.
+
+Returns:
+    A tensor with the same dtype as `x`.
+
+Example:
+
+>>> x = np.array([-0.5, -0.2, -0.1, 0.0, 0.3])
+>>> keras.ops.erfinv(x)
+array([-0.47694, -0.17914, -0.08886, 0. , 0.27246], dtype=float32)
+ diff --git a/.tether/man/op_exp.txt b/.tether/man/op_exp.txt new file mode 100644 index 0000000000..152ee862ad --- /dev/null +++ b/.tether/man/op_exp.txt @@ -0,0 +1,10 @@ +__signature__
+keras.ops.exp(x)
+__doc__
+Calculate the exponential of all elements in the input tensor.
+
+Args:
+    x: Input tensor.
+
+Returns:
+    Output tensor, element-wise exponential of `x`. diff --git a/.tether/man/op_expand_dims.txt b/.tether/man/op_expand_dims.txt new file mode 100644 index 0000000000..cbf5ddbaae --- /dev/null +++ b/.tether/man/op_expand_dims.txt @@ -0,0 +1,14 @@ +__signature__
+keras.ops.expand_dims(x, axis)
+__doc__
+Expand the shape of a tensor.
+
+Insert a new axis at the `axis` position in the expanded tensor shape.
+
+Args:
+    x: Input tensor.
+    axis: Position in the expanded axes where the new axis
+        (or axes) is placed.
+
+Returns:
+    Output tensor with the number of dimensions increased. diff --git a/.tether/man/op_expm1.txt b/.tether/man/op_expm1.txt new file mode 100644 index 0000000000..e410d7ad21 --- /dev/null +++ b/.tether/man/op_expm1.txt @@ -0,0 +1,10 @@ +__signature__
+keras.ops.expm1(x)
+__doc__
+Calculate `exp(x) - 1` for all elements in the tensor.
+
+Args:
+    x: Input values.
+
+Returns:
+    Output tensor, element-wise exponential minus one. diff --git a/.tether/man/op_extract_sequences.txt b/.tether/man/op_extract_sequences.txt new file mode 100644 index 0000000000..85bf2e4a1d --- /dev/null +++ b/.tether/man/op_extract_sequences.txt @@ -0,0 +1,32 @@ +__signature__
+keras.ops.extract_sequences(
+    x,
+    sequence_length,
+    sequence_stride
+)
+__doc__
+Expands the dimension of last axis into sequences of `sequence_length`.
+
+Slides a window of size `sequence_length` over the last axis of the input
+with a stride of `sequence_stride`, replacing the last axis with
+`[num_sequences, sequence_length]` sequences.
+
+If the dimension along the last axis is N, the number of sequences can be
+computed by:
+
+`num_sequences = 1 + (N - sequence_length) // sequence_stride`
+
+Args:
+    x: Input tensor.
+    sequence_length: An integer representing the sequence length.
+    sequence_stride: An integer representing the sequence hop size.
+
+Returns:
+    A tensor of sequences with shape `[..., num_sequences, sequence_length]`.
+
+Example:
+
+>>> x = keras.ops.convert_to_tensor([1, 2, 3, 4, 5, 6])
+>>> extract_sequences(x, 3, 2)
+array([[1, 2, 3],
+       [3, 4, 5]]) diff --git a/.tether/man/op_eye.txt b/.tether/man/op_eye.txt new file mode 100644 index 0000000000..5ba222f358 --- /dev/null +++ b/.tether/man/op_eye.txt @@ -0,0 +1,20 @@ +__signature__
+keras.ops.eye(
+    N,
+    M=None,
+    k=0,
+    dtype=None
+)
+__doc__
+Return a 2-D tensor with ones on the diagonal and zeros elsewhere.
+
+Args:
+    N: Number of rows in the output.
+    M: Number of columns in the output. If `None`, defaults to `N`.
+    k: Index of the diagonal: 0 (the default) refers to the main
+        diagonal, a positive value refers to an upper diagonal,
+        and a negative value to a lower diagonal.
+    dtype: Data type of the returned tensor.
+
+Returns:
+    Tensor with ones on the k-th diagonal and zeros elsewhere. diff --git a/.tether/man/op_fft.txt b/.tether/man/op_fft.txt new file mode 100644 index 0000000000..e96eb4cd55 --- /dev/null +++ b/.tether/man/op_fft.txt @@ -0,0 +1,21 @@ +__signature__
+keras.ops.fft(x)
+__doc__
+Computes the Fast Fourier Transform along the last axis of the input.
+
+Args:
+    x: Tuple of the real and imaginary parts of the input tensor. Both
+        tensors in the tuple should be of floating type.
+
+Returns:
+    A tuple containing two tensors - the real and imaginary parts of the
+    output tensor.
+
+Example:
+
+>>> x = (
+...     keras.ops.convert_to_tensor([1., 2.]),
+...     keras.ops.convert_to_tensor([0., 1.]),
+... )
+>>> fft(x)
+(array([ 3., -1.], dtype=float32), array([ 1., -1.], dtype=float32)) diff --git a/.tether/man/op_fft2.txt b/.tether/man/op_fft2.txt new file mode 100644 index 0000000000..ac8c03ed61 --- /dev/null +++ b/.tether/man/op_fft2.txt @@ -0,0 +1,23 @@ +__signature__
+keras.ops.fft2(x)
+__doc__
+Computes the 2D Fast Fourier Transform along the last two axes of input.
+
+Args:
+    x: Tuple of the real and imaginary parts of the input tensor. Both
+        tensors in the tuple should be of floating type.
+
+Returns:
+    A tuple containing two tensors - the real and imaginary parts of the
+    output.
+
+Example:
+
+>>> x = (
+...     keras.ops.convert_to_tensor([[1., 2.], [2., 1.]]),
+...     keras.ops.convert_to_tensor([[0., 1.], [1., 0.]]),
+... )
+>>> fft2(x)
+(array([[ 6., 0.],
+        [ 0., -2.]], dtype=float32), array([[ 2., 0.],
+        [ 0., -2.]], dtype=float32)) diff --git a/.tether/man/op_flip.txt b/.tether/man/op_flip.txt new file mode 100644 index 0000000000..d0e1ba10b2 --- /dev/null +++ b/.tether/man/op_flip.txt @@ -0,0 +1,14 @@ +__signature__
+keras.ops.flip(x, axis=None)
+__doc__
+Reverse the order of elements in the tensor along the given axis.
+
+The shape of the tensor is preserved, but the elements are reordered.
+
+Args:
+    x: Input tensor.
+    axis: Axis or axes along which to flip the tensor. The default,
+        `axis=None`, will flip over all of the axes of the input tensor.
+
+Returns:
+    Output tensor with entries of `axis` reversed. diff --git a/.tether/man/op_floor.txt b/.tether/man/op_floor.txt new file mode 100644 index 0000000000..1a629fd549 --- /dev/null +++ b/.tether/man/op_floor.txt @@ -0,0 +1,12 @@ +__signature__
+keras.ops.floor(x)
+__doc__
+Return the floor of the input, element-wise.
+
+The floor of the scalar `x` is the largest integer `i`, such that `i <= x`.
+
+Args:
+    x: Input tensor.
+
+Returns:
+    Output tensor, element-wise floor of `x`. diff --git a/.tether/man/op_floor_divide.txt b/.tether/man/op_floor_divide.txt new file mode 100644 index 0000000000..fdeeabc520 --- /dev/null +++ b/.tether/man/op_floor_divide.txt @@ -0,0 +1,11 @@ +__signature__
+keras.ops.floor_divide(x1, x2)
+__doc__
+Returns the largest integer smaller than or equal to the division of inputs.
+
+Args:
+    x1: Numerator.
+    x2: Denominator.
+
+Returns:
+    Output tensor, `y = floor(x1/x2)`. diff --git a/.tether/man/op_fori_loop.txt b/.tether/man/op_fori_loop.txt new file mode 100644 index 0000000000..cea790ee0c --- /dev/null +++ b/.tether/man/op_fori_loop.txt @@ -0,0 +1,29 @@ +__signature__
+keras.ops.fori_loop(
+    lower,
+    upper,
+    body_fun,
+    init_val
+)
+__doc__
+For-loop implementation.
+
+Args:
+    lower: The initial value of the loop variable.
+    upper: The upper bound of the loop variable.
+    body_fun: A callable that represents the loop body. Must take two
+        arguments: the loop variable and the loop state. The loop state
+        should be updated and returned by this function.
+    init_val: The initial value of the loop state.
+
+Returns:
+    The final state after the loop.
+
+Example:
+
+>>> lower = 0
+>>> upper = 10
+>>> body_fun = lambda i, s: s + i
+>>> init_val = 0
+>>> keras.ops.fori_loop(lower, upper, body_fun, init_val)
+45 diff --git a/.tether/man/op_full.txt b/.tether/man/op_full.txt new file mode 100644 index 0000000000..8ec92de220 --- /dev/null +++ b/.tether/man/op_full.txt @@ -0,0 +1,16 @@ +__signature__
+keras.ops.full(
+    shape,
+    fill_value,
+    dtype=None
+)
+__doc__
+Return a new tensor of given shape and type, filled with `fill_value`.
+
+Args:
+    shape: Shape of the new tensor.
+    fill_value: Fill value.
+    dtype: Desired data type of the tensor.
+
+Returns:
+    Output tensor. diff --git a/.tether/man/op_full_like.txt b/.tether/man/op_full_like.txt new file mode 100644 index 0000000000..baafa7f004 --- /dev/null +++ b/.tether/man/op_full_like.txt @@ -0,0 +1,16 @@ +__signature__
+keras.ops.full_like(
+    x,
+    fill_value,
+    dtype=None
+)
+__doc__
+Return a full tensor with the same shape and type as the given tensor.
+
+Args:
+    x: Input tensor.
+    fill_value: Fill value.
+    dtype: Overrides data type of the result.
+
+Returns:
+    Tensor of `fill_value` with the same shape and type as `x`. diff --git a/.tether/man/op_gelu.txt b/.tether/man/op_gelu.txt new file mode 100644 index 0000000000..5fe7d09238 --- /dev/null +++ b/.tether/man/op_gelu.txt @@ -0,0 +1,25 @@ +__signature__
+keras.ops.gelu(x, approximate=True)
+__doc__
+Gaussian Error Linear Unit (GELU) activation function.
+
+If `approximate` is `True`, it is defined as:
+`f(x) = 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))`
+
+Or if `approximate` is `False`, it is defined as:
+`f(x) = x * P(X <= x) = 0.5 * x * (1 + erf(x / sqrt(2)))`,
+where `P(X) ~ N(0, 1)`.
+
+Args:
+    x: Input tensor.
+    approximate: Approximate version of GELU activation. Defaults to `True`.
+
+Returns:
+    A tensor with the same shape as `x`.
+
+Example:
+
+>>> x = np.array([-1., 0., 1.])
+>>> x_gelu = keras.ops.gelu(x)
+>>> print(x_gelu)
+array([-0.15865525, 0., 0.84134475], shape=(3,), dtype=float64) diff --git a/.tether/man/op_get_item.txt b/.tether/man/op_get_item.txt new file mode 100644 index 0000000000..432c9a08a7 --- /dev/null +++ b/.tether/man/op_get_item.txt @@ -0,0 +1,4 @@ +__signature__
+keras.ops.get_item(x, key)
+__doc__
+Return `x[key]`. diff --git a/.tether/man/op_greater.txt b/.tether/man/op_greater.txt new file mode 100644 index 0000000000..6e2e7b2c61 --- /dev/null +++ b/.tether/man/op_greater.txt @@ -0,0 +1,11 @@ +__signature__
+keras.ops.greater(x1, x2)
+__doc__
+Return the truth value of `x1 > x2` element-wise.
+
+Args:
+    x1: First input tensor.
+    x2: Second input tensor.
+
+Returns:
+    Output tensor, element-wise comparison of `x1` and `x2`. diff --git a/.tether/man/op_greater_equal.txt b/.tether/man/op_greater_equal.txt new file mode 100644 index 0000000000..7200721f15 --- /dev/null +++ b/.tether/man/op_greater_equal.txt @@ -0,0 +1,11 @@ +__signature__
+keras.ops.greater_equal(x1, x2)
+__doc__
+Return the truth value of `x1 >= x2` element-wise.
+
+Args:
+    x1: First input tensor.
+    x2: Second input tensor.
+
+Returns:
+    Output tensor, element-wise comparison of `x1` and `x2`.
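Note: the comparison ops above (`equal`, `greater`, `greater_equal`, and the `less`/`less_equal` variants further below) broadcast their inputs and return boolean tensors, which compose naturally with masking ops. A minimal, backend-agnostic sketch; it assumes only `keras.ops` functions documented in these tether files (`where` and `zeros_like` are covered elsewhere), and the exact printed formatting depends on the backend:

```python
import numpy as np
from keras import ops

x = ops.convert_to_tensor([-2.0, 0.0, 1.5, 3.0])

# Element-wise comparison against a broadcast scalar -> boolean tensor.
mask = ops.greater_equal(x, 0.0)  # [False, True, True, True]

# Use the mask to clamp negative entries to zero.
clamped = ops.where(mask, x, ops.zeros_like(x))
print(np.asarray(clamped))  # [0.  0.  1.5 3. ]
```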
diff --git a/.tether/man/op_hard_sigmoid.txt b/.tether/man/op_hard_sigmoid.txt new file mode 100644 index 0000000000..8a16c0d350 --- /dev/null +++ b/.tether/man/op_hard_sigmoid.txt @@ -0,0 +1,21 @@ +__signature__
+keras.ops.hard_sigmoid(x)
+__doc__
+Hard sigmoid activation function.
+
+It is defined as:
+
+`0 if x < -2.5`, `1 if x > 2.5`, `(0.2 * x) + 0.5 if -2.5 <= x <= 2.5`.
+
+Args:
+    x: Input tensor.
+
+Returns:
+    A tensor with the same shape as `x`.
+
+Example:
+
+>>> x = np.array([-1., 0., 1.])
+>>> x_hard_sigmoid = keras.ops.hard_sigmoid(x)
+>>> print(x_hard_sigmoid)
+array([0.3, 0.5, 0.7], shape=(3,), dtype=float64) diff --git a/.tether/man/op_hard_silu.txt b/.tether/man/op_hard_silu.txt new file mode 100644 index 0000000000..3071a71074 --- /dev/null +++ b/.tether/man/op_hard_silu.txt @@ -0,0 +1,25 @@ +__signature__
+keras.ops.hard_silu(x)
+__doc__
+Hard SiLU activation function, also known as Hard Swish.
+
+It is defined as:
+
+- `0` if `x < -3`
+- `x` if `x > 3`
+- `x * (x + 3) / 6` if `-3 <= x <= 3`
+
+It's a faster, piecewise linear approximation of the SiLU activation.
+
+Args:
+    x: Input tensor.
+
+Returns:
+    A tensor with the same shape as `x`.
+
+Example:
+
+>>> x = keras.ops.convert_to_tensor([-3.0, -1.0, 0.0, 1.0, 3.0])
+>>> keras.ops.hard_silu(x)
+array([-0.0, -0.3333333, 0.0, 0.6666667, 3.0], shape=(5,), dtype=float32)
+ diff --git a/.tether/man/op_hstack.txt b/.tether/man/op_hstack.txt new file mode 100644 index 0000000000..bffef365f8 --- /dev/null +++ b/.tether/man/op_hstack.txt @@ -0,0 +1,13 @@ +__signature__
+keras.ops.hstack(xs)
+__doc__
+Stack tensors in sequence horizontally (column wise).
+
+This is equivalent to concatenation along the first axis for 1-D tensors,
+and along the second axis for all other tensors.
+
+Args:
+    xs: Sequence of tensors.
+
+Returns:
+    The tensor formed by stacking the given tensors. diff --git a/.tether/man/op_identity.txt b/.tether/man/op_identity.txt new file mode 100644 index 0000000000..8a5b5b4016 --- /dev/null +++ b/.tether/man/op_identity.txt @@ -0,0 +1,14 @@ +__signature__
+keras.ops.identity(n, dtype=None)
+__doc__
+Return the identity tensor.
+
+The identity tensor is a square tensor with ones on the main diagonal and
+zeros elsewhere.
+
+Args:
+    n: Number of rows (and columns) in the `n x n` output tensor.
+    dtype: Data type of the output tensor.
+
+Returns:
+    The identity tensor. diff --git a/.tether/man/op_imag.txt b/.tether/man/op_imag.txt new file mode 100644 index 0000000000..c8a889c42a --- /dev/null +++ b/.tether/man/op_imag.txt @@ -0,0 +1,10 @@ +__signature__
+keras.ops.imag(x)
+__doc__
+Return the imaginary part of the complex argument.
+
+Args:
+    x: Input tensor.
+
+Returns:
+    The imaginary component of the complex argument. diff --git a/.tether/man/op_image_affine_transform.txt b/.tether/man/op_image_affine_transform.txt new file mode 100644 index 0000000000..036b72b4d1 --- /dev/null +++ b/.tether/man/op_image_affine_transform.txt @@ -0,0 +1,84 @@ +__signature__
+keras.ops.image.affine_transform(
+    image,
+    transform,
+    interpolation='bilinear',
+    fill_mode='constant',
+    fill_value=0,
+    data_format='channels_last'
+)
+__doc__
+Applies the given transform(s) to the image(s).
+
+Args:
+    image: Input image or batch of images. Must be 3D or 4D.
+    transform: Projective transform matrix/matrices. A vector of length 8 or
+        tensor of size N x 8.
If one row of transform is
+        `[a0, a1, a2, b0, b1, b2, c0, c1]`, then it maps the output point
+        `(x, y)` to a transformed input point
+        `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`,
+        where `k = c0 x + c1 y + 1`. The transform is inverted compared to
+        the transform mapping input points to output points. Note that
+        gradients are not backpropagated into transformation parameters.
+        Note that `c0` and `c1` are only effective when using TensorFlow
+        backend and will be considered as `0` when using other backends.
+    interpolation: Interpolation method. Available methods are `"nearest"`,
+        and `"bilinear"`. Defaults to `"bilinear"`.
+    fill_mode: Points outside the boundaries of the input are filled
+        according to the given mode. Available methods are `"constant"`,
+        `"nearest"`, `"wrap"` and `"reflect"`. Defaults to `"constant"`.
+        - `"reflect"`: `(d c b a | a b c d | d c b a)`
+            The input is extended by reflecting about the edge of the last
+            pixel.
+        - `"constant"`: `(k k k k | a b c d | k k k k)`
+            The input is extended by filling all values beyond
+            the edge with the same constant value k specified by
+            `fill_value`.
+        - `"wrap"`: `(a b c d | a b c d | a b c d)`
+            The input is extended by wrapping around to the opposite edge.
+        - `"nearest"`: `(a a a a | a b c d | d d d d)`
+            The input is extended by the nearest pixel.
+    fill_value: Value used for points outside the boundaries of the input if
+        `fill_mode="constant"`. Defaults to `0`.
+    data_format: string, either `"channels_last"` or `"channels_first"`.
+        The ordering of the dimensions in the inputs. `"channels_last"`
+        corresponds to inputs with shape `(batch, height, width, channels)`
+        while `"channels_first"` corresponds to inputs with shape
+        `(batch, channels, height, width)`. It defaults to the
+        `image_data_format` value found in your Keras config file at
+        `~/.keras/keras.json`. If you never set it, then it will be
+        `"channels_last"`.
+
+Returns:
+    Applied affine transform image or batch of images.
+
+Examples:
+
+>>> x = np.random.random((2, 64, 80, 3)) # batch of 2 RGB images
+>>> transform = np.array(
+...     [
+...         [1.5, 0, -20, 0, 1.5, -16, 0, 0],  # zoom
+...         [1, 0, -20, 0, 1, -16, 0, 0],  # translation
+...     ]
... )
+>>> y = keras.ops.image.affine_transform(x, transform)
+>>> y.shape
+(2, 64, 80, 3)
+
+>>> x = np.random.random((64, 80, 3)) # single RGB image
+>>> transform = np.array([1.0, 0.5, -20, 0.5, 1.0, -16, 0, 0]) # shear
+>>> y = keras.ops.image.affine_transform(x, transform)
+>>> y.shape
+(64, 80, 3)
+
+>>> x = np.random.random((2, 3, 64, 80)) # batch of 2 RGB images
+>>> transform = np.array(
+...     [
+...         [1.5, 0, -20, 0, 1.5, -16, 0, 0],  # zoom
+...         [1, 0, -20, 0, 1, -16, 0, 0],  # translation
+...     ]
... )
+>>> y = keras.ops.image.affine_transform(x, transform,
...     data_format="channels_first")
+>>> y.shape
+(2, 3, 64, 80) diff --git a/.tether/man/op_image_crop.txt b/.tether/man/op_image_crop.txt new file mode 100644 index 0000000000..69d085ebc5 --- /dev/null +++ b/.tether/man/op_image_crop.txt @@ -0,0 +1,41 @@ +__signature__
+keras.ops.image.crop_images(
+    images,
+    top_cropping=None,
+    left_cropping=None,
+    target_height=None,
+    target_width=None,
+    bottom_cropping=None,
+    right_cropping=None
+)
+__doc__
+Crop `images` to a specified `height` and `width`.
+
+Args:
+    images: 4-D batch of images of shape `(batch, height, width, channels)`
+        or 3-D single image of shape `(height, width, channels)`.
+    top_cropping: Number of rows to crop from the top.
+    bottom_cropping: Number of rows to crop from the bottom.
+    left_cropping: Number of columns to crop from the left.
+    right_cropping: Number of columns to crop from the right.
+    target_height: Height of the output images.
+    target_width: Width of the output images.
+
+Returns:
+    If `images` were 4D, a 4D float Tensor of shape
+        `(batch, target_height, target_width, channels)`
+    If `images` were 3D, a 3D float Tensor of shape
+        `(target_height, target_width, channels)`
+
+Example:
+
+>>> images = np.reshape(np.arange(1, 28, dtype="float32"), [3, 3, 3])
+>>> images[:,:,0] # print the first channel of the images
+array([[ 1., 4., 7.],
+       [10., 13., 16.],
+       [19., 22., 25.]], dtype=float32)
+>>> cropped_images = keras.ops.image.crop_images(images, 0, 0, 2, 2)
+>>> cropped_images[:,:,0] # print the first channel of the cropped images
+array([[ 1., 4.],
+       [10., 13.]], dtype=float32)
+ diff --git a/.tether/man/op_image_extract_patches.txt b/.tether/man/op_image_extract_patches.txt new file mode 100644 index 0000000000..f3c5788ea5 --- /dev/null +++ b/.tether/man/op_image_extract_patches.txt @@ -0,0 +1,46 @@ +__signature__
+keras.ops.image.extract_patches(
+    image,
+    size,
+    strides=None,
+    dilation_rate=1,
+    padding='valid',
+    data_format='channels_last'
+)
+__doc__
+Extracts patches from the image(s).
+
+Args:
+    image: Input image or batch of images. Must be 3D or 4D.
+    size: Patch size; int or tuple `(patch_height, patch_width)`.
+    strides: Strides along height and width. If not specified, or
+        if `None`, it defaults to the same value as `size`.
+    dilation_rate: This is the input stride, specifying how far two
+        consecutive patch samples are in the input. For value other than 1,
+        strides must be 1. NOTE: `strides > 1` is not supported in
+        conjunction with `dilation_rate > 1`
+    padding: The type of padding algorithm to use: `"same"` or `"valid"`.
+    data_format: string, either `"channels_last"` or `"channels_first"`.
+        The ordering of the dimensions in the inputs. `"channels_last"`
+        corresponds to inputs with shape `(batch, height, width, channels)`
+        while `"channels_first"` corresponds to inputs with shape
+        `(batch, channels, height, width)`. It defaults to the
+        `image_data_format` value found in your Keras config file at
+        `~/.keras/keras.json`. If you never set it, then it will be
+        `"channels_last"`.
+
+Returns:
+    Extracted patches, 3D (if not batched) or 4D (if batched).
+
+Examples:
+
+>>> image = np.random.random(
...     (2, 20, 20, 3)
... ).astype("float32") # batch of 2 RGB images
+>>> patches = keras.ops.image.extract_patches(image, (5, 5))
+>>> patches.shape
+(2, 4, 4, 75)
+>>> image = np.random.random((20, 20, 3)).astype("float32") # 1 RGB image
+>>> patches = keras.ops.image.extract_patches(image, (3, 3), (1, 1))
+>>> patches.shape
+(18, 18, 27) diff --git a/.tether/man/op_image_map_coordinates.txt b/.tether/man/op_image_map_coordinates.txt new file mode 100644 index 0000000000..53f4f43916 --- /dev/null +++ b/.tether/man/op_image_map_coordinates.txt @@ -0,0 +1,43 @@ +__signature__
+keras.ops.image.map_coordinates(
+    input,
+    coordinates,
+    order,
+    fill_mode='constant',
+    fill_value=0
+)
+__doc__
+Map the input array to new coordinates by interpolation.
+
+Note that interpolation near boundaries differs from the scipy function,
+because we fixed an outstanding bug
+[scipy/issues/2640](https://github.com/scipy/scipy/issues/2640).
+
+Args:
+    input: The input array.
+    coordinates: The coordinates at which input is evaluated.
+    order: The order of the spline interpolation.
The order must be `0` or
+        `1`. `0` indicates the nearest neighbor and `1` indicates the linear
+        interpolation.
+    fill_mode: Points outside the boundaries of the input are filled
+        according to the given mode. Available methods are `"constant"`,
+        `"nearest"`, `"wrap"`, `"mirror"` and `"reflect"`. Defaults to
+        `"constant"`.
+        - `"constant"`: `(k k k k | a b c d | k k k k)`
+            The input is extended by filling all values beyond
+            the edge with the same constant value k specified by
+            `fill_value`.
+        - `"nearest"`: `(a a a a | a b c d | d d d d)`
+            The input is extended by the nearest pixel.
+        - `"wrap"`: `(a b c d | a b c d | a b c d)`
+            The input is extended by wrapping around to the opposite edge.
+        - `"mirror"`: `(c d c b | a b c d | c b a b)`
+            The input is extended by mirroring about the edge.
+        - `"reflect"`: `(d c b a | a b c d | d c b a)`
+            The input is extended by reflecting about the edge of the last
+            pixel.
+    fill_value: Value used for points outside the boundaries of the input if
+        `fill_mode="constant"`. Defaults to `0`.
+
+Returns:
+    Output image or batch of images. diff --git a/.tether/man/op_image_pad.txt b/.tether/man/op_image_pad.txt new file mode 100644 index 0000000000..02b0f49ec6 --- /dev/null +++ b/.tether/man/op_image_pad.txt @@ -0,0 +1,44 @@ +__signature__
+keras.ops.image.pad_images(
+    images,
+    top_padding=None,
+    left_padding=None,
+    target_height=None,
+    target_width=None,
+    bottom_padding=None,
+    right_padding=None
+)
+__doc__
+Pad `images` with zeros to the specified `height` and `width`.
+
+Args:
+    images: 4D Tensor of shape `(batch, height, width, channels)` or 3D
+        Tensor of shape `(height, width, channels)`.
+    top_padding: Number of rows of zeros to add on top.
+    bottom_padding: Number of rows of zeros to add at the bottom.
+    left_padding: Number of columns of zeros to add on the left.
+    right_padding: Number of columns of zeros to add on the right.
+    target_height: Height of output images.
+    target_width: Width of output images.
+
+Returns:
+    If `images` were 4D, a 4D float Tensor of shape
+        `(batch, target_height, target_width, channels)`
+    If `images` were 3D, a 3D float Tensor of shape
+        `(target_height, target_width, channels)`
+
+Example:
+
+>>> images = np.random.random((15, 25, 3))
+>>> padded_images = keras.ops.image.pad_images(
...     images, 2, 3, target_height=20, target_width=30
... )
+>>> padded_images.shape
+(20, 30, 3)
+
+>>> batch_images = np.random.random((2, 15, 25, 3))
+>>> padded_batch = keras.ops.image.pad_images(
...     batch_images, 2, 3, target_height=20, target_width=30
... )
+>>> padded_batch.shape
+(2, 20, 30, 3) diff --git a/.tether/man/op_image_resize.txt b/.tether/man/op_image_resize.txt new file mode 100644 index 0000000000..a57a6acb0b --- /dev/null +++ b/.tether/man/op_image_resize.txt @@ -0,0 +1,68 @@ +__signature__
+keras.ops.image.resize(
+    image,
+    size,
+    interpolation='bilinear',
+    antialias=False,
+    crop_to_aspect_ratio=False,
+    pad_to_aspect_ratio=False,
+    fill_mode='constant',
+    fill_value=0.0,
+    data_format='channels_last'
+)
+__doc__
+Resize images to size using the specified interpolation method.
+
+Args:
+    image: Input image or batch of images. Must be 3D or 4D.
+    size: Size of output image in `(height, width)` format.
+    interpolation: Interpolation method. Available methods are `"nearest"`,
+        `"bilinear"`, and `"bicubic"`. Defaults to `"bilinear"`.
+    antialias: Whether to use an antialiasing filter when downsampling an
+        image. Defaults to `False`.
+    crop_to_aspect_ratio: If `True`, resize the images without aspect
+        ratio distortion. When the original aspect ratio differs
+        from the target aspect ratio, the output image will be
+        cropped so as to return the
+        largest possible window in the image (of size `(height, width)`)
+        that matches the target aspect ratio. By default
+        (`crop_to_aspect_ratio=False`), aspect ratio may not be preserved.
+    pad_to_aspect_ratio: If `True`, pad the images without aspect
+        ratio distortion. When the original aspect ratio differs
+        from the target aspect ratio, the output image will be
+        evenly padded on the short side.
+    fill_mode: When using `pad_to_aspect_ratio=True`, padded areas
+        are filled according to the given mode. Only `"constant"` is
+        supported at this time
+        (fill with constant value, equal to `fill_value`).
+    fill_value: Float. Padding value to use when `pad_to_aspect_ratio=True`.
+    data_format: string, either `"channels_last"` or `"channels_first"`.
+        The ordering of the dimensions in the inputs. `"channels_last"`
+        corresponds to inputs with shape `(batch, height, width, channels)`
+        while `"channels_first"` corresponds to inputs with shape
+        `(batch, channels, height, width)`. It defaults to the
+        `image_data_format` value found in your Keras config file at
+        `~/.keras/keras.json`. If you never set it, then it will be
+        `"channels_last"`.
+
+Returns:
+    Resized image or batch of images.
+
+Examples:
+
+>>> x = np.random.random((2, 4, 4, 3)) # batch of 2 RGB images
+>>> y = keras.ops.image.resize(x, (2, 2))
+>>> y.shape
+(2, 2, 2, 3)
+
+>>> x = np.random.random((4, 4, 3)) # single RGB image
+>>> y = keras.ops.image.resize(x, (2, 2))
+>>> y.shape
+(2, 2, 3)
+
+>>> x = np.random.random((2, 3, 4, 4)) # batch of 2 RGB images
+>>> y = keras.ops.image.resize(x, (2, 2),
...     data_format="channels_first")
+>>> y.shape
+(2, 3, 2, 2)
+ diff --git a/.tether/man/op_image_rgb_to_grayscale.txt b/.tether/man/op_image_rgb_to_grayscale.txt new file mode 100644 index 0000000000..8432f5e257 --- /dev/null +++ b/.tether/man/op_image_rgb_to_grayscale.txt @@ -0,0 +1,41 @@ +__signature__
+keras.ops.image.rgb_to_grayscale(image, data_format='channels_last')
+__doc__
+Convert RGB images to grayscale.
+
+This function converts RGB images to grayscale images. It supports both
+3D and 4D tensors, where the last dimension represents channels.
+
+Args:
+    image: Input RGB image or batch of RGB images. Must be a 3D tensor
+        with shape `(height, width, channels)` or a 4D tensor with shape
+        `(batch, height, width, channels)`.
+    data_format: A string specifying the data format of the input tensor.
+        It can be either `"channels_last"` or `"channels_first"`.
+        `"channels_last"` corresponds to inputs with shape
+        `(batch, height, width, channels)`, while `"channels_first"`
+        corresponds to inputs with shape `(batch, channels, height, width)`.
+        Defaults to `"channels_last"`.
+
+Returns:
+    Grayscale image or batch of grayscale images.
+
+Examples:
+
+>>> import numpy as np
+>>> from keras.src import ops
+>>> x = np.random.random((2, 4, 4, 3))
+>>> y = ops.image.rgb_to_grayscale(x)
+>>> y.shape
+(2, 4, 4, 1)
+
+>>> x = np.random.random((4, 4, 3)) # Single RGB image
+>>> y = ops.image.rgb_to_grayscale(x)
+>>> y.shape
+(4, 4, 1)
+
+>>> x = np.random.random((2, 3, 4, 4))
+>>> y = ops.image.rgb_to_grayscale(x, data_format="channels_first")
+>>> y.shape
+(2, 1, 4, 4)
+ diff --git a/.tether/man/op_in_top_k.txt b/.tether/man/op_in_top_k.txt new file mode 100644 index 0000000000..1d7843644c --- /dev/null +++ b/.tether/man/op_in_top_k.txt @@ -0,0 +1,27 @@ +__signature__
+keras.ops.in_top_k(
+    targets,
+    predictions,
+    k
+)
+__doc__
+Checks if the targets are in the top-k predictions.
+
+Args:
+    targets: A tensor of true labels.
+    predictions: A tensor of predicted labels.
+    k: An integer representing the number of predictions to consider.
+
+Returns:
+    A boolean tensor of the same shape as `targets`, where each element
+    indicates whether the corresponding target is in the top-k predictions.
+
+Example:
+
+>>> targets = keras.ops.convert_to_tensor([2, 5, 3])
+>>> predictions = keras.ops.convert_to_tensor(
... [[0.1, 0.4, 0.6, 0.9, 0.5],
...  [0.1, 0.7, 0.9, 0.8, 0.3],
...  [0.1, 0.6, 0.9, 0.9, 0.5]])
+>>> in_top_k(targets, predictions, k=3)
+array([ True, False, True], shape=(3,), dtype=bool) diff --git a/.tether/man/op_inv.txt b/.tether/man/op_inv.txt new file mode 100644 index 0000000000..fb0afb23b3 --- /dev/null +++ b/.tether/man/op_inv.txt @@ -0,0 +1,11 @@ +__signature__
+keras.ops.inv(x)
+__doc__
+Computes the inverse of a square tensor.
+
+Args:
+    x: Input tensor of shape `(..., M, M)`.
+
+Returns:
+    A tensor of shape `(..., M, M)` representing the inverse of `x`.
+ diff --git a/.tether/man/op_irfft.txt b/.tether/man/op_irfft.txt new file mode 100644 index 0000000000..2f3ccb43bf --- /dev/null +++ b/.tether/man/op_irfft.txt @@ -0,0 +1,39 @@ +__signature__
+keras.ops.irfft(x, fft_length=None)
+__doc__
+Inverse real-valued Fast Fourier transform along the last axis.
+
+Computes the inverse 1D Discrete Fourier Transform of a real-valued signal
+over the inner-most dimension of input.
+
+The inner-most dimension of the input is assumed to be the result of RFFT:
+the `fft_length / 2 + 1` unique components of the DFT of a real-valued
+signal. If `fft_length` is not provided, it is computed from the size of the
+inner-most dimension of the input `(fft_length = 2 * (inner - 1))`. If the
+FFT length used to compute the input is odd, it should be provided, since it
+cannot be inferred properly.
+
+Along the axis IRFFT is computed on, if `fft_length / 2 + 1` is smaller than
+the corresponding dimension of the input, the dimension is cropped. If it is
+larger, the dimension is padded with zeros.
+
+Args:
+    x: Tuple of the real and imaginary parts of the input tensor. Both
+        tensors in the tuple should be of floating type.
+    fft_length: An integer representing the FFT length. If not
+        specified, it is inferred from the length of the last axis of `x`.
+        Defaults to `None`.
+
+Returns:
+    A tensor containing the inverse real-valued Fast Fourier Transform
+    along the last axis of `x`.
+
+Examples:
+
+>>> real = keras.ops.convert_to_tensor([0.0, 1.0, 2.0, 3.0, 4.0])
+>>> imag = keras.ops.convert_to_tensor([0.0, 1.0, 2.0, 3.0, 4.0])
+>>> irfft((real, imag))
+array([0.66666667, -0.9106836, 0.24401694])
+
+>>> irfft(rfft(real, 5), 5)
+array([0.0, 1.0, 2.0, 3.0, 4.0]) diff --git a/.tether/man/op_is_tensor.txt b/.tether/man/op_is_tensor.txt new file mode 100644 index 0000000000..07db2c79f4 --- /dev/null +++ b/.tether/man/op_is_tensor.txt @@ -0,0 +1,13 @@ +__signature__
+keras.ops.is_tensor(x)
+__doc__
+Check whether the given object is a tensor.
+
+Note: This checks for backend-specific tensors, so passing a TensorFlow
+tensor would return `False` if your backend is PyTorch or JAX.
+
+Args:
+    x: A variable.
+
+Returns:
+    `True` if `x` is a tensor, otherwise `False`. diff --git a/.tether/man/op_isclose.txt b/.tether/man/op_isclose.txt new file mode 100644 index 0000000000..067d5b76cd --- /dev/null +++ b/.tether/man/op_isclose.txt @@ -0,0 +1,11 @@ +__signature__
+keras.ops.isclose(x1, x2)
+__doc__
+Return whether two tensors are element-wise almost equal.
+
+Args:
+    x1: First input tensor.
+    x2: Second input tensor.
+
+Returns:
+    Output boolean tensor. diff --git a/.tether/man/op_isfinite.txt b/.tether/man/op_isfinite.txt new file mode 100644 index 0000000000..5eddbd04a6 --- /dev/null +++ b/.tether/man/op_isfinite.txt @@ -0,0 +1,14 @@ +__signature__
+keras.ops.isfinite(x)
+__doc__
+Return whether a tensor is finite, element-wise.
+
+Real values are finite when they are not NaN, not positive infinity, and
+not negative infinity. Complex values are finite when both their real
+and imaginary parts are finite.
+
+Args:
+    x: Input tensor.
+
+Returns:
+    Output boolean tensor. diff --git a/.tether/man/op_isinf.txt b/.tether/man/op_isinf.txt new file mode 100644 index 0000000000..0662f49888 --- /dev/null +++ b/.tether/man/op_isinf.txt @@ -0,0 +1,10 @@ +__signature__
+keras.ops.isinf(x)
+__doc__
+Test element-wise for positive or negative infinity.
+
+Args:
+    x: Input tensor.
+
+Returns:
+    Output boolean tensor. diff --git a/.tether/man/op_isnan.txt b/.tether/man/op_isnan.txt new file mode 100644 index 0000000000..cb41bf6371 --- /dev/null +++ b/.tether/man/op_isnan.txt @@ -0,0 +1,10 @@ +__signature__
+keras.ops.isnan(x)
+__doc__
+Test element-wise for NaN and return result as a boolean tensor.
+
+Args:
+    x: Input tensor.
+
+Returns:
+    Output boolean tensor. diff --git a/.tether/man/op_istft.txt b/.tether/man/op_istft.txt new file mode 100644 index 0000000000..2ca09091a3 --- /dev/null +++ b/.tether/man/op_istft.txt @@ -0,0 +1,43 @@ +__signature__
+keras.ops.istft(
+    x,
+    sequence_length,
+    sequence_stride,
+    fft_length,
+    length=None,
+    window='hann',
+    center=True
+)
+__doc__
+Inverse Short-Time Fourier Transform along the last axis of the input.
+
+To reconstruct the original waveform, the parameters should match those
+used in `stft`.
+
+Args:
+    x: Tuple of the real and imaginary parts of the input tensor. Both
+        tensors in the tuple should be of floating type.
+    sequence_length: An integer representing the sequence length.
+    sequence_stride: An integer representing the sequence hop size.
+    fft_length: An integer representing the size of the FFT that produced
+        `stft`.
+    length: An integer. If specified, the output is clipped to exactly
+        `length`. If not specified, no padding or clipping takes place.
+        Defaults to `None`.
+    window: A string, a tensor of the window or `None`. If `window` is a
+        string, available values are `"hann"` and `"hamming"`.
If `window`
+        is a tensor, it will be used directly as the window and its length
+        must be `sequence_length`. If `window` is `None`, no windowing is
+        used. Defaults to `"hann"`.
+    center: Whether `x` was padded on both sides so that the t-th sequence
+        is centered at time `t * sequence_stride`. Defaults to `True`.
+
+Returns:
+    A tensor containing the inverse Short-Time Fourier Transform along the
+    last axis of `x`.
+
+Example:
+
+>>> x = keras.ops.convert_to_tensor([0.0, 1.0, 2.0, 3.0, 4.0])
+>>> istft(stft(x, 1, 1, 1), 1, 1, 1)
+array([0.0, 1.0, 2.0, 3.0, 4.0]) diff --git a/.tether/man/op_leaky_relu.txt b/.tether/man/op_leaky_relu.txt new file mode 100644 index 0000000000..10c2ac65d4 --- /dev/null +++ b/.tether/man/op_leaky_relu.txt @@ -0,0 +1,23 @@ +__signature__
+keras.ops.leaky_relu(x, negative_slope=0.2)
+__doc__
+Leaky version of a Rectified Linear Unit activation function.
+
+It allows a small gradient when the unit is not active. It is defined as:
+
+`f(x) = negative_slope * x` for `x < 0`, `f(x) = x` for `x >= 0`.
+
+Args:
+    x: Input tensor.
+    negative_slope: Slope of the activation function at x < 0.
+        Defaults to `0.2`.
+
+Returns:
+    A tensor with the same shape as `x`.
+
+Example:
+
+>>> x = np.array([-1., 0., 1.])
+>>> x_leaky_relu = keras.ops.leaky_relu(x)
+>>> print(x_leaky_relu)
+array([-0.2, 0. , 1. ], shape=(3,), dtype=float64) diff --git a/.tether/man/op_less.txt b/.tether/man/op_less.txt new file mode 100644 index 0000000000..a6106a072d --- /dev/null +++ b/.tether/man/op_less.txt @@ -0,0 +1,11 @@ +__signature__
+keras.ops.less(x1, x2)
+__doc__
+Return the truth value of `x1 < x2` element-wise.
+
+Args:
+    x1: First input tensor.
+    x2: Second input tensor.
+
+Returns:
+    Output tensor, element-wise comparison of `x1` and `x2`. diff --git a/.tether/man/op_less_equal.txt b/.tether/man/op_less_equal.txt new file mode 100644 index 0000000000..7ab9c640a2 --- /dev/null +++ b/.tether/man/op_less_equal.txt @@ -0,0 +1,11 @@ +__signature__
+keras.ops.less_equal(x1, x2)
+__doc__
+Return the truth value of `x1 <= x2` element-wise.
+
+Args:
+    x1: First input tensor.
+    x2: Second input tensor.
+
+Returns:
+    Output tensor, element-wise comparison of `x1` and `x2`. diff --git a/.tether/man/op_linspace.txt b/.tether/man/op_linspace.txt new file mode 100644 index 0000000000..bf1443531f --- /dev/null +++ b/.tether/man/op_linspace.txt @@ -0,0 +1,41 @@ +__signature__
+keras.ops.linspace(
+    start,
+    stop,
+    num=50,
+    endpoint=True,
+    retstep=False,
+    dtype=None,
+    axis=0
+)
+__doc__
+Return evenly spaced numbers over a specified interval.
+
+Returns `num` evenly spaced samples, calculated over the interval
+`[start, stop]`.
+
+The endpoint of the interval can optionally be excluded.
+
+Args:
+    start: The starting value of the sequence.
+    stop: The end value of the sequence, unless `endpoint` is set to
+        `False`. In that case, the sequence consists of all but the last
+        of `num + 1` evenly spaced samples, so that `stop` is excluded.
+        Note that the step size changes when `endpoint` is `False`.
+    num: Number of samples to generate. Defaults to `50`. Must be
+        non-negative.
+    endpoint: If `True`, `stop` is the last sample. Otherwise, it is
+        not included. Defaults to `True`.
+    retstep: If `True`, return `(samples, step)`, where `step` is the
+        spacing between samples.
+    dtype: The type of the output tensor.
+    axis: The axis in the result to store the samples. Relevant only if
+        `start` or `stop` are array-like. Defaults to `0`.
+
+Note:
+    Torch backend does not support the `axis` argument.
+
+Returns:
+    A tensor of evenly spaced numbers.
+    If `retstep` is `True`, returns `(samples, step)`.
+ diff --git a/.tether/man/op_log.txt b/.tether/man/op_log.txt new file mode 100644 index 0000000000..00ad94eabc --- /dev/null +++ b/.tether/man/op_log.txt @@ -0,0 +1,10 @@ +__signature__
+keras.ops.log(x)
+__doc__
+Natural logarithm, element-wise.
+
+Args:
+    x: Input tensor.
+
+Returns:
+    Output tensor, element-wise natural logarithm of `x`. diff --git a/.tether/man/op_log10.txt b/.tether/man/op_log10.txt new file mode 100644 index 0000000000..c72b6e2fd5 --- /dev/null +++ b/.tether/man/op_log10.txt @@ -0,0 +1,10 @@ +__signature__
+keras.ops.log10(x)
+__doc__
+Return the base 10 logarithm of the input tensor, element-wise.
+
+Args:
+    x: Input tensor.
+
+Returns:
+    Output tensor, element-wise base 10 logarithm of `x`. diff --git a/.tether/man/op_log1p.txt b/.tether/man/op_log1p.txt new file mode 100644 index 0000000000..8e1d7a5720 --- /dev/null +++ b/.tether/man/op_log1p.txt @@ -0,0 +1,12 @@ +__signature__
+keras.ops.log1p(x)
+__doc__
+Returns the natural logarithm of one plus `x`, element-wise.
+
+Calculates `log(1 + x)`.
+
+Args:
+    x: Input tensor.
+
+Returns:
+    Output tensor, element-wise natural logarithm of `1 + x`. diff --git a/.tether/man/op_log2.txt b/.tether/man/op_log2.txt new file mode 100644 index 0000000000..70406d4dfe --- /dev/null +++ b/.tether/man/op_log2.txt @@ -0,0 +1,10 @@ +__signature__
+keras.ops.log2(x)
+__doc__
+Base-2 logarithm of `x`, element-wise.
+
+Args:
+    x: Input tensor.
+
+Returns:
+    Output tensor, element-wise base-2 logarithm of `x`. diff --git a/.tether/man/op_log_sigmoid.txt b/.tether/man/op_log_sigmoid.txt new file mode 100644 index 0000000000..dcdda7e280 --- /dev/null +++ b/.tether/man/op_log_sigmoid.txt @@ -0,0 +1,18 @@ +__signature__
+keras.ops.log_sigmoid(x)
+__doc__
+Logarithm of the sigmoid activation function.
+
+It is defined as `f(x) = log(1 / (1 + exp(-x)))`.
+
+Args:
+    x: Input tensor.
+
+Returns:
+    A tensor with the same shape as `x`.
+
+Example:
+
+>>> x = keras.ops.convert_to_tensor([-0.541391, 0.0, 0.50, 5.0])
+>>> keras.ops.log_sigmoid(x)
+array([-1.0000418, -0.6931472, -0.474077, -0.00671535], dtype=float32) diff --git a/.tether/man/op_log_softmax.txt b/.tether/man/op_log_softmax.txt new file mode 100644 index 0000000000..517fe74e18 --- /dev/null +++ b/.tether/man/op_log_softmax.txt @@ -0,0 +1,22 @@ +__signature__
+keras.ops.log_softmax(x, axis=-1)
+__doc__
+Log-softmax activation function.
+
+It is defined as:
+`f(x) = x - max(x) - log(sum(exp(x - max(x))))`
+
+Args:
+    x: Input tensor.
+    axis: Integer, axis along which the log-softmax is applied.
+        Defaults to `-1`.
+
+Returns:
+    A tensor with the same shape as `x`.
+
+Example:
+
+>>> x = np.array([-1., 0., 1.])
+>>> x_log_softmax = keras.ops.log_softmax(x)
+>>> print(x_log_softmax)
+array([-2.40760596, -1.40760596, -0.40760596], shape=(3,), dtype=float64) diff --git a/.tether/man/op_logaddexp.txt b/.tether/man/op_logaddexp.txt new file mode 100644 index 0000000000..f300734d2f --- /dev/null +++ b/.tether/man/op_logaddexp.txt @@ -0,0 +1,14 @@ +__signature__
+keras.ops.logaddexp(x1, x2)
+__doc__
+Logarithm of the sum of exponentiations of the inputs.
+
+Calculates `log(exp(x1) + exp(x2))`.
+
+Args:
+    x1: Input tensor.
+    x2: Input tensor.
+
+Returns:
+    Output tensor, element-wise logarithm of the sum of exponentiations
+    of the inputs.
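Note: the reason to prefer `logaddexp` over composing `log`, `exp`, and `add` directly is numerical stability for large inputs. A minimal sketch of the difference (printed values are approximate and backend-dependent):

```python
import numpy as np
from keras import ops

x1 = ops.convert_to_tensor(1000.0)
x2 = ops.convert_to_tensor(1000.0)

# The naive formula overflows: exp(1000.0) is inf in float32.
naive = ops.log(ops.add(ops.exp(x1), ops.exp(x2)))
print(np.asarray(naive))  # inf

# logaddexp computes log(exp(x1) + exp(x2)) without overflow:
# log(2 * exp(1000)) = 1000 + log(2) ~= 1000.69.
stable = ops.logaddexp(x1, x2)
print(np.asarray(stable))
```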
diff --git a/.tether/man/op_logical_and.txt b/.tether/man/op_logical_and.txt new file mode 100644 index 0000000000..e5b517609f --- /dev/null +++ b/.tether/man/op_logical_and.txt @@ -0,0 +1,13 @@ +__signature__
+keras.ops.logical_and(x1, x2)
+__doc__
+Computes the element-wise logical AND of the given input tensors.
+
+Zeros are treated as `False` and non-zeros are treated as `True`.
+
+Args:
+    x1: Input tensor.
+    x2: Input tensor.
+
+Returns:
+    Output tensor, element-wise logical AND of the inputs. diff --git a/.tether/man/op_logical_not.txt b/.tether/man/op_logical_not.txt new file mode 100644 index 0000000000..256d3db922 --- /dev/null +++ b/.tether/man/op_logical_not.txt @@ -0,0 +1,12 @@ +__signature__
+keras.ops.logical_not(x)
+__doc__
+Computes the element-wise NOT of the given input tensor.
+
+Zeros are treated as `False` and non-zeros are treated as `True`.
+
+Args:
+    x: Input tensor.
+
+Returns:
+    Output tensor, element-wise logical NOT of the input. diff --git a/.tether/man/op_logical_or.txt b/.tether/man/op_logical_or.txt new file mode 100644 index 0000000000..c82a90d2ea --- /dev/null +++ b/.tether/man/op_logical_or.txt @@ -0,0 +1,13 @@ +__signature__
+keras.ops.logical_or(x1, x2)
+__doc__
+Computes the element-wise logical OR of the given input tensors.
+
+Zeros are treated as `False` and non-zeros are treated as `True`.
+
+Args:
+    x1: Input tensor.
+    x2: Input tensor.
+
+Returns:
+    Output tensor, element-wise logical OR of the inputs. diff --git a/.tether/man/op_logical_xor.txt b/.tether/man/op_logical_xor.txt new file mode 100644 index 0000000000..ea0482771b --- /dev/null +++ b/.tether/man/op_logical_xor.txt @@ -0,0 +1,11 @@ +__signature__
+keras.ops.logical_xor(x1, x2)
+__doc__
+Compute the truth value of `x1 XOR x2`, element-wise.
+
+Args:
+    x1: First input tensor.
+    x2: Second input tensor.
+
+Returns:
+    Output boolean tensor. diff --git a/.tether/man/op_logspace.txt b/.tether/man/op_logspace.txt new file mode 100644 index 0000000000..34d457b496 --- /dev/null +++ b/.tether/man/op_logspace.txt @@ -0,0 +1,36 @@ +__signature__
+keras.ops.logspace(
+    start,
+    stop,
+    num=50,
+    endpoint=True,
+    base=10,
+    dtype=None,
+    axis=0
+)
+__doc__
+Returns numbers spaced evenly on a log scale.
+
+In linear space, the sequence starts at `base ** start` and ends with
+`base ** stop` (see `endpoint` below).
+
+Args:
+    start: The starting value of the sequence.
+    stop: The final value of the sequence, unless `endpoint` is `False`.
+        In that case, `num + 1` values are spaced over the interval in
+        log-space, of which all but the last (a sequence of length `num`)
+        are returned.
+    num: Number of samples to generate. Defaults to `50`.
+    endpoint: If `True`, `stop` is the last sample. Otherwise, it is not
+        included. Defaults to `True`.
+    base: The base of the log space. Defaults to `10`.
+    dtype: The type of the output tensor.
+    axis: The axis in the result to store the samples. Relevant only
+        if `start` or `stop` are array-like.
+
+Note:
+    Torch backend does not support the `axis` argument.
+
+Returns:
+    A tensor of evenly spaced samples on a log scale.
+ diff --git a/.tether/man/op_logsumexp.txt b/.tether/man/op_logsumexp.txt new file mode 100644 index 0000000000..a0175c0666 --- /dev/null +++ b/.tether/man/op_logsumexp.txt @@ -0,0 +1,26 @@ +__signature__
+keras.ops.logsumexp(
+    x,
+    axis=None,
+    keepdims=False
+)
+__doc__
+Computes the logarithm of the sum of exponentials of elements in a tensor.
+
+Args:
+    x: Input tensor.
+    axis: An integer or a tuple of integers specifying the axis/axes
+        along which to compute the sum. If `None`, the sum is computed
+        over all elements. Defaults to `None`.
+    keepdims: A boolean indicating whether to keep the dimensions of
+        the input tensor when computing the sum. Defaults to `False`.
+
+Returns:
+    A tensor containing the logarithm of the sum of exponentials of
+    elements in `x`.
+
+Example:
+
+>>> x = keras.ops.convert_to_tensor([1., 2., 3.])
+>>> logsumexp(x)
+3.407606 diff --git a/.tether/man/op_lu_factor.txt b/.tether/man/op_lu_factor.txt new file mode 100644 index 0000000000..3a84d22662 --- /dev/null +++ b/.tether/man/op_lu_factor.txt @@ -0,0 +1,13 @@ +__signature__
+keras.ops.lu_factor(x)
+__doc__
+Computes the lower-upper decomposition of a square matrix.
+
+Args:
+    x: A tensor of shape `(..., M, M)`.
+
+Returns:
+    A tuple of two tensors: a tensor of shape `(..., M, M)` containing the
+    lower and upper triangular matrices and a tensor of shape `(..., M)`
+    containing the pivots.
+ diff --git a/.tether/man/op_matmul.txt b/.tether/man/op_matmul.txt new file mode 100644 index 0000000000..5e13580b79 --- /dev/null +++ b/.tether/man/op_matmul.txt @@ -0,0 +1,20 @@ +__signature__
+keras.ops.matmul(x1, x2)
+__doc__
+Matrix product of two tensors.
+
+- If both tensors are 1-dimensional, the dot product (scalar) is returned.
+- If either tensor is N-D, N > 2, it is treated as a stack of matrices
+    residing in the last two indices and broadcast accordingly.
+- If the first tensor is 1-D, it is promoted to a matrix by prepending
+    a 1 to its dimensions. After matrix multiplication the prepended
+    1 is removed.
+- If the second tensor is 1-D, it is promoted to a matrix by appending a 1
+    to its dimensions. After matrix multiplication the appended 1 is removed.
+
+Args:
+    x1: First tensor.
+    x2: Second tensor.
+
+Returns:
+    Output tensor, matrix product of the inputs. diff --git a/.tether/man/op_max.txt b/.tether/man/op_max.txt new file mode 100644 index 0000000000..5fde2f88ed --- /dev/null +++ b/.tether/man/op_max.txt @@ -0,0 +1,21 @@ +__signature__
+keras.ops.max(
+    x,
+    axis=None,
+    keepdims=False,
+    initial=None
+)
+__doc__
+Return the maximum of a tensor or maximum along an axis.
+
+Args:
+    x: Input tensor.
+    axis: Axis or axes along which to operate. By default, flattened input
+        is used.
+    keepdims: If this is set to `True`, the axes which are reduced are left
+        in the result as dimensions with size one. Defaults to `False`.
+    initial: The minimum value of an output element. Defaults to `None`.
+
+Returns:
+    Maximum of `x`.
+ diff --git a/.tether/man/op_max_pool.txt b/.tether/man/op_max_pool.txt new file mode 100644 index 0000000000..e27eaaf7e9 --- /dev/null +++ b/.tether/man/op_max_pool.txt @@ -0,0 +1,40 @@ +__signature__
+keras.ops.max_pool(
+    inputs,
+    pool_size,
+    strides=None,
+    padding='valid',
+    data_format=None
+)
+__doc__
+Max pooling operation.
+
+Args:
+    inputs: Tensor of rank N+2. `inputs` has shape
+        `(batch_size,) + inputs_spatial_shape + (num_channels,)` if
+        `data_format="channels_last"`, or
+        `(batch_size, num_channels) + inputs_spatial_shape` if
+        `data_format="channels_first"`. Pooling happens over the spatial
+        dimensions only.
+    pool_size: int or tuple/list of integers of size
+        `len(inputs_spatial_shape)`, specifying the size of the pooling
+        window for each spatial dimension of the input tensor. If
+        `pool_size` is int, then every spatial dimension shares the same
+        `pool_size`.
+ strides: int or tuple/list of integers of size + `len(inputs_spatial_shape)`. The stride of the sliding window for + each spatial dimension of the input tensor. If `strides` is int, + then every spatial dimension shares the same `strides`. + padding: string, either `"valid"` or `"same"`. `"valid"` means no + padding is applied, and `"same"` results in padding evenly to the + left/right or up/down of the input such that output has the + same height/width dimension as the input when `strides=1`. + data_format: A string, either `"channels_last"` or `"channels_first"`. + `data_format` determines the ordering of the dimensions in the + inputs. If `data_format="channels_last"`, `inputs` is of shape + `(batch_size, ..., channels)` while if + `data_format="channels_first"`, `inputs` is of shape + `(batch_size, channels, ...)`. + +Returns: + A tensor of rank N+2, the result of the max pooling operation.
diff --git a/.tether/man/op_maximum.txt b/.tether/man/op_maximum.txt new file mode 100644 index 0000000000..1a1bbe908c --- /dev/null +++ b/.tether/man/op_maximum.txt @@ -0,0 +1,11 @@ +__signature__ +keras.ops.maximum(x1, x2) +__doc__ +Element-wise maximum of `x1` and `x2`. + +Args: + x1: First tensor. + x2: Second tensor. + +Returns: + Output tensor, element-wise maximum of `x1` and `x2`.
diff --git a/.tether/man/op_mean.txt b/.tether/man/op_mean.txt new file mode 100644 index 0000000000..3524ae17b2 --- /dev/null +++ b/.tether/man/op_mean.txt @@ -0,0 +1,18 @@ +__signature__ +keras.ops.mean( + x, + axis=None, + keepdims=False +) +__doc__ +Compute the arithmetic mean along the specified axes. + +Args: + x: Input tensor. + axis: Axis or axes along which the means are computed. The default + is to compute the mean of the flattened tensor. + keepdims: If this is set to `True`, the axes which are reduced are left + in the result as dimensions with size one. + +Returns: + Output tensor containing the mean values.
diff --git a/.tether/man/op_median.txt b/.tether/man/op_median.txt new file mode 100644 index 0000000000..1052a6db62 --- /dev/null +++ b/.tether/man/op_median.txt @@ -0,0 +1,19 @@ +__signature__ +keras.ops.median( + x, + axis=None, + keepdims=False +) +__doc__ +Compute the median along the specified axis. + +Args: + x: Input tensor. + axis: Axis or axes along which the medians are computed. Defaults to + `axis=None` which is to compute the median(s) along a flattened + version of the array. + keepdims: If this is set to `True`, the axes which are reduced + are left in the result as dimensions with size one. + +Returns: + The output tensor.
diff --git a/.tether/man/op_meshgrid.txt b/.tether/man/op_meshgrid.txt new file mode 100644 index 0000000000..eb6a03c731 --- /dev/null +++ b/.tether/man/op_meshgrid.txt @@ -0,0 +1,33 @@ +__signature__ +keras.ops.meshgrid(*x, indexing='xy') +__doc__ +Creates grids of coordinates from coordinate vectors. + +Given `N` 1-D tensors `T0, T1, ..., TN-1` as inputs with corresponding +lengths `S0, S1, ..., SN-1`, this creates `N` N-dimensional tensors +`G0, G1, ..., GN-1` each with shape `(S0, ..., SN-1)` where the output +`Gi` is constructed by expanding `Ti` to the result shape. + +Args: + x: 1-D tensors representing the coordinates of a grid. + indexing: `"xy"` or `"ij"`. `"xy"` is Cartesian; `"ij"` is matrix + indexing of the output. Defaults to `"xy"`. + +Returns: + Sequence of N tensors.
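+ +Note (added for clarity): with the default `indexing="xy"`, the first two dimensions of the outputs are swapped relative to `indexing="ij"`; that is, the grids have shape `(S1, S0, S2, ..., SN-1)`.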
+ +Example: +>>> from keras.src import ops +>>> x = ops.array([1, 2, 3]) +>>> y = ops.array([4, 5, 6]) + +>>> grid_x, grid_y = ops.meshgrid(x, y, indexing="ij") +>>> grid_x +array([[1, 1, 1], + [2, 2, 2], + [3, 3, 3]]) +>>> grid_y +array([[4, 5, 6], + [4, 5, 6], + [4, 5, 6]]) +
diff --git a/.tether/man/op_min.txt b/.tether/man/op_min.txt new file mode 100644 index 0000000000..7f019437c8 --- /dev/null +++ b/.tether/man/op_min.txt @@ -0,0 +1,21 @@ +__signature__ +keras.ops.min( + x, + axis=None, + keepdims=False, + initial=None +) +__doc__ +Return the minimum of a tensor or minimum along an axis. + +Args: + x: Input tensor. + axis: Axis or axes along which to operate. By default, flattened input + is used. + keepdims: If this is set to `True`, the axes which are reduced are left + in the result as dimensions with size one. Defaults to `False`. + initial: The maximum value of an output element. Defaults to `None`. + +Returns: + Minimum of `x`. +
diff --git a/.tether/man/op_minimum.txt b/.tether/man/op_minimum.txt new file mode 100644 index 0000000000..1bf234cdbe --- /dev/null +++ b/.tether/man/op_minimum.txt @@ -0,0 +1,11 @@ +__signature__ +keras.ops.minimum(x1, x2) +__doc__ +Element-wise minimum of `x1` and `x2`. + +Args: + x1: First tensor. + x2: Second tensor. + +Returns: + Output tensor, element-wise minimum of `x1` and `x2`.
diff --git a/.tether/man/op_mod.txt b/.tether/man/op_mod.txt new file mode 100644 index 0000000000..61df0491a7 --- /dev/null +++ b/.tether/man/op_mod.txt @@ -0,0 +1,11 @@ +__signature__ +keras.ops.mod(x1, x2) +__doc__ +Returns the element-wise remainder of division. + +Args: + x1: First tensor. + x2: Second tensor. + +Returns: + Output tensor, element-wise remainder of division.
diff --git a/.tether/man/op_moments.txt b/.tether/man/op_moments.txt new file mode 100644 index 0000000000..2ec11959a7 --- /dev/null +++ b/.tether/man/op_moments.txt @@ -0,0 +1,33 @@ +__signature__ +keras.ops.moments( + x, + axes, + keepdims=False, + synchronized=False +) +__doc__ +Calculates the mean and variance of `x`. + +The mean and variance are calculated by aggregating the contents of `x` +across `axes`. If `x` is 1-D and `axes = [0]` this is just the mean and +variance of a vector. + +Args: + x: Input tensor. + axes: A list of axes along which to compute the mean and variance. + keepdims: If this is set to `True`, the axes which are reduced are left + in the result as dimensions with size one. + synchronized: Only applicable with the TensorFlow backend. + If `True`, synchronizes the global batch statistics (mean and + variance) across all devices at each training step in a + distributed training strategy. If `False`, each replica uses its own + local batch statistics. + +Returns: + A tuple containing two tensors - mean and variance. + +Example: + +>>> x = keras.ops.convert_to_tensor([0, 1, 2, 3, 100], dtype="float32") +>>> keras.ops.moments(x, axes=[0]) +(array(21.2, dtype=float32), array(1553.3601, dtype=float32))
diff --git a/.tether/man/op_moveaxis.txt b/.tether/man/op_moveaxis.txt new file mode 100644 index 0000000000..43fd9dd40b --- /dev/null +++ b/.tether/man/op_moveaxis.txt @@ -0,0 +1,19 @@ +__signature__ +keras.ops.moveaxis( + x, + source, + destination +) +__doc__ +Move axes of a tensor to new positions. + +Other axes remain in their original order. + +Args: + x: Tensor whose axes should be reordered. + source: Original positions of the axes to move. These must be unique. + destination: Destination positions for each of the original axes. + These must also be unique.
+ +Returns: + Tensor with moved axes.
diff --git a/.tether/man/op_multi_hot.txt b/.tether/man/op_multi_hot.txt new file mode 100644 index 0000000000..69b2d4e55e --- /dev/null +++ b/.tether/man/op_multi_hot.txt @@ -0,0 +1,34 @@ +__signature__ +keras.ops.multi_hot( + inputs, + num_classes=None, + axis=-1, + dtype=None, + sparse=False, + **kwargs +) +__doc__ +Encodes integer labels as multi-hot vectors. + +This function encodes integer labels as multi-hot vectors, where each label +is mapped to a binary value in the resulting vector. + +Args: + inputs: Tensor of integer labels to be converted to multi-hot vectors. + num_classes: Integer, the total number of unique classes. + axis: (optional) Axis along which the multi-hot encoding should be + added. Defaults to `-1`, which corresponds to the last dimension. + dtype: (optional) The data type of the resulting tensor. Defaults + to the backend's float type. + sparse: Whether to return a sparse tensor; for backends that support + sparse tensors. + +Returns: + Tensor: The multi-hot encoded tensor. + +Example: + +>>> data = keras.ops.convert_to_tensor([0, 4]) +>>> keras.ops.multi_hot(data, num_classes=5) +array([1.0, 0.0, 0.0, 0.0, 1.0], dtype=float32) +
diff --git a/.tether/man/op_multiply.txt b/.tether/man/op_multiply.txt new file mode 100644 index 0000000000..2b4509ff96 --- /dev/null +++ b/.tether/man/op_multiply.txt @@ -0,0 +1,11 @@ +__signature__ +keras.ops.multiply(x1, x2) +__doc__ +Multiply arguments element-wise. + +Args: + x1: First input tensor. + x2: Second input tensor. + +Returns: + Output tensor, element-wise product of `x1` and `x2`.
diff --git a/.tether/man/op_nan_to_num.txt b/.tether/man/op_nan_to_num.txt new file mode 100644 index 0000000000..703a818603 --- /dev/null +++ b/.tether/man/op_nan_to_num.txt @@ -0,0 +1,21 @@ +__signature__ +keras.ops.nan_to_num( + x, + nan=0.0, + posinf=None, + neginf=None +) +__doc__ +Replace NaN with zero and infinity with large finite numbers. + +Args: + x: Input data. + nan: Optional float or int. Value to replace `NaN` entries with. + posinf: Optional float or int. + Value to replace positive infinity with. + neginf: Optional float or int. + Value to replace negative infinity with. + +Returns: + `x`, with non-finite values replaced. +
diff --git a/.tether/man/op_ndim.txt b/.tether/man/op_ndim.txt new file mode 100644 index 0000000000..682f2b0038 --- /dev/null +++ b/.tether/man/op_ndim.txt @@ -0,0 +1,10 @@ +__signature__ +keras.ops.ndim(x) +__doc__ +Return the number of dimensions of a tensor. + +Args: + x: Input tensor. + +Returns: + The number of dimensions in `x`.
diff --git a/.tether/man/op_negative.txt b/.tether/man/op_negative.txt new file mode 100644 index 0000000000..53d72a6e95 --- /dev/null +++ b/.tether/man/op_negative.txt @@ -0,0 +1,10 @@ +__signature__ +keras.ops.negative(x) +__doc__ +Numerical negative, element-wise. + +Args: + x: Input tensor. + +Returns: + Output tensor, `y = -x`.
diff --git a/.tether/man/op_nonzero.txt b/.tether/man/op_nonzero.txt new file mode 100644 index 0000000000..ed0b353faf --- /dev/null +++ b/.tether/man/op_nonzero.txt @@ -0,0 +1,10 @@ +__signature__ +keras.ops.nonzero(x) +__doc__ +Return the indices of the elements that are non-zero. + +Args: + x: Input tensor. + +Returns: + Indices of elements that are non-zero.
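+ +Example (an illustrative sketch added here, not upstream text; the printed output assumes the NumPy backend, which returns a tuple of index arrays, one per dimension): + +>>> x = keras.ops.convert_to_tensor([3, 0, 0, 5]) +>>> keras.ops.nonzero(x) +(array([0, 3]),)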
diff --git a/.tether/man/op_norm.txt b/.tether/man/op_norm.txt new file mode 100644 index 0000000000..1b58f86271 --- /dev/null +++ b/.tether/man/op_norm.txt @@ -0,0 +1,62 @@ +__signature__ +keras.ops.norm( + x, + ord=None, + axis=None, + keepdims=False +) +__doc__ +Matrix or vector norm. + +This function is able to return one of eight different matrix norms, or one +of an infinite number of vector norms (described below), depending on the +value of the `ord` parameter. + +Args: + x: Input tensor. + ord: Order of the norm (see table under Notes). The default is `None`. + axis: If `axis` is an integer, it specifies the axis of `x` along which + to compute the vector norms. If `axis` is a 2-tuple, it specifies + the axes that hold 2-D matrices, and the matrix norms of these + matrices are computed. + keepdims: If this is set to `True`, the axes which are reduced are left + in the result as dimensions with size one. + +Note: + For values of `ord < 1`, the result is, strictly speaking, not a + mathematical 'norm', but it may still be useful for various numerical + purposes. The following norms can be calculated: + - For matrices: + - `ord=None`: Frobenius norm + - `ord="fro"`: Frobenius norm + - `ord="nuc"`: nuclear norm + - `ord=np.inf`: `max(sum(abs(x), axis=1))` + - `ord=-np.inf`: `min(sum(abs(x), axis=1))` + - `ord=0`: not supported + - `ord=1`: `max(sum(abs(x), axis=0))` + - `ord=-1`: `min(sum(abs(x), axis=0))` + - `ord=2`: 2-norm (largest singular value) + - `ord=-2`: smallest singular value + - other: not supported + - For vectors: + - `ord=None`: 2-norm + - `ord="fro"`: not supported + - `ord="nuc"`: not supported + - `ord=np.inf`: `max(abs(x))` + - `ord=-np.inf`: `min(abs(x))` + - `ord=0`: `sum(x != 0)` + - `ord=1`: as below + - `ord=-1`: as below + - `ord=2`: as below + - `ord=-2`: as below + - other: `sum(abs(x)**ord)**(1./ord)` + +Returns: + Norm of the matrix or vector(s). + +Example: + +>>> x = keras.ops.reshape(keras.ops.arange(9, dtype="float32") - 4, (3, 3)) +>>> keras.ops.norm(x) +7.7459664 +
diff --git a/.tether/man/op_normalize.txt b/.tether/man/op_normalize.txt new file mode 100644 index 0000000000..edff2d63d4 --- /dev/null +++ b/.tether/man/op_normalize.txt @@ -0,0 +1,29 @@ +__signature__ +keras.ops.normalize( + x, + axis=-1, + order=2 +) +__doc__ +Normalizes `x` over the specified axis. + +It is defined as: `normalize(x) = x / max(norm(x), epsilon)`. + +Args: + x: Input tensor. + axis: The axis or axes along which to perform normalization. + Defaults to `-1`. + order: The exponent value in the norm formulation. + Defaults to `2`. + +Returns: + The normalized array. + +Example: + +>>> x = keras.ops.convert_to_tensor([[1, 2, 3], [4, 5, 6]]) +>>> x_norm = keras.ops.normalize(x) +>>> print(x_norm) +array([[0.26726124 0.5345225 0.8017837 ] + [0.45584232 0.5698029 0.68376344]], shape=(2, 3), dtype=float32) +
diff --git a/.tether/man/op_not_equal.txt b/.tether/man/op_not_equal.txt new file mode 100644 index 0000000000..a0f079f7ac --- /dev/null +++ b/.tether/man/op_not_equal.txt @@ -0,0 +1,11 @@ +__signature__ +keras.ops.not_equal(x1, x2) +__doc__ +Return `(x1 != x2)` element-wise. + +Args: + x1: First input tensor. + x2: Second input tensor. + +Returns: + Output tensor, element-wise comparison of `x1` and `x2`.
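+ +Example (an illustrative sketch added here, not upstream text; the printed output assumes the NumPy backend): + +>>> x1 = keras.ops.convert_to_tensor([1, 2, 3]) +>>> x2 = keras.ops.convert_to_tensor([1, 5, 3]) +>>> keras.ops.not_equal(x1, x2) +array([False, True, False])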
diff --git a/.tether/man/op_one_hot.txt b/.tether/man/op_one_hot.txt new file mode 100644 index 0000000000..5639a4f4d4 --- /dev/null +++ b/.tether/man/op_one_hot.txt @@ -0,0 +1,42 @@ +__signature__ +keras.ops.one_hot( + x, + num_classes, + axis=-1, + dtype=None, + sparse=False +) +__doc__ +Converts integer tensor `x` into a one-hot tensor. + +The one-hot encoding is a representation where each integer value is +converted into a binary vector with a length equal to `num_classes`, +and the index corresponding to the integer value is marked as 1, while +all other indices are marked as 0. + +Args: + x: Integer tensor to be encoded. The shape can be + arbitrary, but the dtype should be integer. + num_classes: Number of classes for the one-hot encoding. + axis: Axis along which the encoding is performed. Defaults to + `-1`, which represents the last axis. + dtype: (Optional) Data type of the output tensor. If not + provided, it defaults to the default data type of the backend. + sparse: Whether to return a sparse tensor; for backends that support + sparse tensors. + +Returns: + Integer tensor: One-hot encoded tensor with the same shape as `x` + except for the specified `axis` dimension, which will have + a length of `num_classes`. The dtype of the output tensor + is determined by `dtype` or the default data type of the backend. + +Example: + +>>> x = keras.ops.convert_to_tensor([1, 3, 2, 0]) +>>> one_hot(x, num_classes=4) +array([[0. 1. 0. 0.] + [0. 0. 0. 1.] + [0. 0. 1. 0.] + [1. 0. 0. 0.]], shape=(4, 4), dtype=float32) + diff --git a/.tether/man/op_ones.txt b/.tether/man/op_ones.txt new file mode 100644 index 0000000000..cc132469c5 --- /dev/null +++ b/.tether/man/op_ones.txt @@ -0,0 +1,11 @@ +__signature__ +keras.ops.ones(shape, dtype=None) +__doc__ +Return a new tensor of given shape and type, filled with ones. + +Args: + shape: Shape of the new tensor. + dtype: Desired data type of the tensor. + +Returns: + Tensor of ones with the given shape and dtype. diff --git a/.tether/man/op_ones_like.txt b/.tether/man/op_ones_like.txt new file mode 100644 index 0000000000..dd610b81ad --- /dev/null +++ b/.tether/man/op_ones_like.txt @@ -0,0 +1,11 @@ +__signature__ +keras.ops.ones_like(x, dtype=None) +__doc__ +Return a tensor of ones with the same shape and type of `x`. + +Args: + x: Input tensor. + dtype: Overrides the data type of the result. + +Returns: + A tensor of ones with the same shape and type as `x`. diff --git a/.tether/man/op_outer.txt b/.tether/man/op_outer.txt new file mode 100644 index 0000000000..5168138a87 --- /dev/null +++ b/.tether/man/op_outer.txt @@ -0,0 +1,17 @@ +__signature__ +keras.ops.outer(x1, x2) +__doc__ +Compute the outer product of two vectors. + +Given two vectors `x1` and `x2`, the outer product is: + +``` +out[i, j] = x1[i] * x2[j] +``` + +Args: + x1: First input tensor. + x2: Second input tensor. + +Returns: + Outer product of `x1` and `x2`. diff --git a/.tether/man/op_pad.txt b/.tether/man/op_pad.txt new file mode 100644 index 0000000000..d7545f9cee --- /dev/null +++ b/.tether/man/op_pad.txt @@ -0,0 +1,38 @@ +__signature__ +keras.ops.pad( + x, + pad_width, + mode='constant', + constant_values=None +) +__doc__ +Pad a tensor. + +Args: + x: Tensor to pad. + pad_width: Number of values padded to the edges of each axis. + `((before_1, after_1), ...(before_N, after_N))` unique pad + widths for each axis. + `((before, after),)` yields same before and after pad for + each axis. + `(pad,)` or `int` is a shortcut for `before = after = pad` + width for all axes. 
+ mode: One of `"constant"`, `"edge"`, `"linear_ramp"`, + `"maximum"`, `"mean"`, `"median"`, `"minimum"`, + `"reflect"`, `"symmetric"`, `"wrap"`, `"empty"`, + `"circular"`. Defaults to `"constant"`. + constant_values: Value to pad with if `mode == "constant"`. + Defaults to `0`. A `ValueError` is raised if it is not `None` and + `mode != "constant"`. + +Note: + Torch backend only supports modes `"constant"`, `"reflect"`, + `"symmetric"` and `"circular"`. + Only Torch backend supports `"circular"` mode. + +Note: + Tensorflow backend only supports modes `"constant"`, `"reflect"` + and `"symmetric"`. + +Returns: + Padded tensor.
diff --git a/.tether/man/op_power.txt b/.tether/man/op_power.txt new file mode 100644 index 0000000000..372aea4926 --- /dev/null +++ b/.tether/man/op_power.txt @@ -0,0 +1,11 @@ +__signature__ +keras.ops.power(x1, x2) +__doc__ +First tensor elements raised to powers from second tensor, element-wise. + +Args: + x1: The bases. + x2: The exponents. + +Returns: + Output tensor, the bases in `x1` raised to the exponents in `x2`.
diff --git a/.tether/man/op_prod.txt b/.tether/man/op_prod.txt new file mode 100644 index 0000000000..dd9524c251 --- /dev/null +++ b/.tether/man/op_prod.txt @@ -0,0 +1,21 @@ +__signature__ +keras.ops.prod( + x, + axis=None, + keepdims=False, + dtype=None +) +__doc__ +Return the product of tensor elements over a given axis. + +Args: + x: Input tensor. + axis: Axis or axes along which a product is performed. The default, + `axis=None`, will compute the product of all elements + in the input tensor. + keepdims: If this is set to `True`, the axes which are reduced + are left in the result as dimensions with size one. + dtype: Data type of the returned tensor. + +Returns: + Product of elements of `x` over the given axis or axes.
diff --git a/.tether/man/op_qr.txt b/.tether/man/op_qr.txt new file mode 100644 index 0000000000..0b2213b4a5 --- /dev/null +++ b/.tether/man/op_qr.txt @@ -0,0 +1,25 @@ +__signature__ +keras.ops.qr(x, mode='reduced') +__doc__ +Computes the QR decomposition of a tensor. + +Args: + x: Input tensor of shape `(..., M, N)`. + mode: A string specifying the mode of the QR decomposition. + - 'reduced': Returns the reduced QR decomposition. (default) + - 'complete': Returns the complete QR decomposition. + +Returns: + A tuple containing two tensors. The first tensor of shape `(..., M, K)` + is the orthogonal matrix `q` and the second tensor of shape + `(..., K, N)` is the upper triangular matrix `r`, where `K = min(M, N)`. + +Example: + +>>> x = keras.ops.convert_to_tensor([[1., 2.], [3., 4.], [5., 6.]]) +>>> q, r = qr(x) +>>> print(q) +array([[-0.16903079 0.897085] + [-0.5070925 0.2760267 ] + [-0.8451542 -0.34503305]], shape=(3, 2), dtype=float32) +
diff --git a/.tether/man/op_quantile.txt b/.tether/man/op_quantile.txt new file mode 100644 index 0000000000..05e0a75e07 --- /dev/null +++ b/.tether/man/op_quantile.txt @@ -0,0 +1,36 @@ +__signature__ +keras.ops.quantile( + x, + q, + axis=None, + method='linear', + keepdims=False +) +__doc__ +Compute the q-th quantile(s) of the data along the specified axis. + +Args: + x: Input tensor. + q: Probability or sequence of probabilities for the quantiles to + compute. Values must be between 0 and 1 inclusive. + axis: Axis or axes along which the quantiles are computed. Defaults to + `axis=None` which is to compute the quantile(s) along a flattened + version of the array. + method: A string specifying the method to use for estimating the + quantile.
Available methods are `"linear"`, `"lower"`, `"higher"`, + `"midpoint"`, and `"nearest"`. Defaults to `"linear"`. + If the desired quantile lies between two data points `i < j`: + - `"linear"`: `i + (j - i) * fraction`, where fraction is the + fractional part of the index surrounded by `i` and `j`. + - `"lower"`: `i`. + - `"higher"`: `j`. + - `"midpoint"`: `(i + j) / 2` + - `"nearest"`: `i` or `j`, whichever is nearest. + keepdims: If this is set to `True`, the axes which are reduced + are left in the result as dimensions with size one. + +Returns: + The quantile(s). If `q` is a single probability and `axis=None`, then + the result is a scalar. If multiple probability levels are given, the first + axis of the result corresponds to the quantiles. The other axes are the + axes that remain after the reduction of `x`.
diff --git a/.tether/man/op_ravel.txt b/.tether/man/op_ravel.txt new file mode 100644 index 0000000000..f487c30297 --- /dev/null +++ b/.tether/man/op_ravel.txt @@ -0,0 +1,12 @@ +__signature__ +keras.ops.ravel(x) +__doc__ +Return a contiguous flattened tensor. + +A 1-D tensor, containing the elements of the input, is returned. + +Args: + x: Input tensor. + +Returns: + Output tensor.
diff --git a/.tether/man/op_real.txt b/.tether/man/op_real.txt new file mode 100644 index 0000000000..9580f4be99 --- /dev/null +++ b/.tether/man/op_real.txt @@ -0,0 +1,10 @@ +__signature__ +keras.ops.real(x) +__doc__ +Return the real part of the complex argument. + +Args: + x: Input tensor. + +Returns: + The real component of the complex argument.
diff --git a/.tether/man/op_reciprocal.txt b/.tether/man/op_reciprocal.txt new file mode 100644 index 0000000000..f6d2b39ef7 --- /dev/null +++ b/.tether/man/op_reciprocal.txt @@ -0,0 +1,12 @@ +__signature__ +keras.ops.reciprocal(x) +__doc__ +Return the reciprocal of the argument, element-wise. + +Calculates `1/x`. + +Args: + x: Input tensor. + +Returns: + Output tensor, element-wise reciprocal of `x`.
diff --git a/.tether/man/op_relu.txt b/.tether/man/op_relu.txt new file mode 100644 index 0000000000..b8f05bc9c1 --- /dev/null +++ b/.tether/man/op_relu.txt @@ -0,0 +1,18 @@ +__signature__ +keras.ops.relu(x) +__doc__ +Rectified linear unit activation function. + +It is defined as `f(x) = max(0, x)`. + +Args: + x: Input tensor. + +Returns: + A tensor with the same shape as `x`. + +Example: + +>>> x1 = keras.ops.convert_to_tensor([-1.0, 0.0, 1.0, 0.2]) +>>> keras.ops.relu(x1) +array([0.0, 0.0, 1.0, 0.2], dtype=float32)
diff --git a/.tether/man/op_relu6.txt b/.tether/man/op_relu6.txt new file mode 100644 index 0000000000..7f8d06f66f --- /dev/null +++ b/.tether/man/op_relu6.txt @@ -0,0 +1,18 @@ +__signature__ +keras.ops.relu6(x) +__doc__ +Rectified linear unit activation function with upper bound of 6. + +It is defined as `f(x) = np.clip(x, 0, 6)`. + +Args: + x: Input tensor. + +Returns: + A tensor with the same shape as `x`. + +Example: + +>>> x = keras.ops.convert_to_tensor([-3.0, -2.0, 0.1, 0.2, 6.0, 8.0]) +>>> keras.ops.relu6(x) +array([0.0, 0.0, 0.1, 0.2, 6.0, 6.0], dtype=float32)
diff --git a/.tether/man/op_repeat.txt b/.tether/man/op_repeat.txt new file mode 100644 index 0000000000..5c3b9bdabb --- /dev/null +++ b/.tether/man/op_repeat.txt @@ -0,0 +1,17 @@ +__signature__ +keras.ops.repeat( + x, + repeats, + axis=None +) +__doc__ +Repeat each element of a tensor after itself. + +Args: + x: Input tensor. + repeats: The number of repetitions for each element. + axis: The axis along which to repeat values.
By default, use + the flattened input array, and return a flat output array. + +Returns: + Output tensor.
diff --git a/.tether/man/op_reshape.txt b/.tether/man/op_reshape.txt new file mode 100644 index 0000000000..3dae153a20 --- /dev/null +++ b/.tether/man/op_reshape.txt @@ -0,0 +1,14 @@ +__signature__ +keras.ops.reshape(x, newshape) +__doc__ +Gives a new shape to a tensor without changing its data. + +Args: + x: Input tensor. + newshape: The new shape should be compatible with the original shape. + One shape dimension can be `-1`, in which case the value is + inferred from the length of the array and remaining dimensions. + +Returns: + The reshaped tensor. +
diff --git a/.tether/man/op_rfft.txt b/.tether/man/op_rfft.txt new file mode 100644 index 0000000000..afae4b85af --- /dev/null +++ b/.tether/man/op_rfft.txt @@ -0,0 +1,35 @@ +__signature__ +keras.ops.rfft(x, fft_length=None) +__doc__ +Real-valued Fast Fourier Transform along the last axis of the input. + +Computes the 1D Discrete Fourier Transform of a real-valued signal over the +inner-most dimension of input. + +Since the Discrete Fourier Transform of a real-valued signal is +Hermitian-symmetric, RFFT only returns the `fft_length / 2 + 1` unique +components of the FFT: the zero-frequency term, followed by the +`fft_length / 2` positive-frequency terms. + +Along the axis RFFT is computed on, if `fft_length` is smaller than the +corresponding dimension of the input, the dimension is cropped. If it is +larger, the dimension is padded with zeros. + +Args: + x: Input tensor. + fft_length: An integer representing the length of the FFT. If not + specified, it is inferred from the length of the last axis of `x`. + Defaults to `None`. + +Returns: + A tuple containing two tensors - the real and imaginary parts of the + output. + +Examples: + +>>> x = keras.ops.convert_to_tensor([0.0, 1.0, 2.0, 3.0, 4.0]) +>>> rfft(x) +(array([10.0, -2.5, -2.5]), array([0.0, 3.4409548, 0.81229924])) + +>>> rfft(x, 3) +(array([3.0, -1.5]), array([0.0, 0.8660254]))
diff --git a/.tether/man/op_roll.txt b/.tether/man/op_roll.txt new file mode 100644 index 0000000000..1db3d84b6e --- /dev/null +++ b/.tether/man/op_roll.txt @@ -0,0 +1,20 @@ +__signature__ +keras.ops.roll( + x, + shift, + axis=None +) +__doc__ +Roll tensor elements along a given axis. + +Elements that roll beyond the last position are re-introduced at the first. + +Args: + x: Input tensor. + shift: The number of places by which elements are shifted. + axis: The axis along which elements are shifted. By default, the + array is flattened before shifting, after which the original + shape is restored. + +Returns: + Output tensor.
diff --git a/.tether/man/op_round.txt b/.tether/man/op_round.txt new file mode 100644 index 0000000000..18d61c0f23 --- /dev/null +++ b/.tether/man/op_round.txt @@ -0,0 +1,11 @@ +__signature__ +keras.ops.round(x, decimals=0) +__doc__ +Evenly round to the given number of decimals. + +Args: + x: Input tensor. + decimals: Number of decimal places to round to. Defaults to `0`. + +Returns: + Output tensor.
diff --git a/.tether/man/op_rsqrt.txt b/.tether/man/op_rsqrt.txt new file mode 100644 index 0000000000..08147f05a6 --- /dev/null +++ b/.tether/man/op_rsqrt.txt @@ -0,0 +1,16 @@ +__signature__ +keras.ops.rsqrt(x) +__doc__ +Computes the reciprocal of the square root of `x`, element-wise. + +Args: + x: Input tensor. + +Returns: + A tensor with the same dtype as `x`.
+ +Example: + +>>> x = keras.ops.convert_to_tensor([1.0, 10.0, 100.0]) +>>> keras.ops.rsqrt(x) +array([1.0, 0.31622776, 0.1], dtype=float32)
diff --git a/.tether/man/op_scatter.txt b/.tether/man/op_scatter.txt new file mode 100644 index 0000000000..9058f3907c --- /dev/null +++ b/.tether/man/op_scatter.txt @@ -0,0 +1,30 @@ +__signature__ +keras.ops.scatter( + indices, + values, + shape +) +__doc__ +Returns a tensor of shape `shape` where `indices` are set to `values`. + +At a high level, this operation does `zeros[indices] = values` and +returns the output. It is equivalent to: + +```python +zeros = keras.ops.zeros(shape) +output = keras.ops.scatter_update(zeros, indices, values) +``` + +Args: + indices: A tensor or list/tuple specifying + indices for the values in `values`. + values: A tensor, the values to be set at `indices`. + shape: Shape of the output tensor. + +Example: + +>>> indices = [[0, 1], [1, 1]] +>>> values = np.array([1., 1.]) +>>> keras.ops.scatter(indices, values, shape=(2, 2)) +array([[0., 1.], + [0., 1.]])
diff --git a/.tether/man/op_scatter_update.txt b/.tether/man/op_scatter_update.txt new file mode 100644 index 0000000000..0ac5e8de1c --- /dev/null +++ b/.tether/man/op_scatter_update.txt @@ -0,0 +1,51 @@ +__signature__ +keras.ops.scatter_update( + inputs, + indices, + updates +) +__doc__ +Update inputs via updates at scattered (sparse) indices. + +At a high level, this operation does `inputs[indices] = updates`. +Assume `inputs` is a tensor of shape `(D0, D1, ..., Dn)`, there are 2 main +usages of `scatter_update`. + +1. `indices` is a 2D tensor of shape `(num_updates, n)`, where `num_updates` + is the number of updates to perform, and `updates` is a 1D tensor of + shape `(num_updates,)`. For example, if `inputs` is `zeros((4, 4, 4))`, + and we want to update `inputs[1, 2, 3]` and `inputs[0, 1, 3]` as 1, then + we can use: + +```python +inputs = np.zeros((4, 4, 4)) +indices = [[1, 2, 3], [0, 1, 3]] +updates = np.array([1., 1.]) +inputs = keras.ops.scatter_update(inputs, indices, updates) +``` + +2. `indices` is a 2D tensor of shape `(num_updates, k)`, where `num_updates` + is the number of updates to perform, and `k` (`k < n`) is the size of + each index in `indices`. `updates` is a `n - k`-D tensor of shape + `(num_updates, inputs.shape[k:])`. For example, if + `inputs = np.zeros((4, 4, 4))`, and we want to update `inputs[1, 2, :]` + and `inputs[2, 3, :]` as `[1, 1, 1, 1]`, then `indices` would have shape + `(num_updates, 2)` (`k = 2`), and `updates` would have shape + `(num_updates, 4)` (`inputs.shape[2:] = (4,)`). See the code below: + +```python +inputs = np.zeros((4, 4, 4)) +indices = [[1, 2], [2, 3]] +updates = np.array([[1., 1., 1., 1.], [1., 1., 1., 1.]]) +inputs = keras.ops.scatter_update(inputs, indices, updates) +``` + +Args: + inputs: A tensor, the tensor to be updated. + indices: A tensor or list/tuple of shape `(N, inputs.ndim)`, specifying + indices to update. `N` is the number of indices to update, must be + equal to the first dimension of `updates`. + updates: A tensor, the new values to be put to `inputs` at `indices`. + +Returns: + A tensor, has the same shape and dtype as `inputs`.
diff --git a/.tether/man/op_segment_max.txt b/.tether/man/op_segment_max.txt new file mode 100644 index 0000000000..b2d98b3930 --- /dev/null +++ b/.tether/man/op_segment_max.txt @@ -0,0 +1,31 @@ +__signature__ +keras.ops.segment_max( + data, + segment_ids, + num_segments=None, + sorted=False +) +__doc__ +Computes the max of segments in a tensor. + +Args: + data: Input tensor.
+ segment_ids: A 1-D tensor containing segment indices for each + element in `data`. + num_segments: An integer representing the total number of + segments. If not specified, it is inferred from the maximum + value in `segment_ids`. + sorted: A boolean indicating whether `segment_ids` is sorted. + Defaults to `False`. + +Returns: + A tensor containing the max of segments, where each element + represents the max of the corresponding segment in `data`. + +Example: + +>>> data = keras.ops.convert_to_tensor([1, 2, 10, 20, 100, 200]) +>>> segment_ids = keras.ops.convert_to_tensor([0, 0, 1, 1, 2, 2]) +>>> num_segments = 3 +>>> keras.ops.segment_max(data, segment_ids, num_segments) +array([2, 20, 200], dtype=int32)
diff --git a/.tether/man/op_segment_sum.txt b/.tether/man/op_segment_sum.txt new file mode 100644 index 0000000000..75cd0563c8 --- /dev/null +++ b/.tether/man/op_segment_sum.txt @@ -0,0 +1,31 @@ +__signature__ +keras.ops.segment_sum( + data, + segment_ids, + num_segments=None, + sorted=False +) +__doc__ +Computes the sum of segments in a tensor. + +Args: + data: Input tensor. + segment_ids: A 1-D tensor containing segment indices for each + element in `data`. + num_segments: An integer representing the total number of + segments. If not specified, it is inferred from the maximum + value in `segment_ids`. + sorted: A boolean indicating whether `segment_ids` is sorted. + Defaults to `False`. + +Returns: + A tensor containing the sum of segments, where each element + represents the sum of the corresponding segment in `data`. + +Example: + +>>> data = keras.ops.convert_to_tensor([1, 2, 10, 20, 100, 200]) +>>> segment_ids = keras.ops.convert_to_tensor([0, 0, 1, 1, 2, 2]) +>>> num_segments = 3 +>>> keras.ops.segment_sum(data, segment_ids, num_segments) +array([3, 30, 300], dtype=int32)
diff --git a/.tether/man/op_select.txt b/.tether/man/op_select.txt new file mode 100644 index 0000000000..2acfb6e6f6 --- /dev/null +++ b/.tether/man/op_select.txt @@ -0,0 +1,39 @@ +__signature__ +keras.ops.select( + condlist, + choicelist, + default=0 +) +__doc__ +Return elements from `choicelist`, based on conditions in `condlist`. + +Args: + condlist: List of boolean tensors. + The list of conditions which determine from which array + in `choicelist` the output elements are taken. + When multiple conditions are satisfied, + the first one encountered in `condlist` is used. + choicelist: List of tensors. + The list of tensors from which the output elements are taken. + This list has to be of the same length as `condlist`. + default: Optional scalar value. + The element inserted in the output + when all conditions evaluate to `False`. + +Returns: + Tensor where the output at position `m` is the `m`-th element + of the tensor in `choicelist` where the `m`-th element of the + corresponding tensor in `condlist` is `True`. + +Example: + +```python
from keras import ops

x = ops.arange(6)
condlist = [x<3, x>3]
choicelist = [x, x**2]
ops.select(condlist, choicelist, 42)
# Returns: tensor([0, 1, 2, 42, 16, 25])
``` +
diff --git a/.tether/man/op_selu.txt b/.tether/man/op_selu.txt new file mode 100644 index 0000000000..dce362ad62 --- /dev/null +++ b/.tether/man/op_selu.txt @@ -0,0 +1,22 @@ +__signature__ +keras.ops.selu(x) +__doc__ +Scaled Exponential Linear Unit (SELU) activation function. + +It is defined as: + +`f(x) = scale * alpha * (exp(x) - 1.) for x < 0`, +`f(x) = scale * x for x >= 0`. + +Args: + x: Input tensor. + +Returns: + A tensor with the same shape as `x`.
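+ +As a worked check (an added note; the constants are the standard SELU values `alpha ≈ 1.67326324` and `scale ≈ 1.05070098`): `selu(-1.0) = scale * alpha * (exp(-1.0) - 1.0) ≈ -1.11133`, matching the first entry of the example below.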
+ +Example: + +>>> x = np.array([-1., 0., 1.]) +>>> x_selu = keras.ops.selu(x) +>>> print(x_selu) +array([-1.11133055, 0., 1.05070098], shape=(3,), dtype=float64)
diff --git a/.tether/man/op_separable_conv.txt b/.tether/man/op_separable_conv.txt new file mode 100644 index 0000000000..4ffd76d8d0 --- /dev/null +++ b/.tether/man/op_separable_conv.txt @@ -0,0 +1,50 @@ +__signature__ +keras.ops.separable_conv( + inputs, + depthwise_kernel, + pointwise_kernel, + strides=1, + padding='valid', + data_format=None, + dilation_rate=1 +) +__doc__ +General N-D separable convolution. + +This op supports 1D and 2D separable convolution. `separable_conv` is +a depthwise conv followed by a pointwise conv. + +Args: + inputs: Tensor of rank N+2. `inputs` has shape + `(batch_size,) + inputs_spatial_shape + (num_channels,)` if + `data_format="channels_last"`, or + `(batch_size, num_channels) + inputs_spatial_shape` if + `data_format="channels_first"`. + depthwise_kernel: Tensor of rank N+2. `depthwise_kernel` has shape + [kernel_spatial_shape, num_input_channels, num_channels_multiplier], + `num_input_channels` should match the number of channels in + `inputs`. + pointwise_kernel: Tensor of rank N+2. `pointwise_kernel` has shape + `(*ones_like(kernel_spatial_shape), + num_input_channels * num_channels_multiplier, num_output_channels)`. + strides: int or int tuple/list of `len(inputs_spatial_shape)`, + specifying the strides of the convolution along each spatial + dimension. If `strides` is int, then every spatial dimension shares + the same `strides`. + padding: string, either `"valid"` or `"same"`. `"valid"` means no + padding is applied, and `"same"` results in padding evenly to the + left/right or up/down of the input such that output has the + same height/width dimension as the input when `strides=1`. + data_format: A string, either `"channels_last"` or `"channels_first"`. + `data_format` determines the ordering of the dimensions in the + inputs. If `data_format="channels_last"`, `inputs` is of shape + `(batch_size, ..., channels)` while if + `data_format="channels_first"`, `inputs` is of shape + `(batch_size, channels, ...)`. + dilation_rate: int or int tuple/list of `len(inputs_spatial_shape)`, + specifying the dilation rate to use for dilated convolution. If + `dilation_rate` is int, then every spatial dimension shares + the same `dilation_rate`. + +Returns: + A tensor of rank N+2, the result of the separable conv operation.
diff --git a/.tether/man/op_shape.txt b/.tether/man/op_shape.txt new file mode 100644 index 0000000000..c2a37b5d1b --- /dev/null +++ b/.tether/man/op_shape.txt @@ -0,0 +1,22 @@ +__signature__ +keras.ops.shape(x) +__doc__ +Gets the shape of the tensor input. + +Note: On the TensorFlow backend, when `x` is a `tf.Tensor` with dynamic +shape, dimensions which are dynamic in the context of a compiled function +will have a `tf.Tensor` value instead of a static integer value. + +Args: + x: A tensor. This function will try to access the `shape` attribute of + the input tensor. + +Returns: + A tuple of integers or None values, indicating the shape of the input + tensor. + +Example: + +>>> x = keras.ops.zeros((8, 12)) +>>> keras.ops.shape(x) +(8, 12)
diff --git a/.tether/man/op_sigmoid.txt b/.tether/man/op_sigmoid.txt new file mode 100644 index 0000000000..b57edfb55b --- /dev/null +++ b/.tether/man/op_sigmoid.txt @@ -0,0 +1,18 @@ +__signature__ +keras.ops.sigmoid(x) +__doc__ +Sigmoid activation function. + +It is defined as `f(x) = 1 / (1 + exp(-x))`. + +Args: + x: Input tensor.
+ +Returns: + A tensor with the same shape as `x`. + +Example: + +>>> x = keras.ops.convert_to_tensor([-6.0, 1.0, 0.0, 1.0, 6.0]) +>>> keras.ops.sigmoid(x) +array([0.00247262, 0.7310586, 0.5, 0.7310586, 0.9975274], dtype=float32)
diff --git a/.tether/man/op_sign.txt b/.tether/man/op_sign.txt new file mode 100644 index 0000000000..5225b2750b --- /dev/null +++ b/.tether/man/op_sign.txt @@ -0,0 +1,10 @@ +__signature__ +keras.ops.sign(x) +__doc__ +Returns a tensor with the signs of the elements of `x`. + +Args: + x: Input tensor. + +Returns: + Output tensor of same shape as `x`.
diff --git a/.tether/man/op_silu.txt b/.tether/man/op_silu.txt new file mode 100644 index 0000000000..6d95de1bf6 --- /dev/null +++ b/.tether/man/op_silu.txt @@ -0,0 +1,21 @@ +__signature__ +keras.ops.silu(x) +__doc__ +Sigmoid Linear Unit (SiLU) activation function, also known as Swish. + +The SiLU activation function is computed by the sigmoid function multiplied +by its input. It is defined as `f(x) = x * sigmoid(x)`. + +Args: + x: Input tensor. + +Returns: + A tensor with the same shape as `x`. + +Example: + +>>> x = keras.ops.convert_to_tensor([-6.0, 1.0, 0.0, 1.0, 6.0]) +>>> keras.ops.sigmoid(x) +array([0.00247262, 0.7310586, 0.5, 0.7310586, 0.9975274], dtype=float32) +>>> keras.ops.silu(x) +array([-0.0148357, 0.7310586, 0.0, 0.7310586, 5.9851646], dtype=float32)
diff --git a/.tether/man/op_sin.txt b/.tether/man/op_sin.txt new file mode 100644 index 0000000000..4e09b6c50e --- /dev/null +++ b/.tether/man/op_sin.txt @@ -0,0 +1,11 @@ +__signature__ +keras.ops.sin(x) +__doc__ +Trigonometric sine, element-wise. + +Arguments: + x: Input tensor. + +Returns: + Output tensor of same shape as `x`. +
diff --git a/.tether/man/op_sinh.txt b/.tether/man/op_sinh.txt new file mode 100644 index 0000000000..aef2a766b8 --- /dev/null +++ b/.tether/man/op_sinh.txt @@ -0,0 +1,10 @@ +__signature__ +keras.ops.sinh(x) +__doc__ +Hyperbolic sine, element-wise. + +Arguments: + x: Input tensor. + +Returns: + Output tensor of same shape as `x`.
diff --git a/.tether/man/op_size.txt b/.tether/man/op_size.txt new file mode 100644 index 0000000000..cf2954423c --- /dev/null +++ b/.tether/man/op_size.txt @@ -0,0 +1,10 @@ +__signature__ +keras.ops.size(x) +__doc__ +Return the number of elements in a tensor. + +Args: + x: Input tensor. + +Returns: + Number of elements in `x`.
diff --git a/.tether/man/op_slice.txt b/.tether/man/op_slice.txt new file mode 100644 index 0000000000..9029502ef9 --- /dev/null +++ b/.tether/man/op_slice.txt @@ -0,0 +1,30 @@ +__signature__ +keras.ops.slice( + inputs, + start_indices, + shape +) +__doc__ +Return a slice of an input tensor. + +At a high level, this operation is an explicit replacement for array slicing +e.g. `inputs[start_indices: start_indices + shape]`. +Unlike slicing via brackets, this operation will accept tensor start +indices on all backends, which is useful when indices are dynamically computed +via other tensor operations. + +```python
inputs = np.zeros((5, 5))
start_indices = np.array([3, 3])
shape = np.array([2, 2])
inputs = keras.ops.slice(inputs, start_indices, shape)
``` + +Args: + inputs: A tensor, the tensor to be sliced. + start_indices: A list/tuple of shape `(inputs.ndim,)`, specifying + the starting indices of the slice. + shape: The full shape of the returned slice. + +Returns: + A tensor of the requested `shape`, with the same dtype as `inputs`.
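+ +Example (an illustrative sketch added here, not upstream text; the values follow from `arange`, and the printed format assumes the NumPy backend): + +>>> inputs = keras.ops.reshape(keras.ops.arange(25), (5, 5)) +>>> keras.ops.slice(inputs, [3, 3], [2, 2]) +array([[18, 19], + [23, 24]])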
diff --git a/.tether/man/op_slice_update.txt b/.tether/man/op_slice_update.txt new file mode 100644 index 0000000000..d4ac4d0d52 --- /dev/null +++ b/.tether/man/op_slice_update.txt @@ -0,0 +1,35 @@ +__signature__ +keras.ops.slice_update( + inputs, + start_indices, + updates +) +__doc__ +Update an input by slicing in a tensor of updated values. + +At a high level, this operation does +`inputs[start_indices: start_indices + updates.shape] = updates`. +Assume `inputs` is a tensor of shape `(D0, D1, ..., Dn)`, +`start_indices` must be a list/tuple of n integers, specifying the starting +indices. `updates` must have the same rank as `inputs`, and the size of each +dim must not exceed `Di - start_indices[i]`. For example, if we have 2D +inputs `inputs = np.zeros((5, 5))`, and we want to update the intersection +of last 2 rows and last 2 columns as 1, i.e., +`inputs[3:, 3:] = np.ones((2, 2))`, then we can use the code below: + +```python
inputs = np.zeros((5, 5))
start_indices = [3, 3]
updates = np.ones((2, 2))
inputs = keras.ops.slice_update(inputs, start_indices, updates)
``` + +Args: + inputs: A tensor, the tensor to be updated. + start_indices: A list/tuple of shape `(inputs.ndim,)`, specifying + the starting indices for updating. + updates: A tensor, the new values to be put to `inputs` at `start_indices`. + `updates` must have the same rank as `inputs`. + +Returns: + A tensor with the same shape and dtype as `inputs`.
diff --git a/.tether/man/op_softmax.txt b/.tether/man/op_softmax.txt new file mode 100644 index 0000000000..2b5bee2f46 --- /dev/null +++ b/.tether/man/op_softmax.txt @@ -0,0 +1,27 @@ +__signature__ +keras.ops.softmax(x, axis=-1) +__doc__ +Softmax activation function. + +The elements of the output vector lie within the range `(0, 1)`, and their +total sum is exactly 1 (excluding the floating point rounding error). + +Each vector is processed independently. The `axis` argument specifies the +axis along which the function is applied within the input. + +It is defined as: +`f(x) = exp(x) / sum(exp(x))` + +Args: + x: Input tensor. + axis: Integer, axis along which the softmax is applied. + +Returns: + A tensor with the same shape as `x`. + +Example: + +>>> x = np.array([-1., 0., 1.]) +>>> x_softmax = keras.ops.softmax(x) +>>> print(x_softmax) +array([0.09003057, 0.24472847, 0.66524096], shape=(3,), dtype=float64)
diff --git a/.tether/man/op_softplus.txt b/.tether/man/op_softplus.txt new file mode 100644 index 0000000000..37fb902811 --- /dev/null +++ b/.tether/man/op_softplus.txt @@ -0,0 +1,19 @@ +__signature__ +keras.ops.softplus(x) +__doc__ +Softplus activation function. + +It is defined as `f(x) = log(exp(x) + 1)`, where `log` is the natural +logarithm and `exp` is the exponential function. + +Args: + x: Input tensor. + +Returns: + A tensor with the same shape as `x`. + +Example: + +>>> x = keras.ops.convert_to_tensor([-0.555, 0.0, 0.555]) +>>> keras.ops.softplus(x) +array([0.45366603, 0.6931472, 1.008666], dtype=float32)
diff --git a/.tether/man/op_softsign.txt b/.tether/man/op_softsign.txt new file mode 100644 index 0000000000..89deec71ae --- /dev/null +++ b/.tether/man/op_softsign.txt @@ -0,0 +1,18 @@ +__signature__ +keras.ops.softsign(x) +__doc__ +Softsign activation function. + +It is defined as `f(x) = x / (abs(x) + 1)`. + +Args: + x: Input tensor. + +Returns: + A tensor with the same shape as `x`.
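+ +As a worked check (an added note): `softsign(-10.0) = -10.0 / (abs(-10.0) + 1) ≈ -0.90909`, matching the second entry of the example below.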
+ +Example: + +>>> x = keras.ops.convert_to_tensor([-0.100, -10.0, 1.0, 0.0, 100.0]) +>>> keras.ops.softsign(x) +Array([-0.09090909, -0.90909094, 0.5, 0.0, 0.990099], dtype=float32)
diff --git a/.tether/man/op_solve.txt b/.tether/man/op_solve.txt new file mode 100644 index 0000000000..b75dcdee17 --- /dev/null +++ b/.tether/man/op_solve.txt @@ -0,0 +1,14 @@ +__signature__ +keras.ops.solve(a, b) +__doc__ +Solves a linear system of equations given by `a x = b`. + +Args: + a: A tensor of shape `(..., M, M)` representing the coefficients matrix. + b: A tensor of shape `(..., M)` or `(..., M, N)` representing the + right-hand side or "dependent variable" matrix. + +Returns: + A tensor of shape `(..., M)` or `(..., M, N)` representing the solution + of the linear system. Returned shape is identical to `b`. +
diff --git a/.tether/man/op_solve_triangular.txt b/.tether/man/op_solve_triangular.txt new file mode 100644 index 0000000000..3ba14ff5e6 --- /dev/null +++ b/.tether/man/op_solve_triangular.txt @@ -0,0 +1,18 @@ +__signature__ +keras.ops.solve_triangular( + a, + b, + lower=False +) +__doc__ +Solves a linear system of equations given by `a x = b`, where `a` is a +triangular matrix. + +Args: + a: A tensor of shape `(..., M, M)` representing the coefficients matrix. + b: A tensor of shape `(..., M)` or `(..., M, N)` representing the + right-hand side or "dependent variable" matrix. + lower: A boolean specifying whether `a` is lower-triangular (`True`) + or upper-triangular (`False`). Defaults to `False`. + +Returns: + A tensor of shape `(..., M)` or `(..., M, N)` representing the solution + of the linear system. Returned shape is identical to `b`. +
diff --git a/.tether/man/op_sort.txt b/.tether/man/op_sort.txt new file mode 100644 index 0000000000..f58ea20df7 --- /dev/null +++ b/.tether/man/op_sort.txt @@ -0,0 +1,12 @@ +__signature__ +keras.ops.sort(x, axis=-1) +__doc__ +Sorts the elements of `x` along a given axis in ascending order. + +Args: + x: Input tensor. + axis: Axis along which to sort. If `None`, the tensor is flattened + before sorting. Defaults to `-1`; the last axis. + +Returns: + Sorted tensor.
diff --git a/.tether/man/op_sparse_categorical_crossentropy.txt b/.tether/man/op_sparse_categorical_crossentropy.txt new file mode 100644 index 0000000000..d4843bbc49 --- /dev/null +++ b/.tether/man/op_sparse_categorical_crossentropy.txt @@ -0,0 +1,46 @@ +__signature__ +keras.ops.sparse_categorical_crossentropy( + target, + output, + from_logits=False, + axis=-1 +) +__doc__ +Computes sparse categorical cross-entropy loss. + +The sparse categorical cross-entropy loss is similar to categorical +cross-entropy, but it is used when the target tensor contains integer +class labels instead of one-hot encoded vectors. It measures the +dissimilarity between the target and output probabilities or logits. + +Args: + target: The target tensor representing the true class labels as + integers. Its shape should match the shape of the `output` + tensor except for the last dimension. + output: The output tensor representing the predicted probabilities + or logits. + Its shape should match the shape of the `target` tensor except + for the last dimension. + from_logits: (optional) Whether `output` is a tensor of logits + or probabilities. + Set it to `True` if `output` represents logits; otherwise, + set it to `False` if `output` represents probabilities. + Defaults to `False`. + axis: (optional) The axis along which the sparse categorical + cross-entropy is computed. + Defaults to `-1`, which corresponds to the last dimension + of the tensors. + +Returns: + Tensor: The computed sparse categorical cross-entropy + loss between `target` and `output`.
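+ +As a worked check (an added note): each entry is the negative log of the probability assigned to the true class, so the first entry of the example below is `-log(0.9) ≈ 0.10536`.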
+ +Example: + +>>> target = keras.ops.convert_to_tensor([0, 1, 2], dtype="int32") +>>> output = keras.ops.convert_to_tensor( +... [[0.9, 0.05, 0.05], +... [0.1, 0.8, 0.1], +... [0.2, 0.3, 0.5]]) +>>> sparse_categorical_crossentropy(target, output) +array([0.10536056 0.22314355 0.6931472 ], shape=(3,), dtype=float32)
diff --git a/.tether/man/op_split.txt b/.tether/man/op_split.txt new file mode 100644 index 0000000000..2c8d891ce6 --- /dev/null +++ b/.tether/man/op_split.txt @@ -0,0 +1,24 @@ +__signature__ +keras.ops.split( + x, + indices_or_sections, + axis=0 +) +__doc__ +Split a tensor into chunks. + +Args: + x: Input tensor. + indices_or_sections: If an integer, N, the tensor will be split into N + equal sections along `axis`. If a 1-D array of sorted integers, + the entries indicate indices at which the tensor will be split + along `axis`. + axis: Axis along which to split. Defaults to `0`. + +Note: + A split does not have to result in equal division when using + the Torch backend. + +Returns: + A list of tensors. +
diff --git a/.tether/man/op_sqrt.txt b/.tether/man/op_sqrt.txt new file mode 100644 index 0000000000..ce6fd6efb7 --- /dev/null +++ b/.tether/man/op_sqrt.txt @@ -0,0 +1,10 @@ +__signature__ +keras.ops.sqrt(x) +__doc__ +Return the non-negative square root of a tensor, element-wise. + +Args: + x: Input tensor. + +Returns: + Output tensor, the non-negative square root of `x`.
diff --git a/.tether/man/op_square.txt b/.tether/man/op_square.txt new file mode 100644 index 0000000000..acfaf44c6c --- /dev/null +++ b/.tether/man/op_square.txt @@ -0,0 +1,10 @@ +__signature__ +keras.ops.square(x) +__doc__ +Return the element-wise square of the input. + +Args: + x: Input tensor. + +Returns: + Output tensor, the square of `x`.
diff --git a/.tether/man/op_squeeze.txt b/.tether/man/op_squeeze.txt new file mode 100644 index 0000000000..0f873db8c7 --- /dev/null +++ b/.tether/man/op_squeeze.txt @@ -0,0 +1,12 @@ +__signature__ +keras.ops.squeeze(x, axis=None) +__doc__ +Remove axes of length one from `x`. + +Args: + x: Input tensor. + axis: Select a subset of the entries of length one in the shape. + +Returns: + The input tensor with all or a subset of the dimensions of + length 1 removed.
diff --git a/.tether/man/op_stack.txt b/.tether/man/op_stack.txt new file mode 100644 index 0000000000..25becffabc --- /dev/null +++ b/.tether/man/op_stack.txt @@ -0,0 +1,14 @@ +__signature__ +keras.ops.stack(x, axis=0) +__doc__ +Join a sequence of tensors along a new axis. + +The `axis` parameter specifies the index of the new axis in the +dimensions of the result. + +Args: + x: A sequence of tensors. + axis: Axis along which to stack. Defaults to `0`. + +Returns: + The stacked tensor.
diff --git a/.tether/man/op_std.txt b/.tether/man/op_std.txt new file mode 100644 index 0000000000..defe8f2ab0 --- /dev/null +++ b/.tether/man/op_std.txt @@ -0,0 +1,19 @@ +__signature__ +keras.ops.std( + x, + axis=None, + keepdims=False +) +__doc__ +Compute the standard deviation along the specified axis. + +Args: + x: Input tensor. + axis: Axis along which to compute standard deviation. + The default is to compute the standard deviation of the + flattened tensor. + keepdims: If this is set to `True`, the axes which are reduced are left + in the result as dimensions with size one. + +Returns: + Output tensor containing the standard deviation values.
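+ +Example (an illustrative sketch added here, not upstream text; this is the population standard deviation, and the printed output assumes the NumPy backend): + +>>> x = keras.ops.convert_to_tensor([[1., 2.], [3., 4.]]) +>>> keras.ops.std(x) +array(1.118034, dtype=float32) +>>> keras.ops.std(x, axis=0) +array([1., 1.], dtype=float32)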
diff --git a/.tether/man/op_stft.txt b/.tether/man/op_stft.txt new file mode 100644 index 0000000000..fff125f8af --- /dev/null +++ b/.tether/man/op_stft.txt @@ -0,0 +1,44 @@ +__signature__ +keras.ops.stft( + x, + sequence_length, + sequence_stride, + fft_length, + window='hann', + center=True +) +__doc__ +Short-Time Fourier Transform along the last axis of the input. + +The STFT computes the Fourier transform of short overlapping windows of the +input. This gives the frequency components of the signal as they change over +time. + +Args: + x: Input tensor. + sequence_length: An integer representing the sequence length. + sequence_stride: An integer representing the sequence hop size. + fft_length: An integer representing the size of the FFT to apply. If not + specified, uses the smallest power of 2 enclosing `sequence_length`. + window: A string, a tensor of the window or `None`. If `window` is a + string, available values are `"hann"` and `"hamming"`. If `window` + is a tensor, it will be used directly as the window and its length + must be `sequence_length`. If `window` is `None`, no windowing is + used. Defaults to `"hann"`. + center: Whether to pad `x` on both sides so that the t-th sequence is + centered at time `t * sequence_stride`. Otherwise, the t-th sequence + begins at time `t * sequence_stride`. Defaults to `True`. + +Returns: + A tuple containing two tensors - the real and imaginary parts of the + STFT output. + +Example: + +>>> x = keras.ops.convert_to_tensor([0.0, 1.0, 2.0, 3.0, 4.0]) +>>> stft(x, 3, 2, 3) +(array([[0.75, -0.375], + [3.75, -1.875], + [5.25, -2.625]]), array([[0.0, 0.64951905], + [0.0, 0.64951905], + [0.0, -0.64951905]]))
diff --git a/.tether/man/op_stop_gradient.txt b/.tether/man/op_stop_gradient.txt new file mode 100644 index 0000000000..4540c209b9 --- /dev/null +++ b/.tether/man/op_stop_gradient.txt @@ -0,0 +1,19 @@ +__signature__ +keras.ops.stop_gradient(variable) +__doc__ +Stops gradient computation. + +Args: + variable: A tensor variable for which the gradient + computation is to be disabled. + +Returns: + The variable with gradient computation disabled. + +Examples: + +>>> var = keras.ops.convert_to_tensor( +... [1., 2., 3.], +... dtype="float32" +... ) +>>> var = keras.ops.stop_gradient(var)
diff --git a/.tether/man/op_subtract.txt b/.tether/man/op_subtract.txt new file mode 100644 index 0000000000..4f427906b3 --- /dev/null +++ b/.tether/man/op_subtract.txt @@ -0,0 +1,11 @@ +__signature__ +keras.ops.subtract(x1, x2) +__doc__ +Subtract arguments element-wise. + +Args: + x1: First input tensor. + x2: Second input tensor. + +Returns: + Output tensor, element-wise difference of `x1` and `x2`.
diff --git a/.tether/man/op_sum.txt b/.tether/man/op_sum.txt new file mode 100644 index 0000000000..93f6976b32 --- /dev/null +++ b/.tether/man/op_sum.txt @@ -0,0 +1,18 @@ +__signature__ +keras.ops.sum( + x, + axis=None, + keepdims=False +) +__doc__ +Sum of a tensor over the given axes. + +Args: + x: Input tensor. + axis: Axis or axes along which the sum is computed. The default is to + compute the sum of the flattened tensor. + keepdims: If this is set to `True`, the axes which are reduced are left + in the result as dimensions with size one. + +Returns: + Output tensor containing the sum.
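+ +Example (an illustrative sketch added here, not upstream text; the printed dtype assumes the NumPy backend): + +>>> x = keras.ops.convert_to_tensor([[1, 2], [3, 4]]) +>>> keras.ops.sum(x) +array(10, dtype=int32) +>>> keras.ops.sum(x, axis=1) +array([3, 7], dtype=int32)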
diff --git a/.tether/man/op_svd.txt b/.tether/man/op_svd.txt new file mode 100644 index 0000000000..dbbde94628 --- /dev/null +++ b/.tether/man/op_svd.txt @@ -0,0 +1,18 @@ +__signature__ +keras.ops.svd( + x, + full_matrices=True, + compute_uv=True +) +__doc__ +Computes the singular value decomposition of a matrix. + +Args: + x: Input tensor of shape `(..., M, N)`. + full_matrices: A boolean. If `True`, compute full-sized left and right + singular vector matrices; if `False`, compute the reduced forms. + Defaults to `True`. + compute_uv: A boolean. If `True`, return the singular vectors in + addition to the singular values; if `False`, return the singular + values only. Defaults to `True`. + +Returns: + A tuple of three tensors: a tensor of shape `(..., M, M)` containing the + left singular vectors, a tensor of shape `(..., min(M, N))` containing + the singular values and a tensor of shape `(..., N, N)` containing the + right singular vectors. +
diff --git a/.tether/man/op_swapaxes.txt b/.tether/man/op_swapaxes.txt new file mode 100644 index 0000000000..20bf53cffb --- /dev/null +++ b/.tether/man/op_swapaxes.txt @@ -0,0 +1,16 @@ +__signature__ +keras.ops.swapaxes( + x, + axis1, + axis2 +) +__doc__ +Interchange two axes of a tensor. + +Args: + x: Input tensor. + axis1: First axis. + axis2: Second axis. + +Returns: + A tensor with the axes swapped.
diff --git a/.tether/man/op_take.txt b/.tether/man/op_take.txt new file mode 100644 index 0000000000..1e805cb19b --- /dev/null +++ b/.tether/man/op_take.txt @@ -0,0 +1,17 @@ +__signature__ +keras.ops.take( + x, + indices, + axis=None +) +__doc__ +Take elements from a tensor along an axis. + +Args: + x: Source tensor. + indices: The indices of the values to extract. + axis: The axis over which to select values. By default, the + flattened input tensor is used. + +Returns: + The corresponding tensor of values.
diff --git a/.tether/man/op_take_along_axis.txt b/.tether/man/op_take_along_axis.txt new file mode 100644 index 0000000000..32ef1c3d2f --- /dev/null +++ b/.tether/man/op_take_along_axis.txt @@ -0,0 +1,17 @@ +__signature__ +keras.ops.take_along_axis( + x, + indices, + axis=None +) +__doc__ +Select values from `x` at the 1-D `indices` along the given axis. + +Args: + x: Source tensor. + indices: The indices of the values to extract. + axis: The axis over which to select values. By default, the flattened + input tensor is used. + +Returns: + The corresponding tensor of values.
diff --git a/.tether/man/op_tan.txt b/.tether/man/op_tan.txt new file mode 100644 index 0000000000..eeb0d262e4 --- /dev/null +++ b/.tether/man/op_tan.txt @@ -0,0 +1,10 @@ +__signature__ +keras.ops.tan(x) +__doc__ +Compute tangent, element-wise. + +Args: + x: Input tensor. + +Returns: + Output tensor of same shape as `x`.
diff --git a/.tether/man/op_tanh.txt b/.tether/man/op_tanh.txt new file mode 100644 index 0000000000..813036b74a --- /dev/null +++ b/.tether/man/op_tanh.txt @@ -0,0 +1,10 @@ +__signature__ +keras.ops.tanh(x) +__doc__ +Hyperbolic tangent, element-wise. + +Arguments: + x: Input tensor. + +Returns: + Output tensor of same shape as `x`.
diff --git a/.tether/man/op_tensordot.txt b/.tether/man/op_tensordot.txt new file mode 100644 index 0000000000..8608cdc02c --- /dev/null +++ b/.tether/man/op_tensordot.txt @@ -0,0 +1,21 @@ +__signature__ +keras.ops.tensordot( + x1, + x2, + axes=2 +) +__doc__ +Compute the tensor dot product along specified axes. + +Args: + x1: First tensor. + x2: Second tensor. + axes: - If an integer, N, sum over the last N axes of `x1` and the + first N axes of `x2` in order. The sizes of the corresponding + axes must match. + - Or, a list of axes to be summed over, first sequence applying + to `x1`, second to `x2`. Both sequences must be of the + same length. + +Returns: + The tensor dot product of the inputs.
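+ +Example (an illustrative sketch added here, not upstream text; it focuses on the result shape rather than the printed values): + +>>> x1 = keras.ops.ones((3, 4)) +>>> x2 = keras.ops.ones((4, 5)) +>>> keras.ops.shape(keras.ops.tensordot(x1, x2, axes=1)) +(3, 5)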
diff --git a/.tether/man/op_tile.txt b/.tether/man/op_tile.txt new file mode 100644 index 0000000000..3e67b5d575 --- /dev/null +++ b/.tether/man/op_tile.txt @@ -0,0 +1,19 @@ +__signature__ +keras.ops.tile(x, repeats) +__doc__ +Repeat `x` the number of times given by `repeats`. + +If `repeats` has length `d`, the result will have dimension of +`max(d, x.ndim)`. + +If `x.ndim < d`, `x` is promoted to be d-dimensional by prepending +new axes. + +If `x.ndim > d`, `repeats` is promoted to `x.ndim` by prepending 1's to it. + +Args: + x: Input tensor. + repeats: The number of repetitions of `x` along each axis. + +Returns: + The tiled output tensor. diff --git a/.tether/man/op_top_k.txt b/.tether/man/op_top_k.txt new file mode 100644 index 0000000000..ff50fe3915 --- /dev/null +++ b/.tether/man/op_top_k.txt @@ -0,0 +1,28 @@ +__signature__ +keras.ops.top_k( + x, + k, + sorted=True +) +__doc__ +Finds the top-k values and their indices in a tensor. + +Args: + x: Input tensor. + k: An integer representing the number of top elements to retrieve. + sorted: A boolean indicating whether to sort the output in + descending order. Defaults to `True`. + +Returns: + A tuple containing two tensors. The first tensor contains the + top-k values, and the second tensor contains the indices of the + top-k values in the input tensor. + +Example: + +>>> x = keras.ops.convert_to_tensor([5, 2, 7, 1, 9, 3]) +>>> values, indices = keras.ops.top_k(x, k=3) +>>> print(values) +array([9 7 5], shape=(3,), dtype=int32) +>>> print(indices) +array([4 2 0], shape=(3,), dtype=int32) diff --git a/.tether/man/op_trace.txt b/.tether/man/op_trace.txt new file mode 100644 index 0000000000..4149ebb2c9 --- /dev/null +++ b/.tether/man/op_trace.txt @@ -0,0 +1,33 @@ +__signature__ +keras.ops.trace( + x, + offset=0, + axis1=0, + axis2=1 +) +__doc__ +Return the sum along diagonals of the tensor. + +If `x` is 2-D, the sum along its diagonal with the given offset is +returned, i.e., the sum of elements `x[i, i+offset]` for all `i`. + +If `x` has more than two dimensions, then the axes specified by `axis1` +and `axis2` are used to determine the 2-D sub-arrays whose traces are +returned. + +The shape of the resulting tensor is the same as that of `x` with `axis1` +and `axis2` removed. + +Args: + x: Input tensor. + offset: Offset of the diagonal from the main diagonal. Can be + both positive and negative. Defaults to `0`. + axis1: Axis to be used as the first axis of the 2-D sub-arrays. + Defaults to `0` (first axis). + axis2: Axis to be used as the second axis of the 2-D sub-arrays. + Defaults to `1` (second axis). + +Returns: + If `x` is 2-D, the sum of the diagonal is returned. If `x` has + larger dimensions, then a tensor of sums along diagonals is + returned. diff --git a/.tether/man/op_transpose.txt b/.tether/man/op_transpose.txt new file mode 100644 index 0000000000..012834ed41 --- /dev/null +++ b/.tether/man/op_transpose.txt @@ -0,0 +1,12 @@ +__signature__ +keras.ops.transpose(x, axes=None) +__doc__ +Returns a tensor with `axes` transposed. + +Args: + x: Input tensor. + axes: Sequence of integers. Permutation of the dimensions of `x`. + By default, the order of the axes is reversed. + +Returns: + `x` with its axes permuted. diff --git a/.tether/man/op_tri.txt b/.tether/man/op_tri.txt new file mode 100644 index 0000000000..76d4fa7e68 --- /dev/null +++ b/.tether/man/op_tri.txt @@ -0,0 +1,21 @@ +__signature__ +keras.ops.tri( + N, + M=None, + k=0, + dtype=None +) +__doc__ +Return a tensor with ones at and below a diagonal and zeros elsewhere.
+ +Args: + N: Number of rows in the tensor. + M: Number of columns in the tensor. + k: The sub-diagonal at and below which the array is filled. + `k = 0` is the main diagonal, while `k < 0` is below it, and + `k > 0` is above. The default is 0. + dtype: Data type of the returned tensor. The default is "float32". + +Returns: + Tensor with its lower triangle filled with ones and zeros elsewhere. + `T[i, j] == 1` for `j <= i + k`, 0 otherwise. diff --git a/.tether/man/op_tril.txt b/.tether/man/op_tril.txt new file mode 100644 index 0000000000..8ee8af5855 --- /dev/null +++ b/.tether/man/op_tril.txt @@ -0,0 +1,15 @@ +__signature__ +keras.ops.tril(x, k=0) +__doc__ +Return lower triangle of a tensor. + +For tensors with `ndim` exceeding 2, `tril` will apply to the +final two axes. + +Args: + x: Input tensor. + k: Diagonal above which to zero elements. Defaults to `0`, the + main diagonal. `k < 0` is below it, and `k > 0` is above it. + +Returns: + Lower triangle of `x`, of same shape and data type as `x`. diff --git a/.tether/man/op_triu.txt b/.tether/man/op_triu.txt new file mode 100644 index 0000000000..5b8495b0dc --- /dev/null +++ b/.tether/man/op_triu.txt @@ -0,0 +1,15 @@ +__signature__ +keras.ops.triu(x, k=0) +__doc__ +Return upper triangle of a tensor. + +For tensors with `ndim` exceeding 2, `triu` will apply to the +final two axes. + +Args: + x: Input tensor. + k: Diagonal below which to zero elements. Defaults to `0`, the + main diagonal. `k < 0` is below it, and `k > 0` is above it. + +Returns: + Upper triangle of `x`, of same shape and data type as `x`. diff --git a/.tether/man/op_true_divide.txt b/.tether/man/op_true_divide.txt new file mode 100644 index 0000000000..065f63fa78 --- /dev/null +++ b/.tether/man/op_true_divide.txt @@ -0,0 +1,4 @@ +__signature__ +keras.ops.true_divide(x1, x2) +__doc__ +Alias for `keras.ops.divide`. diff --git a/.tether/man/op_unstack.txt b/.tether/man/op_unstack.txt new file mode 100644 index 0000000000..6b7be3dea2 --- /dev/null +++ b/.tether/man/op_unstack.txt @@ -0,0 +1,23 @@ +__signature__ +keras.ops.unstack( + x, + num=None, + axis=0 +) +__doc__ +Unpacks the given dimension of a rank-R tensor into rank-(R-1) tensors. + +Args: + x: The input tensor. + num: The length of the dimension axis. Automatically inferred + if `None`. + axis: The axis along which to unpack. + +Returns: + A list of tensors unpacked along the given axis. + +Example: + +>>> x = keras.ops.array([[1, 2], [3, 4]]) +>>> keras.ops.unstack(x, axis=0) +[array([1, 2]), array([3, 4])] diff --git a/.tether/man/op_var.txt b/.tether/man/op_var.txt new file mode 100644 index 0000000000..bc13b20dfb --- /dev/null +++ b/.tether/man/op_var.txt @@ -0,0 +1,18 @@ +__signature__ +keras.ops.var( + x, + axis=None, + keepdims=False +) +__doc__ +Compute the variance along the specified axes. + +Args: + x: Input tensor. + axis: Axis or axes along which the variance is computed. The default + is to compute the variance of the flattened tensor. + keepdims: If this is set to `True`, the axes which are reduced are left + in the result as dimensions with size one. + +Returns: + Output tensor containing the variance. diff --git a/.tether/man/op_vdot.txt b/.tether/man/op_vdot.txt new file mode 100644 index 0000000000..52e4778b3f --- /dev/null +++ b/.tether/man/op_vdot.txt @@ -0,0 +1,17 @@ +__signature__ +keras.ops.vdot(x1, x2) +__doc__ +Return the dot product of two vectors. + +If the first argument is complex, the complex conjugate of the first +argument is used for the calculation of the dot product.
+ +Multidimensional tensors are flattened before the dot product is taken. + +Args: + x1: First input tensor. If complex, its complex conjugate is taken + before calculation of the dot product. + x2: Second input tensor. + +Returns: + Output tensor. diff --git a/.tether/man/op_vectorize.txt b/.tether/man/op_vectorize.txt new file mode 100644 index 0000000000..e92d8e4ea8 --- /dev/null +++ b/.tether/man/op_vectorize.txt @@ -0,0 +1,38 @@ +__signature__ +keras.ops.vectorize( + pyfunc, + *, + excluded=None, + signature=None +) +__doc__ +Turn a function into a vectorized function. + +Example: + +```python +def myfunc(a, b): + return a + b + +vfunc = keras.ops.vectorize(myfunc) +y = vfunc([1, 2, 3, 4], 2) # Returns Tensor([3, 4, 5, 6]) +``` + +Args: + pyfunc: Callable of a single tensor argument. + excluded: Optional set of integers representing + positional arguments for which the function + will not be vectorized. + These will be passed directly to `pyfunc` unmodified. + signature: Optional generalized universal function signature, + e.g., `"(m,n),(n)->(m)"` for vectorized + matrix-vector multiplication. If provided, + `pyfunc` will be called with (and expected to return) + arrays with shapes given by the size of corresponding + core dimensions. By default, `pyfunc` is assumed + to take scalar tensors as input and output. + +Returns: + A new function that applies `pyfunc` to every element + of its input along axis 0 (the batch axis). + diff --git a/.tether/man/op_vectorized_map.txt b/.tether/man/op_vectorized_map.txt new file mode 100644 index 0000000000..3af8b4df0b --- /dev/null +++ b/.tether/man/op_vectorized_map.txt @@ -0,0 +1,30 @@ +__signature__ +keras.ops.vectorized_map(function, elements) +__doc__ +Parallel map of `function` on axis 0 of tensor(s) `elements`. + +Schematically, `vectorized_map` implements the following, +in the case of a single tensor input `elements`: + +```python +def vectorized_map(function, elements): + outputs = [] + for e in elements: + outputs.append(function(e)) + return stack(outputs) +``` + +In the case of an iterable of tensors `elements`, +it implements the following: + +```python +def vectorized_map(function, elements): + batch_size = elements[0].shape[0] + outputs = [] + for index in range(batch_size): + outputs.append(function([e[index] for e in elements])) + return np.stack(outputs) +``` + +In this case, `function` is expected to take as input +a single list of tensor arguments. diff --git a/.tether/man/op_vstack.txt b/.tether/man/op_vstack.txt new file mode 100644 index 0000000000..9fa444e4f1 --- /dev/null +++ b/.tether/man/op_vstack.txt @@ -0,0 +1,10 @@ +__signature__ +keras.ops.vstack(xs) +__doc__ +Stack tensors in sequence vertically (row wise). + +Args: + xs: Sequence of tensors. + +Returns: + Tensor formed by stacking the given tensors. diff --git a/.tether/man/op_where.txt b/.tether/man/op_where.txt new file mode 100644 index 0000000000..7f44adc2c9 --- /dev/null +++ b/.tether/man/op_where.txt @@ -0,0 +1,17 @@ +__signature__ +keras.ops.where( + condition, + x1=None, + x2=None +) +__doc__ +Return elements chosen from `x1` or `x2` depending on `condition`. + +Args: + condition: Where `True`, yield `x1`, otherwise yield `x2`. + x1: Values from which to choose when `condition` is `True`. + x2: Values from which to choose when `condition` is `False`. + +Returns: + A tensor with elements from `x1` where `condition` is `True`, and + elements from `x2` where `condition` is `False`.
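A small sketch tying two of the preceding ops together (output values in the comments are worked out by hand): `vectorized_map` applies a function per batch element, and `where` then selects element-wise:

```python
import keras

x = keras.ops.convert_to_tensor([[1.0, 2.0], [3.0, 4.0]])

# Apply the function to each slice along axis 0 and stack the results.
y = keras.ops.vectorized_map(lambda row: row * row, x)  # [[1. 4.], [9. 16.]]

# Element-wise select: keep squared values above 5, zero out the rest.
keras.ops.where(y > 5.0, y, 0.0)                        # [[0. 0.], [9. 16.]]
```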
diff --git a/.tether/man/op_while_loop.txt b/.tether/man/op_while_loop.txt new file mode 100644 index 0000000000..f42eae87d6 --- /dev/null +++ b/.tether/man/op_while_loop.txt @@ -0,0 +1,43 @@ +__signature__ +keras.ops.while_loop( + cond, + body, + loop_vars, + maximum_iterations=None +) +__doc__ +While loop implementation. + +Args: + cond: A callable that represents the termination condition of the loop. + Must accept a `loop_vars`-like structure as an argument. If + `loop_vars` is a tuple or list, each element of `loop_vars` will be + passed positionally to the callable. + body: A callable that represents the loop body. Must accept a + `loop_vars`-like structure as an argument, and return an updated value + with the same structure. If `loop_vars` is a tuple or list, each + element of `loop_vars` will be passed positionally to the callable. + loop_vars: An arbitrary nested structure of tensor state to persist + across loop iterations. + maximum_iterations: Optional maximum number of iterations of the while + loop to run. If provided, the `cond` output is AND-ed with an + additional condition ensuring the number of iterations executed is + no greater than `maximum_iterations`. + +Returns: + A list/tuple of tensors with the same shape and dtype as `loop_vars`. + +Examples: + +>>> i = 0 +>>> cond = lambda i: i < 10 +>>> body = lambda i: i + 1 +>>> keras.ops.while_loop(cond, body, i) +10 + +>>> x, y = 0, 1 +>>> cond = lambda x, y: x < 10 +>>> body = lambda x, y: (x + 1, y + 1) +>>> keras.ops.while_loop(cond, body, (x, y)) +(10, 11) + diff --git a/.tether/man/op_zeros.txt b/.tether/man/op_zeros.txt new file mode 100644 index 0000000000..30207c6104 --- /dev/null +++ b/.tether/man/op_zeros.txt @@ -0,0 +1,11 @@ +__signature__ +keras.ops.zeros(shape, dtype=None) +__doc__ +Return a new tensor of given shape and type, filled with zeros. + +Args: + shape: Shape of the new tensor. + dtype: Desired data type of the tensor. + +Returns: + Tensor of zeros with the given shape and dtype. diff --git a/.tether/man/op_zeros_like.txt b/.tether/man/op_zeros_like.txt new file mode 100644 index 0000000000..9890525da2 --- /dev/null +++ b/.tether/man/op_zeros_like.txt @@ -0,0 +1,11 @@ +__signature__ +keras.ops.zeros_like(x, dtype=None) +__doc__ +Return a tensor of zeros with the same shape and type as `x`. + +Args: + x: Input tensor. + dtype: Overrides the data type of the result. + +Returns: + A tensor of zeros with the same shape and type as `x`. diff --git a/.tether/man/optimizer_adadelta.txt b/.tether/man/optimizer_adadelta.txt new file mode 100644 index 0000000000..828fab4b4e --- /dev/null +++ b/.tether/man/optimizer_adadelta.txt @@ -0,0 +1,138 @@ +Help on class Adadelta in module keras.src.optimizers.adadelta: + +class Adadelta(keras.src.optimizers.optimizer.Optimizer) + | Adadelta(learning_rate=0.001, rho=0.95, epsilon=1e-07, weight_decay=None, clipnorm=None, clipvalue=None, global_clipnorm=None, use_ema=False, ema_momentum=0.99, ema_overwrite_frequency=None, loss_scale_factor=None, gradient_accumulation_steps=None, name='adadelta', **kwargs) + | + | Optimizer that implements the Adadelta algorithm. + | + | Adadelta optimization is a stochastic gradient descent method that is based + | on adaptive learning rate per dimension to address two drawbacks: + | + | - The continual decay of learning rates throughout training. + | - The need for a manually selected global learning rate.
+ | + | Adadelta is a more robust extension of Adagrad that adapts learning rates + | based on a moving window of gradient updates, instead of accumulating all + | past gradients. This way, Adadelta continues learning even when many updates + | have been done. Compared to Adagrad, in the original version of Adadelta you + | don't have to set an initial learning rate. In this version, the initial + | learning rate can be set, as in most other Keras optimizers. + | + | Args: + | learning_rate: A float, a + | `keras.optimizers.schedules.LearningRateSchedule` instance, or + | a callable that takes no arguments and returns the actual value to + | use. The learning rate. Defaults to `0.001`. Note that `Adadelta` + | tends to benefit from higher initial learning rate values compared + | to other optimizers. To match the exact form in the original paper, + | use 1.0. + | rho: A floating point value. The decay rate. Defaults to `0.95`. + | epsilon: Small floating point value for maintaining numerical stability. + | name: String. The name to use + | for momentum accumulator weights created by + | the optimizer. + | weight_decay: Float. If set, weight decay is applied. + | clipnorm: Float. If set, the gradient of each weight is individually + | clipped so that its norm is no higher than this value. + | clipvalue: Float. If set, the gradient of each weight is clipped to be + | no higher than this value. + | global_clipnorm: Float. If set, the gradient of all weights is clipped + | so that their global norm is no higher than this value. + | use_ema: Boolean, defaults to `False`. + | If `True`, exponential moving average + | (EMA) is applied. EMA consists of computing an exponential moving + | average of the weights of the model (as the weight values change + | after each training batch), and periodically overwriting the + | weights with their moving average. + | ema_momentum: Float, defaults to 0.99. Only used if `use_ema=True`. + | This is the momentum to use when computing + | the EMA of the model's weights: + | `new_average = ema_momentum * old_average + (1 - ema_momentum) * + | current_variable_value`. + | ema_overwrite_frequency: Int or None, defaults to None. Only used if + | `use_ema=True`. Every `ema_overwrite_frequency` steps of iterations, + | we overwrite the model variable by its moving average. + | If None, the optimizer + | does not overwrite model variables in the middle of training, + | and you need to explicitly overwrite the variables + | at the end of training by calling + | `optimizer.finalize_variable_values()` (which updates the model + | variables in-place). When using the built-in `fit()` training loop, + | this happens automatically after the last epoch, + | and you don't need to do anything. + | loss_scale_factor: Float or `None`. If a float, the scale factor will + | be multiplied by the loss before computing gradients, and the inverse + | of the scale factor will be multiplied by the gradients before + | updating variables. Useful for preventing underflow during + | mixed precision training. Alternatively, + | `keras.optimizers.LossScaleOptimizer` will + | automatically set a loss scale factor. + | gradient_accumulation_steps: Int or `None`. If an int, model & optimizer + | variables will not be updated at every step; instead they will be + | updated every `gradient_accumulation_steps` steps, using the average + | value of the gradients since the last update. This is known as + | "gradient accumulation".
This can be useful + | when your batch size is very small, in order to reduce gradient + | noise at each update step. + | + | + | Reference: + | + | - [Zeiler, 2012](http://arxiv.org/abs/1212.5701) + | + | Method resolution order: + | Adadelta + | keras.src.optimizers.optimizer.Optimizer + | keras.src.backend.tensorflow.optimizer.TFOptimizer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.optimizers.base_optimizer.BaseOptimizer + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | learning_rate=0.001, + | rho=0.95, + | epsilon=1e-07, + | weight_decay=None, + | clipnorm=None, + | clipvalue=None, + | global_clipnorm=None, + | use_ema=False, + | ema_momentum=0.99, + | ema_overwrite_frequency=None, + | loss_scale_factor=None, + | gradient_accumulation_steps=None, + | name='adadelta', + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, var_list) + | + | get_config(self) + | Returns the config of the optimizer. + | + | An optimizer config is a Python dictionary (serializable) + | containing the configuration of an optimizer. + | The same optimizer can be reinstantiated later + | (without any saved state) from this configuration. + | + | Subclass optimizer should override this method to include other + | hyperparameters. + | + | Returns: + | Python dictionary. + | + | update_step( + | self, + | grad, + | variable, + | learning_rate + | ) + | Update step given gradient and the associated model variable. + | + diff --git a/.tether/man/optimizer_adafactor.txt b/.tether/man/optimizer_adafactor.txt new file mode 100644 index 0000000000..79ae565519 --- /dev/null +++ b/.tether/man/optimizer_adafactor.txt @@ -0,0 +1,150 @@ +Help on class Adafactor in module keras.src.optimizers.adafactor: + +class Adafactor(keras.src.optimizers.optimizer.Optimizer) + | Adafactor(learning_rate=0.001, beta_2_decay=-0.8, epsilon_1=1e-30, epsilon_2=0.001, clip_threshold=1.0, relative_step=True, weight_decay=None, clipnorm=None, clipvalue=None, global_clipnorm=None, use_ema=False, ema_momentum=0.99, ema_overwrite_frequency=None, loss_scale_factor=None, gradient_accumulation_steps=None, name='adafactor', **kwargs) + | + | Optimizer that implements the Adafactor algorithm. + | + | Adafactor is commonly used in NLP tasks, and has the advantage + | of taking less memory because it only saves partial information of previous + | gradients. + | + | The default argument setup is based on the original paper (see reference). + | When gradients are of dimension > 2, Adafactor optimizer will delete the + | last 2 dimensions separately in its accumulator variables. + | + | Args: + | learning_rate: A float, a + | `keras.optimizers.schedules.LearningRateSchedule` instance, or + | a callable that takes no arguments and returns the actual value to + | use. The learning rate. Defaults to `0.001`. + | beta_2_decay: float, defaults to -0.8. The decay rate of `beta_2`. + | epsilon_1: float, defaults to 1e-30. A small offset to keep denominator + | away from 0. + | epsilon_2: float, defaults to 1e-3. A small offset to avoid learning + | rate becoming too small over time. + | clip_threshold: float, defaults to 1.0. Clipping threshold. This is a + | part of Adafactor algorithm, independent from `clipnorm`, + | `clipvalue`, and `global_clipnorm`. + | relative_step: bool, defaults to `True`.
If `learning_rate` is a + | constant and `relative_step=True`, the learning rate will be adjusted + | based on the current iteration. This is a default learning rate decay + | in Adafactor. + | name: String. The name to use + | for momentum accumulator weights created by + | the optimizer. + | weight_decay: Float. If set, weight decay is applied. + | clipnorm: Float. If set, the gradient of each weight is individually + | clipped so that its norm is no higher than this value. + | clipvalue: Float. If set, the gradient of each weight is clipped to be + | no higher than this value. + | global_clipnorm: Float. If set, the gradient of all weights is clipped + | so that their global norm is no higher than this value. + | use_ema: Boolean, defaults to `False`. + | If `True`, exponential moving average + | (EMA) is applied. EMA consists of computing an exponential moving + | average of the weights of the model (as the weight values change + | after each training batch), and periodically overwriting the + | weights with their moving average. + | ema_momentum: Float, defaults to 0.99. Only used if `use_ema=True`. + | This is the momentum to use when computing + | the EMA of the model's weights: + | `new_average = ema_momentum * old_average + (1 - ema_momentum) * + | current_variable_value`. + | ema_overwrite_frequency: Int or None, defaults to None. Only used if + | `use_ema=True`. Every `ema_overwrite_frequency` steps of iterations, + | we overwrite the model variable by its moving average. + | If None, the optimizer + | does not overwrite model variables in the middle of training, + | and you need to explicitly overwrite the variables + | at the end of training by calling + | `optimizer.finalize_variable_values()` (which updates the model + | variables in-place). When using the built-in `fit()` training loop, + | this happens automatically after the last epoch, + | and you don't need to do anything. + | loss_scale_factor: Float or `None`. If a float, the scale factor will + | be multiplied by the loss before computing gradients, and the inverse + | of the scale factor will be multiplied by the gradients before + | updating variables. Useful for preventing underflow during + | mixed precision training. Alternatively, + | `keras.optimizers.LossScaleOptimizer` will + | automatically set a loss scale factor. + | gradient_accumulation_steps: Int or `None`. If an int, model & optimizer + | variables will not be updated at every step; instead they will be + | updated every `gradient_accumulation_steps` steps, using the average + | value of the gradients since the last update. This is known as + | "gradient accumulation". This can be useful + | when your batch size is very small, in order to reduce gradient + | noise at each update step. + | + | + | Reference: + | + | - [Shazeer, Noam et al., 2018](https://arxiv.org/abs/1804.04235).
+ | + | Method resolution order: + | Adafactor + | keras.src.optimizers.optimizer.Optimizer + | keras.src.backend.tensorflow.optimizer.TFOptimizer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.optimizers.base_optimizer.BaseOptimizer + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | learning_rate=0.001, + | beta_2_decay=-0.8, + | epsilon_1=1e-30, + | epsilon_2=0.001, + | clip_threshold=1.0, + | relative_step=True, + | weight_decay=None, + | clipnorm=None, + | clipvalue=None, + | global_clipnorm=None, + | use_ema=False, + | ema_momentum=0.99, + | ema_overwrite_frequency=None, + | loss_scale_factor=None, + | gradient_accumulation_steps=None, + | name='adafactor', + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, var_list) + | Initialize optimizer variables. + | + | Adam optimizer has 3 types of variables: momentums, velocities and + | velocity_hat (only set when amsgrad is applied), + | + | Args: + | var_list: list of model variables to build Adam variables on. + | + | get_config(self) + | Returns the config of the optimizer. + | + | An optimizer config is a Python dictionary (serializable) + | containing the configuration of an optimizer. + | The same optimizer can be reinstantiated later + | (without any saved state) from this configuration. + | + | Subclass optimizer should override this method to include other + | hyperparameters. + | + | Returns: + | Python dictionary. + | + | update_step( + | self, + | gradient, + | variable, + | learning_rate + | ) + | Update step given gradient and the associated model variable. + | + diff --git a/.tether/man/optimizer_adagrad.txt b/.tether/man/optimizer_adagrad.txt new file mode 100644 index 0000000000..bff3cdd865 --- /dev/null +++ b/.tether/man/optimizer_adagrad.txt @@ -0,0 +1,132 @@ +Help on class Adagrad in module keras.src.optimizers.adagrad: + +class Adagrad(keras.src.optimizers.optimizer.Optimizer) + | Adagrad(learning_rate=0.001, initial_accumulator_value=0.1, epsilon=1e-07, weight_decay=None, clipnorm=None, clipvalue=None, global_clipnorm=None, use_ema=False, ema_momentum=0.99, ema_overwrite_frequency=None, loss_scale_factor=None, gradient_accumulation_steps=None, name='adagrad', **kwargs) + | + | Optimizer that implements the Adagrad algorithm. + | + | Adagrad is an optimizer with parameter-specific learning rates, + | which are adapted relative to how frequently a parameter gets + | updated during training. The more updates a parameter receives, + | the smaller the updates. + | + | Args: + | learning_rate: A float, a + | `keras.optimizers.schedules.LearningRateSchedule` instance, or + | a callable that takes no arguments and returns the actual value to + | use. The learning rate. Defaults to `0.001`. Note that `Adagrad` + | tends to benefit from higher initial learning rate values compared + | to other optimizers. To match the exact form in the original paper, + | use `1.0`. + | initial_accumulator_value: Floating point value. Starting value for the + | accumulators (per-parameter momentum values). Must be non-negative. + | epsilon: Small floating point value for maintaining numerical stability. + | name: String. The name to use + | for momentum accumulator weights created by + | the optimizer. + | weight_decay: Float. If set, weight decay is applied. + | clipnorm: Float. 
If set, the gradient of each weight is individually + | clipped so that its norm is no higher than this value. + | clipvalue: Float. If set, the gradient of each weight is clipped to be + | no higher than this value. + | global_clipnorm: Float. If set, the gradient of all weights is clipped + | so that their global norm is no higher than this value. + | use_ema: Boolean, defaults to `False`. + | If `True`, exponential moving average + | (EMA) is applied. EMA consists of computing an exponential moving + | average of the weights of the model (as the weight values change + | after each training batch), and periodically overwriting the + | weights with their moving average. + | ema_momentum: Float, defaults to 0.99. Only used if `use_ema=True`. + | This is the momentum to use when computing + | the EMA of the model's weights: + | `new_average = ema_momentum * old_average + (1 - ema_momentum) * + | current_variable_value`. + | ema_overwrite_frequency: Int or None, defaults to None. Only used if + | `use_ema=True`. Every `ema_overwrite_frequency` steps of iterations, + | we overwrite the model variable by its moving average. + | If None, the optimizer + | does not overwrite model variables in the middle of training, + | and you need to explicitly overwrite the variables + | at the end of training by calling + | `optimizer.finalize_variable_values()` (which updates the model + | variables in-place). When using the built-in `fit()` training loop, + | this happens automatically after the last epoch, + | and you don't need to do anything. + | loss_scale_factor: Float or `None`. If a float, the scale factor will + | be multiplied by the loss before computing gradients, and the inverse + | of the scale factor will be multiplied by the gradients before + | updating variables. Useful for preventing underflow during + | mixed precision training. Alternatively, + | `keras.optimizers.LossScaleOptimizer` will + | automatically set a loss scale factor. + | gradient_accumulation_steps: Int or `None`. If an int, model & optimizer + | variables will not be updated at every step; instead they will be + | updated every `gradient_accumulation_steps` steps, using the average + | value of the gradients since the last update. This is known as + | "gradient accumulation". This can be useful + | when your batch size is very small, in order to reduce gradient + | noise at each update step. + | + | + | Reference: + | + | - [Duchi et al., 2011]( + | http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf). + | + | Method resolution order: + | Adagrad + | keras.src.optimizers.optimizer.Optimizer + | keras.src.backend.tensorflow.optimizer.TFOptimizer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.optimizers.base_optimizer.BaseOptimizer + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | learning_rate=0.001, + | initial_accumulator_value=0.1, + | epsilon=1e-07, + | weight_decay=None, + | clipnorm=None, + | clipvalue=None, + | global_clipnorm=None, + | use_ema=False, + | ema_momentum=0.99, + | ema_overwrite_frequency=None, + | loss_scale_factor=None, + | gradient_accumulation_steps=None, + | name='adagrad', + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, var_list) + | + | get_config(self) + | Returns the config of the optimizer.
+ | + | An optimizer config is a Python dictionary (serializable) + | containing the configuration of an optimizer. + | The same optimizer can be reinstantiated later + | (without any saved state) from this configuration. + | + | Subclass optimizer should override this method to include other + | hyperparameters. + | + | Returns: + | Python dictionary. + | + | update_step( + | self, + | gradient, + | variable, + | learning_rate + | ) + | Update step given gradient and the associated model variable. + | + diff --git a/.tether/man/optimizer_adam.txt b/.tether/man/optimizer_adam.txt new file mode 100644 index 0000000000..25622c63e0 --- /dev/null +++ b/.tether/man/optimizer_adam.txt @@ -0,0 +1,149 @@ +Help on class Adam in module keras.src.optimizers.adam: + +class Adam(keras.src.optimizers.optimizer.Optimizer) + | Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False, weight_decay=None, clipnorm=None, clipvalue=None, global_clipnorm=None, use_ema=False, ema_momentum=0.99, ema_overwrite_frequency=None, loss_scale_factor=None, gradient_accumulation_steps=None, name='adam', **kwargs) + | + | Optimizer that implements the Adam algorithm. + | + | Adam optimization is a stochastic gradient descent method that is based on + | adaptive estimation of first-order and second-order moments. + | + | According to + | [Kingma et al., 2014](http://arxiv.org/abs/1412.6980), + | the method is "*computationally + | efficient, has little memory requirement, invariant to diagonal rescaling of + | gradients, and is well suited for problems that are large in terms of + | data/parameters*". + | + | Args: + | learning_rate: A float, a + | `keras.optimizers.schedules.LearningRateSchedule` instance, or + | a callable that takes no arguments and returns the actual value to + | use. The learning rate. Defaults to `0.001`. + | beta_1: A float value or a constant float tensor, or a callable + | that takes no arguments and returns the actual value to use. The + | exponential decay rate for the 1st moment estimates. Defaults to + | `0.9`. + | beta_2: A float value or a constant float tensor, or a callable + | that takes no arguments and returns the actual value to use. The + | exponential decay rate for the 2nd moment estimates. Defaults to + | `0.999`. + | epsilon: A small constant for numerical stability. This epsilon is + | "epsilon hat" in the Kingma and Ba paper (in the formula just before + | Section 2.1), not the epsilon in Algorithm 1 of the paper. Defaults + | to `1e-7`. + | amsgrad: Boolean. Whether to apply AMSGrad variant of this algorithm + | from the paper "On the Convergence of Adam and beyond". Defaults + | to `False`. + | name: String. The name to use + | for momentum accumulator weights created by + | the optimizer. + | weight_decay: Float. If set, weight decay is applied. + | clipnorm: Float. If set, the gradient of each weight is individually + | clipped so that its norm is no higher than this value. + | clipvalue: Float. If set, the gradient of each weight is clipped to be + | no higher than this value. + | global_clipnorm: Float. If set, the gradient of all weights is clipped + | so that their global norm is no higher than this value. + | use_ema: Boolean, defaults to `False`. + | If `True`, exponential moving average + | (EMA) is applied. EMA consists of computing an exponential moving + | average of the weights of the model (as the weight values change + | after each training batch), and periodically overwriting the + | weights with their moving average. 
+ | ema_momentum: Float, defaults to 0.99. Only used if `use_ema=True`. + | This is the momentum to use when computing + | the EMA of the model's weights: + | `new_average = ema_momentum * old_average + (1 - ema_momentum) * + | current_variable_value`. + | ema_overwrite_frequency: Int or None, defaults to None. Only used if + | `use_ema=True`. Every `ema_overwrite_frequency` steps of iterations, + | we overwrite the model variable by its moving average. + | If None, the optimizer + | does not overwrite model variables in the middle of training, + | and you need to explicitly overwrite the variables + | at the end of training by calling + | `optimizer.finalize_variable_values()` (which updates the model + | variables in-place). When using the built-in `fit()` training loop, + | this happens automatically after the last epoch, + | and you don't need to do anything. + | loss_scale_factor: Float or `None`. If a float, the scale factor will + | be multiplied by the loss before computing gradients, and the inverse + | of the scale factor will be multiplied by the gradients before + | updating variables. Useful for preventing underflow during + | mixed precision training. Alternatively, + | `keras.optimizers.LossScaleOptimizer` will + | automatically set a loss scale factor. + | gradient_accumulation_steps: Int or `None`. If an int, model & optimizer + | variables will not be updated at every step; instead they will be + | updated every `gradient_accumulation_steps` steps, using the average + | value of the gradients since the last update. This is known as + | "gradient accumulation". This can be useful + | when your batch size is very small, in order to reduce gradient + | noise at each update step. + | + | Method resolution order: + | Adam + | keras.src.optimizers.optimizer.Optimizer + | keras.src.backend.tensorflow.optimizer.TFOptimizer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.optimizers.base_optimizer.BaseOptimizer + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | learning_rate=0.001, + | beta_1=0.9, + | beta_2=0.999, + | epsilon=1e-07, + | amsgrad=False, + | weight_decay=None, + | clipnorm=None, + | clipvalue=None, + | global_clipnorm=None, + | use_ema=False, + | ema_momentum=0.99, + | ema_overwrite_frequency=None, + | loss_scale_factor=None, + | gradient_accumulation_steps=None, + | name='adam', + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, var_list) + | Initialize optimizer variables. + | + | Adam optimizer has 3 types of variables: momentums, velocities and + | velocity_hat (only set when amsgrad is applied). + | + | Args: + | var_list: list of model variables to build Adam variables on. + | + | get_config(self) + | Returns the config of the optimizer. + | + | An optimizer config is a Python dictionary (serializable) + | containing the configuration of an optimizer. + | The same optimizer can be reinstantiated later + | (without any saved state) from this configuration. + | + | Subclass optimizer should override this method to include other + | hyperparameters. + | + | Returns: + | Python dictionary. + | + | update_step( + | self, + | gradient, + | variable, + | learning_rate + | ) + | Update step given gradient and the associated model variable.
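A short usage sketch for the optimizer classes documented here (the one-layer model is a hypothetical stand-in): an optimizer is usually passed to `compile()`, either as a configured instance or by its string alias for the defaults:

```python
import keras

model = keras.Sequential([keras.layers.Dense(1)])

# Configured instance: set hyperparameters explicitly.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-3, beta_1=0.9, beta_2=0.999),
    loss="mse",
)

# Or the string alias, which uses the defaults shown in the signature above:
# model.compile(optimizer="adam", loss="mse")
```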
+ | + diff --git a/.tether/man/optimizer_adam_w.txt b/.tether/man/optimizer_adam_w.txt new file mode 100644 index 0000000000..ec6c92cd3d --- /dev/null +++ b/.tether/man/optimizer_adam_w.txt @@ -0,0 +1,130 @@ +Help on class AdamW in module keras.src.optimizers.adamw: + +class AdamW(keras.src.optimizers.adam.Adam) + | AdamW(learning_rate=0.001, weight_decay=0.004, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False, clipnorm=None, clipvalue=None, global_clipnorm=None, use_ema=False, ema_momentum=0.99, ema_overwrite_frequency=None, loss_scale_factor=None, gradient_accumulation_steps=None, name='adamw', **kwargs) + | + | Optimizer that implements the AdamW algorithm. + | + | AdamW optimization is a stochastic gradient descent method that is based on + | adaptive estimation of first-order and second-order moments with an added + | method to decay weights per the techniques discussed in the paper, + | 'Decoupled Weight Decay Regularization' by + | [Loshchilov, Hutter et al., 2019](https://arxiv.org/abs/1711.05101). + | + | According to + | [Kingma et al., 2014](http://arxiv.org/abs/1412.6980), + | the underlying Adam method is "*computationally + | efficient, has little memory requirement, invariant to diagonal rescaling of + | gradients, and is well suited for problems that are large in terms of + | data/parameters*". + | + | Args: + | learning_rate: A float, a + | `keras.optimizers.schedules.LearningRateSchedule` instance, or + | a callable that takes no arguments and returns the actual value to + | use. The learning rate. Defaults to `0.001`. + | beta_1: A float value or a constant float tensor, or a callable + | that takes no arguments and returns the actual value to use. The + | exponential decay rate for the 1st moment estimates. + | Defaults to `0.9`. + | beta_2: A float value or a constant float tensor, or a callable + | that takes no arguments and returns the actual value to use. The + | exponential decay rate for the 2nd moment estimates. + | Defaults to `0.999`. + | epsilon: A small constant for numerical stability. This epsilon is + | "epsilon hat" in the Kingma and Ba paper (in the formula just + | before Section 2.1), not the epsilon in Algorithm 1 of the paper. + | Defaults to 1e-7. + | amsgrad: Boolean. Whether to apply AMSGrad variant of this algorithm + | from the paper "On the Convergence of Adam and beyond". + | Defaults to `False`. + | name: String. The name to use + | for momentum accumulator weights created by + | the optimizer. + | weight_decay: Float. If set, weight decay is applied. + | clipnorm: Float. If set, the gradient of each weight is individually + | clipped so that its norm is no higher than this value. + | clipvalue: Float. If set, the gradient of each weight is clipped to be + | no higher than this value. + | global_clipnorm: Float. If set, the gradient of all weights is clipped + | so that their global norm is no higher than this value. + | use_ema: Boolean, defaults to `False`. + | If `True`, exponential moving average + | (EMA) is applied. EMA consists of computing an exponential moving + | average of the weights of the model (as the weight values change + | after each training batch), and periodically overwriting the + | weights with their moving average. + | ema_momentum: Float, defaults to 0.99. Only used if `use_ema=True`. + | This is the momentum to use when computing + | the EMA of the model's weights: + | `new_average = ema_momentum * old_average + (1 - ema_momentum) * + | current_variable_value`.
+ | ema_overwrite_frequency: Int or None, defaults to None. Only used if + | `use_ema=True`. Every `ema_overwrite_frequency` steps of iterations, + | we overwrite the model variable by its moving average. + | If None, the optimizer + | does not overwrite model variables in the middle of training, + | and you need to explicitly overwrite the variables + | at the end of training by calling + | `optimizer.finalize_variable_values()` (which updates the model + | variables in-place). When using the built-in `fit()` training loop, + | this happens automatically after the last epoch, + | and you don't need to do anything. + | loss_scale_factor: Float or `None`. If a float, the scale factor will + | be multiplied by the loss before computing gradients, and the inverse + | of the scale factor will be multiplied by the gradients before + | updating variables. Useful for preventing underflow during + | mixed precision training. Alternatively, + | `keras.optimizers.LossScaleOptimizer` will + | automatically set a loss scale factor. + | gradient_accumulation_steps: Int or `None`. If an int, model & optimizer + | variables will not be updated at every step; instead they will be + | updated every `gradient_accumulation_steps` steps, using the average + | value of the gradients since the last update. This is known as + | "gradient accumulation". This can be useful + | when your batch size is very small, in order to reduce gradient + | noise at each update step. + | + | + | References: + | + | - [Loshchilov et al., 2019](https://arxiv.org/abs/1711.05101) + | - [Kingma et al., 2014](http://arxiv.org/abs/1412.6980) for `adam` + | - [Reddi et al., 2018]( + | https://openreview.net/pdf?id=ryQu7f-RZ) for `amsgrad`. + | + | Method resolution order: + | AdamW + | keras.src.optimizers.adam.Adam + | keras.src.optimizers.optimizer.Optimizer + | keras.src.backend.tensorflow.optimizer.TFOptimizer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.optimizers.base_optimizer.BaseOptimizer + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | learning_rate=0.001, + | weight_decay=0.004, + | beta_1=0.9, + | beta_2=0.999, + | epsilon=1e-07, + | amsgrad=False, + | clipnorm=None, + | clipvalue=None, + | global_clipnorm=None, + | use_ema=False, + | ema_momentum=0.99, + | ema_overwrite_frequency=None, + | loss_scale_factor=None, + | gradient_accumulation_steps=None, + | name='adamw', + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + diff --git a/.tether/man/optimizer_adamax.txt b/.tether/man/optimizer_adamax.txt new file mode 100644 index 0000000000..cf944d82d5 --- /dev/null +++ b/.tether/man/optimizer_adamax.txt @@ -0,0 +1,159 @@ +Help on class Adamax in module keras.src.optimizers.adamax: + +class Adamax(keras.src.optimizers.optimizer.Optimizer) + | Adamax(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, weight_decay=None, clipnorm=None, clipvalue=None, global_clipnorm=None, use_ema=False, ema_momentum=0.99, ema_overwrite_frequency=None, loss_scale_factor=None, gradient_accumulation_steps=None, name='adamax', **kwargs) + | + | Optimizer that implements the Adamax algorithm. + | + | Adamax, a variant of Adam based on the infinity norm, is a first-order + | gradient-based optimization method.
Due to its capability of adjusting the + | learning rate based on data characteristics, it is well suited to learning + | time-variant processes, e.g., speech data with dynamically changing noise + | conditions. Default parameters follow those provided in the paper (see + | references below). + | + | Initialization: + | + | ```python + | m = 0 # Initialize initial 1st moment vector + | u = 0 # Initialize the exponentially weighted infinity norm + | t = 0 # Initialize timestep + | ``` + | + | The update rule for parameter `w` with gradient `g` is described at the end + | of section 7.1 of the paper (see the reference section): + | + | ```python + | t += 1 + | m = beta1 * m + (1 - beta1) * g + | u = max(beta2 * u, abs(g)) + | current_lr = learning_rate / (1 - beta1 ** t) + | w = w - current_lr * m / (u + epsilon) + | ``` + | + | Args: + | learning_rate: A float, a + | `keras.optimizers.schedules.LearningRateSchedule` instance, or + | a callable that takes no arguments and returns the actual value to + | use. The learning rate. Defaults to `0.001`. + | beta_1: A float value or a constant float tensor. The exponential decay + | rate for the 1st moment estimates. + | beta_2: A float value or a constant float tensor. The exponential decay + | rate for the exponentially weighted infinity norm. + | epsilon: A small constant for numerical stability. + | name: String. The name to use + | for momentum accumulator weights created by + | the optimizer. + | weight_decay: Float. If set, weight decay is applied. + | clipnorm: Float. If set, the gradient of each weight is individually + | clipped so that its norm is no higher than this value. + | clipvalue: Float. If set, the gradient of each weight is clipped to be + | no higher than this value. + | global_clipnorm: Float. If set, the gradient of all weights is clipped + | so that their global norm is no higher than this value. + | use_ema: Boolean, defaults to `False`. + | If `True`, exponential moving average + | (EMA) is applied. EMA consists of computing an exponential moving + | average of the weights of the model (as the weight values change + | after each training batch), and periodically overwriting the + | weights with their moving average. + | ema_momentum: Float, defaults to 0.99. Only used if `use_ema=True`. + | This is the momentum to use when computing + | the EMA of the model's weights: + | `new_average = ema_momentum * old_average + (1 - ema_momentum) * + | current_variable_value`. + | ema_overwrite_frequency: Int or None, defaults to None. Only used if + | `use_ema=True`. Every `ema_overwrite_frequency` steps of iterations, + | we overwrite the model variable by its moving average. + | If None, the optimizer + | does not overwrite model variables in the middle of training, + | and you need to explicitly overwrite the variables + | at the end of training by calling + | `optimizer.finalize_variable_values()` (which updates the model + | variables in-place). When using the built-in `fit()` training loop, + | this happens automatically after the last epoch, + | and you don't need to do anything. + | loss_scale_factor: Float or `None`. If a float, the scale factor will + | be multiplied by the loss before computing gradients, and the inverse + | of the scale factor will be multiplied by the gradients before + | updating variables. Useful for preventing underflow during + | mixed precision training. Alternatively, + | `keras.optimizers.LossScaleOptimizer` will + | automatically set a loss scale factor. + | gradient_accumulation_steps: Int or `None`.
If an int, model & optimizer + | variables will not be updated at every step; instead they will be + | updated every `gradient_accumulation_steps` steps, using the average + | value of the gradients since the last update. This is known as + | "gradient accumulation". This can be useful + | when your batch size is very small, in order to reduce gradient + | noise at each update step. + | + | + | Reference: + | + | - [Kingma et al., 2014](http://arxiv.org/abs/1412.6980) + | + | Method resolution order: + | Adamax + | keras.src.optimizers.optimizer.Optimizer + | keras.src.backend.tensorflow.optimizer.TFOptimizer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.optimizers.base_optimizer.BaseOptimizer + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | learning_rate=0.001, + | beta_1=0.9, + | beta_2=0.999, + | epsilon=1e-07, + | weight_decay=None, + | clipnorm=None, + | clipvalue=None, + | global_clipnorm=None, + | use_ema=False, + | ema_momentum=0.99, + | ema_overwrite_frequency=None, + | loss_scale_factor=None, + | gradient_accumulation_steps=None, + | name='adamax', + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, var_list) + | Initialize optimizer variables. + | + | Adamax optimizer has 2 types of variables: momentums (denoted as m), + | exponentially weighted infinity norm (denoted as u). + | + | Args: + | var_list: list of model variables to build Adamax variables on. + | + | get_config(self) + | Returns the config of the optimizer. + | + | An optimizer config is a Python dictionary (serializable) + | containing the configuration of an optimizer. + | The same optimizer can be reinstantiated later + | (without any saved state) from this configuration. + | + | Subclass optimizer should override this method to include other + | hyperparameters. + | + | Returns: + | Python dictionary. + | + | update_step( + | self, + | gradient, + | variable, + | learning_rate + | ) + | Update step given gradient and the associated model variable. + | + diff --git a/.tether/man/optimizer_ftrl.txt b/.tether/man/optimizer_ftrl.txt new file mode 100644 index 0000000000..38b53c80b8 --- /dev/null +++ b/.tether/man/optimizer_ftrl.txt @@ -0,0 +1,182 @@ +Help on class Ftrl in module keras.src.optimizers.ftrl: + +class Ftrl(keras.src.optimizers.optimizer.Optimizer) + | Ftrl(learning_rate=0.001, learning_rate_power=-0.5, initial_accumulator_value=0.1, l1_regularization_strength=0.0, l2_regularization_strength=0.0, l2_shrinkage_regularization_strength=0.0, beta=0.0, weight_decay=None, clipnorm=None, clipvalue=None, global_clipnorm=None, use_ema=False, ema_momentum=0.99, ema_overwrite_frequency=None, loss_scale_factor=None, gradient_accumulation_steps=None, name='ftrl', **kwargs) + | + | Optimizer that implements the FTRL algorithm. + | + | "Follow The Regularized Leader" (FTRL) is an optimization algorithm + | developed at Google for click-through rate prediction in the early 2010s. It + | is most suitable for shallow models with large and sparse feature spaces. + | The algorithm is described by + | [McMahan et al., 2013](https://research.google.com/pubs/archive/41159.pdf). + | The Keras version has support for both online L2 regularization + | (the L2 regularization described in the paper + | above) and shrinkage-type L2 regularization + | (which is the addition of an L2 penalty to the loss function). 
+ | + | Initialization: + | + | ```python + | n = 0 + | sigma = 0 + | z = 0 + | ``` + | + | Update rule for one variable `w`: + | + | ```python + | prev_n = n + | n = n + g ** 2 + | sigma = (n ** -lr_power - prev_n ** -lr_power) / lr + | z = z + g - sigma * w + | if abs(z) < lambda_1: + | w = 0 + | else: + | w = (sgn(z) * lambda_1 - z) / ((beta + sqrt(n)) / alpha + lambda_2) + | ``` + | + | Notation: + | + | - `lr` is the learning rate + | - `g` is the gradient for the variable + | - `lambda_1` is the L1 regularization strength + | - `lambda_2` is the L2 regularization strength + | - `lr_power` is the power to scale n. + | + | Check the documentation for the `l2_shrinkage_regularization_strength` + | parameter for more details when shrinkage is enabled, in which case the + | gradient is replaced with a gradient with shrinkage. + | + | Args: + | learning_rate: A float, a + | `keras.optimizers.schedules.LearningRateSchedule` instance, or + | a callable that takes no arguments and returns the actual value to + | use. The learning rate. Defaults to `0.001`. + | learning_rate_power: A float value, must be less than or equal to zero. + | Controls how the learning rate decreases during training. Use zero + | for a fixed learning rate. + | initial_accumulator_value: The starting value for accumulators. Only + | zero or positive values are allowed. + | l1_regularization_strength: A float value, must be greater than or equal + | to zero. Defaults to `0.0`. + | l2_regularization_strength: A float value, must be greater than or equal + | to zero. Defaults to `0.0`. + | l2_shrinkage_regularization_strength: A float value, must be greater + | than or equal to zero. This differs from L2 above in that the L2 + | above is a stabilization penalty, whereas this L2 shrinkage is a + | magnitude penalty. When input is sparse, shrinkage will only happen + | on the active weights. + | beta: A float value, representing the beta value from the paper. + | Defaults to `0.0`. + | name: String. The name to use + | for momentum accumulator weights created by + | the optimizer. + | weight_decay: Float. If set, weight decay is applied. + | clipnorm: Float. If set, the gradient of each weight is individually + | clipped so that its norm is no higher than this value. + | clipvalue: Float. If set, the gradient of each weight is clipped to be + | no higher than this value. + | global_clipnorm: Float. If set, the gradient of all weights is clipped + | so that their global norm is no higher than this value. + | use_ema: Boolean, defaults to `False`. + | If `True`, exponential moving average + | (EMA) is applied. EMA consists of computing an exponential moving + | average of the weights of the model (as the weight values change + | after each training batch), and periodically overwriting the + | weights with their moving average. + | ema_momentum: Float, defaults to 0.99. Only used if `use_ema=True`. + | This is the momentum to use when computing + | the EMA of the model's weights: + | `new_average = ema_momentum * old_average + (1 - ema_momentum) * + | current_variable_value`. + | ema_overwrite_frequency: Int or None, defaults to None. Only used if + | `use_ema=True`. Every `ema_overwrite_frequency` steps of iterations, + | we overwrite the model variable by its moving average.
+ | If None, the optimizer + | does not overwrite model variables in the middle of training, + | and you need to explicitly overwrite the variables + | at the end of training by calling + | `optimizer.finalize_variable_values()` (which updates the model + | variables in-place). When using the built-in `fit()` training loop, + | this happens automatically after the last epoch, + | and you don't need to do anything. + | loss_scale_factor: Float or `None`. If a float, the scale factor will + | be multiplied by the loss before computing gradients, and the inverse + | of the scale factor will be multiplied by the gradients before + | updating variables. Useful for preventing underflow during + | mixed precision training. Alternatively, + | `keras.optimizers.LossScaleOptimizer` will + | automatically set a loss scale factor. + | gradient_accumulation_steps: Int or `None`. If an int, model & optimizer + | variables will not be updated at every step; instead they will be + | updated every `gradient_accumulation_steps` steps, using the average + | value of the gradients since the last update. This is known as + | "gradient accumulation". This can be useful + | when your batch size is very small, in order to reduce gradient + | noise at each update step. + | + | Method resolution order: + | Ftrl + | keras.src.optimizers.optimizer.Optimizer + | keras.src.backend.tensorflow.optimizer.TFOptimizer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.optimizers.base_optimizer.BaseOptimizer + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | learning_rate=0.001, + | learning_rate_power=-0.5, + | initial_accumulator_value=0.1, + | l1_regularization_strength=0.0, + | l2_regularization_strength=0.0, + | l2_shrinkage_regularization_strength=0.0, + | beta=0.0, + | weight_decay=None, + | clipnorm=None, + | clipvalue=None, + | global_clipnorm=None, + | use_ema=False, + | ema_momentum=0.99, + | ema_overwrite_frequency=None, + | loss_scale_factor=None, + | gradient_accumulation_steps=None, + | name='ftrl', + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, var_list) + | Initialize optimizer variables. + | + | Args: + | var_list: list of model variables to build Ftrl variables on. + | + | get_config(self) + | Returns the config of the optimizer. + | + | An optimizer config is a Python dictionary (serializable) + | containing the configuration of an optimizer. + | The same optimizer can be reinstantiated later + | (without any saved state) from this configuration. + | + | Subclass optimizer should override this method to include other + | hyperparameters. + | + | Returns: + | Python dictionary. + | + | update_step( + | self, + | gradient, + | variable, + | learning_rate + | ) + | Update step given gradient and the associated model variable.
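Since every optimizer above accepts `gradient_accumulation_steps`, a minimal sketch of what it buys (the numbers are illustrative): with 4 accumulation steps, variables are updated once every 4 batches using the averaged gradient, approximating a 4x larger batch size without the extra memory:

```python
import keras

opt = keras.optimizers.Adam(
    learning_rate=1e-3,
    gradient_accumulation_steps=4,  # average gradients over 4 batches per update
)
```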
+ | + diff --git a/.tether/man/optimizer_lion.txt b/.tether/man/optimizer_lion.txt new file mode 100644 index 0000000000..f7f4b35021 --- /dev/null +++ b/.tether/man/optimizer_lion.txt @@ -0,0 +1,147 @@ +Help on class Lion in module keras.src.optimizers.lion: + +class Lion(keras.src.optimizers.optimizer.Optimizer) + | Lion(learning_rate=0.001, beta_1=0.9, beta_2=0.99, weight_decay=None, clipnorm=None, clipvalue=None, global_clipnorm=None, use_ema=False, ema_momentum=0.99, ema_overwrite_frequency=None, loss_scale_factor=None, gradient_accumulation_steps=None, name='lion', **kwargs) + | + | Optimizer that implements the Lion algorithm. + | + | The Lion optimizer is a stochastic-gradient-descent method that uses the + | sign operator to control the magnitude of the update, unlike other adaptive + | optimizers such as Adam that rely on second-order moments. This makes + | Lion more memory-efficient, as it only keeps track of the momentum. According + | to the authors (see reference), its performance gain over Adam grows with + | the batch size. Because the update of Lion is produced through the sign + | operation, resulting in a larger norm, a suitable learning rate for Lion is + | typically 3-10x smaller than that for AdamW. The weight decay for Lion + | should in turn be 3-10x larger than that for AdamW to maintain a + | similar strength (lr * wd). + | + | Args: + | learning_rate: A float, a + | `keras.optimizers.schedules.LearningRateSchedule` instance, or + | a callable that takes no arguments and returns the actual value to + | use. The learning rate. Defaults to `0.001`. + | beta_1: A float value or a constant float tensor, or a callable + | that takes no arguments and returns the actual value to use. The + | rate to combine the current gradient and the 1st moment estimate. + | Defaults to `0.9`. + | beta_2: A float value or a constant float tensor, or a callable + | that takes no arguments and returns the actual value to use. The + | exponential decay rate for the 1st moment estimate. Defaults to + | `0.99`. + | name: String. The name to use + | for momentum accumulator weights created by + | the optimizer. + | weight_decay: Float. If set, weight decay is applied. + | clipnorm: Float. If set, the gradient of each weight is individually + | clipped so that its norm is no higher than this value. + | clipvalue: Float. If set, the gradient of each weight is clipped to be + | no higher than this value. + | global_clipnorm: Float. If set, the gradient of all weights is clipped + | so that their global norm is no higher than this value. + | use_ema: Boolean, defaults to `False`. + | If `True`, exponential moving average + | (EMA) is applied. EMA consists of computing an exponential moving + | average of the weights of the model (as the weight values change + | after each training batch), and periodically overwriting the + | weights with their moving average. + | ema_momentum: Float, defaults to 0.99. Only used if `use_ema=True`. + | This is the momentum to use when computing + | the EMA of the model's weights: + | `new_average = ema_momentum * old_average + (1 - ema_momentum) * + | current_variable_value`. + | ema_overwrite_frequency: Int or None, defaults to None. Only used if + | `use_ema=True`. Every `ema_overwrite_frequency` steps of iterations, + | we overwrite the model variable by its moving average.
+ | If None, the optimizer + | does not overwrite model variables in the middle of training, + | and you need to explicitly overwrite the variables + | at the end of training by calling + | `optimizer.finalize_variable_values()` (which updates the model + | variables in-place). When using the built-in `fit()` training loop, + | this happens automatically after the last epoch, + | and you don't need to do anything. + | loss_scale_factor: Float or `None`. If a float, the scale factor will + | be multiplied by the loss before computing gradients, and the inverse + | of the scale factor will be multiplied by the gradients before + | updating variables. Useful for preventing underflow during + | mixed precision training. Alternatively, + | `keras.optimizers.LossScaleOptimizer` will + | automatically set a loss scale factor. + | gradient_accumulation_steps: Int or `None`. If an int, model & optimizer + | variables will not be updated at every step; instead they will be + | updated every `gradient_accumulation_steps` steps, using the average + | value of the gradients since the last update. This is known as + | "gradient accumulation". This can be useful + | when your batch size is very small, in order to reduce gradient + | noise at each update step. + | + | + | References: + | + | - [Chen et al., 2023](http://arxiv.org/abs/2302.06675) + | - [Authors' implementation]( + | http://github.com/google/automl/tree/master/lion) + | + | Method resolution order: + | Lion + | keras.src.optimizers.optimizer.Optimizer + | keras.src.backend.tensorflow.optimizer.TFOptimizer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.optimizers.base_optimizer.BaseOptimizer + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | learning_rate=0.001, + | beta_1=0.9, + | beta_2=0.99, + | weight_decay=None, + | clipnorm=None, + | clipvalue=None, + | global_clipnorm=None, + | use_ema=False, + | ema_momentum=0.99, + | ema_overwrite_frequency=None, + | loss_scale_factor=None, + | gradient_accumulation_steps=None, + | name='lion', + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, var_list) + | Initialize optimizer variables. + | + | Lion optimizer has one variable `momentums`. + | + | Args: + | var_list: list of model variables to build Lion variables on. + | + | get_config(self) + | Returns the config of the optimizer. + | + | An optimizer config is a Python dictionary (serializable) + | containing the configuration of an optimizer. + | The same optimizer can be reinstantiated later + | (without any saved state) from this configuration. + | + | Subclass optimizer should override this method to include other + | hyperparameters. + | + | Returns: + | Python dictionary. + | + | update_step( + | self, + | gradient, + | variable, + | learning_rate + | ) + | Update step given gradient and the associated model variable.
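As a concrete sketch of the tuning guidance in the Lion docstring above (the values are illustrative, not prescriptive): starting from typical AdamW settings of `lr=1e-3` and `weight_decay=1e-2`, Lion would get a roughly 10x smaller learning rate and a roughly 10x larger weight decay, keeping `lr * wd` comparable.

```python
import keras

inputs = keras.Input(shape=(4,))
outputs = keras.layers.Dense(1)(inputs)
model = keras.Model(inputs, outputs)

# Illustrative values only: lr scaled down ~10x and weight decay scaled
# up ~10x relative to a typical AdamW configuration.
model.compile(
    optimizer=keras.optimizers.Lion(learning_rate=1e-4, weight_decay=1e-1),
    loss="mse",
)
```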
+ | + diff --git a/.tether/man/optimizer_loss_scale.txt b/.tether/man/optimizer_loss_scale.txt new file mode 100644 index 0000000000..e048132740 --- /dev/null +++ b/.tether/man/optimizer_loss_scale.txt @@ -0,0 +1,181 @@ +Help on class LossScaleOptimizer in module keras.src.optimizers.loss_scale_optimizer: + +class LossScaleOptimizer(keras.src.optimizers.optimizer.Optimizer) + | LossScaleOptimizer(inner_optimizer, initial_scale=32768.0, dynamic_growth_steps=2000, **kwargs) + | + | An optimizer that dynamically scales the loss to prevent underflow. + | + | Loss scaling is a technique to prevent numeric underflow in intermediate + | gradients when float16 is used. To prevent underflow, the loss is multiplied + | (or "scaled") by a certain factor called the "loss scale", which causes + | intermediate gradients to be scaled by the loss scale as well. The final + | gradients are divided (or "unscaled") by the loss scale to bring them back + | to their original value. + | + | `LossScaleOptimizer` wraps another optimizer and applies dynamic loss + | scaling to it. This loss scale is dynamically updated over time as follows: + | - On any train step, if a nonfinite gradient is encountered, the loss scale + | is halved, and the train step is skipped. + | - If `dynamic_growth_steps` steps have occurred since the last time the loss scale + | was updated, and no nonfinite gradients have occurred, the loss scale + | is doubled. + | + | Args: + | inner_optimizer: The `keras.optimizers.Optimizer` instance to wrap. + | initial_scale: Float. The initial loss scale. This scale will be updated + | during training. It is recommended for this to be a very high + | number, because a loss scale that is too high gets lowered far more + | quickly than a loss scale that is too low gets raised. + | dynamic_growth_steps: Int. How often to update the scale upwards. After + | every `dynamic_growth_steps` steps with finite gradients, the + | loss scale is doubled. + | name: String. The name to use + | for momentum accumulator weights created by + | the optimizer. + | weight_decay: Float. If set, weight decay is applied. + | clipnorm: Float. If set, the gradient of each weight is individually + | clipped so that its norm is no higher than this value. + | clipvalue: Float. If set, the gradient of each weight is clipped to be + | no higher than this value. + | global_clipnorm: Float. If set, the gradient of all weights is clipped + | so that their global norm is no higher than this value. + | use_ema: Boolean, defaults to `False`. + | If `True`, exponential moving average + | (EMA) is applied. EMA consists of computing an exponential moving + | average of the weights of the model (as the weight values change + | after each training batch), and periodically overwriting the + | weights with their moving average. + | ema_momentum: Float, defaults to 0.99. Only used if `use_ema=True`. + | This is the momentum to use when computing + | the EMA of the model's weights: + | `new_average = ema_momentum * old_average + (1 - ema_momentum) * + | current_variable_value`. + | ema_overwrite_frequency: Int or None, defaults to None. Only used if + | `use_ema=True`. Every `ema_overwrite_frequency` steps of iterations, + | we overwrite the model variable by its moving average.
+ | If None, the optimizer + | does not overwrite model variables in the middle of training, + | and you need to explicitly overwrite the variables + | at the end of training by calling + | `optimizer.finalize_variable_values()` (which updates the model + | variables in-place). When using the built-in `fit()` training loop, + | this happens automatically after the last epoch, + | and you don't need to do anything. + | loss_scale_factor: Float or `None`. If a float, the scale factor will + | be multiplied by the loss before computing gradients, and the inverse + | of the scale factor will be multiplied by the gradients before + | updating variables. Useful for preventing underflow during + | mixed precision training. Alternatively, + | `keras.optimizers.LossScaleOptimizer` will + | automatically set a loss scale factor. + | gradient_accumulation_steps: Int or `None`. If an int, model & optimizer + | variables will not be updated at every step; instead they will be + | updated every `gradient_accumulation_steps` steps, using the average + | value of the gradients since the last update. This is known as + | "gradient accumulation". This can be useful + | when your batch size is very small, in order to reduce gradient + | noise at each update step. + | + | Method resolution order: + | LossScaleOptimizer + | keras.src.optimizers.optimizer.Optimizer + | keras.src.backend.tensorflow.optimizer.TFOptimizer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.optimizers.base_optimizer.BaseOptimizer + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | inner_optimizer, + | initial_scale=32768.0, + | dynamic_growth_steps=2000, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | apply( + | self, + | grads, + | trainable_variables=None + | ) + | Update trainable variables according to provided gradient values. + | + | `grads` should be a list of gradient tensors + | with 1:1 mapping to the list of variables the optimizer was built with. + | + | `trainable_variables` can be provided + | on the first call to build the optimizer. + | + | build(self, var_list) + | + | check_finite(self, grads) + | + | finalize_variable_values(self, var_list) + | Set the final value of model's trainable variables. + | + | Sometimes there are some extra steps before ending the variable updates, + | such as overriding the model variables with their average value. + | + | Args: + | var_list: list of model variables. + | + | get_config(self) + | Returns the config of the optimizer. + | + | An optimizer config is a Python dictionary (serializable) + | containing the configuration of an optimizer. + | The same optimizer can be reinstantiated later + | (without any saved state) from this configuration. + | + | Subclass optimizer should override this method to include other + | hyperparameters. + | + | Returns: + | Python dictionary. + | + | scale_loss(self, loss) + | Scale the loss before computing gradients. + | + | Scales the loss before gradients are computed in a `train_step`. This + | is primarily useful during mixed precision training to prevent numeric + | underflow.
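A minimal sketch of the wrapping pattern described above, using only the constructor arguments from the documented signature (the inner optimizer choice and the values are illustrative):

```python
import keras

inner = keras.optimizers.SGD(learning_rate=0.01)
optimizer = keras.optimizers.LossScaleOptimizer(
    inner,
    initial_scale=32768.0,      # halved whenever a nonfinite gradient appears
    dynamic_growth_steps=2000,  # doubled after this many finite-gradient steps
)

# In a custom train step, the loss would be scaled before differentiation:
# scaled_loss = optimizer.scale_loss(loss)
```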
+ | + | stateless_apply( + | self, + | optimizer_variables, + | grads, + | trainable_variables + | ) + | + | ---------------------------------------------------------------------- + | Class methods defined here: + | + | from_config(config, custom_objects=None) from builtins.type + | Creates an optimizer from its config. + | + | This method is the reverse of `get_config`, capable of instantiating the + | same optimizer from the config dictionary. + | + | Args: + | config: A Python dictionary, typically the output of get_config. + | custom_objects: A Python dictionary mapping names to additional + | user-defined Python objects needed to recreate this optimizer. + | + | Returns: + | An optimizer instance. + | + | ---------------------------------------------------------------------- + | Readonly properties defined here: + | + | variables + | + | ---------------------------------------------------------------------- + | Data descriptors defined here: + | + | learning_rate + | + diff --git a/.tether/man/optimizer_nadam.txt b/.tether/man/optimizer_nadam.txt new file mode 100644 index 0000000000..5890b8c9c6 --- /dev/null +++ b/.tether/man/optimizer_nadam.txt @@ -0,0 +1,142 @@ +Help on class Nadam in module keras.src.optimizers.nadam: + +class Nadam(keras.src.optimizers.optimizer.Optimizer) + | Nadam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, weight_decay=None, clipnorm=None, clipvalue=None, global_clipnorm=None, use_ema=False, ema_momentum=0.99, ema_overwrite_frequency=None, loss_scale_factor=None, gradient_accumulation_steps=None, name='nadam', **kwargs) + | + | Optimizer that implements the Nadam algorithm. + | + | Much like Adam is essentially RMSprop with momentum, Nadam is Adam with + | Nesterov momentum. + | + | Args: + | learning_rate: A float, a + | `keras.optimizers.schedules.LearningRateSchedule` instance, or + | a callable that takes no arguments and returns the actual value to + | use. The learning rate. Defaults to `0.001`. + | beta_1: A float value or a constant float tensor, or a callable + | that takes no arguments and returns the actual value to use. The + | exponential decay rate for the 1st moment estimates. + | Defaults to `0.9`. + | beta_2: A float value or a constant float tensor, or a callable + | that takes no arguments and returns the actual value to use. The + | exponential decay rate for the 2nd moment estimates. Defaults to + | `0.999`. + | epsilon: A small constant for numerical stability. This epsilon is + | "epsilon hat" in the Kingma and Ba paper (in the formula just before + | Section 2.1), not the epsilon in Algorithm 1 of the paper. + | Defaults to `1e-7`. + | name: String. The name to use + | for momentum accumulator weights created by + | the optimizer. + | weight_decay: Float. If set, weight decay is applied. + | clipnorm: Float. If set, the gradient of each weight is individually + | clipped so that its norm is no higher than this value. + | clipvalue: Float. If set, the gradient of each weight is clipped to be + | no higher than this value. + | global_clipnorm: Float. If set, the gradient of all weights is clipped + | so that their global norm is no higher than this value. + | use_ema: Boolean, defaults to `False`. + | If `True`, exponential moving average + | (EMA) is applied. EMA consists of computing an exponential moving + | average of the weights of the model (as the weight values change + | after each training batch), and periodically overwriting the + | weights with their moving average. + | ema_momentum: Float, defaults to 0.99. 
Only used if `use_ema=True`. + | This is the momentum to use when computing + | the EMA of the model's weights: + | `new_average = ema_momentum * old_average + (1 - ema_momentum) * + | current_variable_value`. + | ema_overwrite_frequency: Int or None, defaults to None. Only used if + | `use_ema=True`. Every `ema_overwrite_frequency` steps of iterations, + | we overwrite the model variable by its moving average. + | If None, the optimizer + | does not overwrite model variables in the middle of training, + | and you need to explicitly overwrite the variables + | at the end of training by calling + | `optimizer.finalize_variable_values()` (which updates the model + | variables in-place). When using the built-in `fit()` training loop, + | this happens automatically after the last epoch, + | and you don't need to do anything. + | loss_scale_factor: Float or `None`. If a float, the scale factor will + | be multiplied by the loss before computing gradients, and the inverse + | of the scale factor will be multiplied by the gradients before + | updating variables. Useful for preventing underflow during + | mixed precision training. Alternatively, + | `keras.optimizers.LossScaleOptimizer` will + | automatically set a loss scale factor. + | gradient_accumulation_steps: Int or `None`. If an int, model & optimizer + | variables will not be updated at every step; instead they will be + | updated every `gradient_accumulation_steps` steps, using the average + | value of the gradients since the last update. This is known as + | "gradient accumulation". This can be useful + | when your batch size is very small, in order to reduce gradient + | noise at each update step. + | + | + | Reference: + | + | - [Dozat, 2015](http://cs229.stanford.edu/proj2015/054_report.pdf). + | + | Method resolution order: + | Nadam + | keras.src.optimizers.optimizer.Optimizer + | keras.src.backend.tensorflow.optimizer.TFOptimizer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.optimizers.base_optimizer.BaseOptimizer + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | learning_rate=0.001, + | beta_1=0.9, + | beta_2=0.999, + | epsilon=1e-07, + | weight_decay=None, + | clipnorm=None, + | clipvalue=None, + | global_clipnorm=None, + | use_ema=False, + | ema_momentum=0.99, + | ema_overwrite_frequency=None, + | loss_scale_factor=None, + | gradient_accumulation_steps=None, + | name='nadam', + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, var_list) + | Initialize optimizer variables. + | + | Nadam optimizer has 2 types of variables: momentums and velocities. + | + | Args: + | var_list: list of model variables to build Nadam variables on. + | + | get_config(self) + | Returns the config of the optimizer. + | + | An optimizer config is a Python dictionary (serializable) + | containing the configuration of an optimizer. + | The same optimizer can be reinstantiated later + | (without any saved state) from this configuration. + | + | Subclass optimizer should override this method to include other + | hyperparameters. + | + | Returns: + | Python dictionary. + | + | update_step( + | self, + | gradient, + | variable, + | learning_rate + | ) + | Update step given gradient and the associated model variable.
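A small sketch of a single manual Nadam update, relying only on the `apply()` method documented in the `LossScaleOptimizer` entry above (inherited from the base optimizer); the gradient value here is made up for illustration:

```python
import keras
from keras import ops

optimizer = keras.optimizers.Nadam(learning_rate=0.1)
var = keras.Variable(10.0)
grad = ops.convert_to_tensor(2.0)  # pretend d(loss)/d(var) = 2.0
optimizer.apply([grad], [var])     # one Nadam step; builds the optimizer
```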
+ | + diff --git a/.tether/man/optimizer_rmsprop.txt b/.tether/man/optimizer_rmsprop.txt new file mode 100644 index 0000000000..a6a84fe100 --- /dev/null +++ b/.tether/man/optimizer_rmsprop.txt @@ -0,0 +1,153 @@ +Help on class RMSprop in module keras.src.optimizers.rmsprop: + +class RMSprop(keras.src.optimizers.optimizer.Optimizer) + | RMSprop(learning_rate=0.001, rho=0.9, momentum=0.0, epsilon=1e-07, centered=False, weight_decay=None, clipnorm=None, clipvalue=None, global_clipnorm=None, use_ema=False, ema_momentum=0.99, ema_overwrite_frequency=None, loss_scale_factor=None, gradient_accumulation_steps=None, name='rmsprop', **kwargs) + | + | Optimizer that implements the RMSprop algorithm. + | + | The gist of RMSprop is to: + | + | - Maintain a moving (discounted) average of the square of gradients + | - Divide the gradient by the root of this average + | + | This implementation of RMSprop uses plain momentum, not Nesterov momentum. + | + | The centered version additionally maintains a moving average of the + | gradients, and uses that average to estimate the variance. + | + | Args: + | learning_rate: A float, a + | `keras.optimizers.schedules.LearningRateSchedule` instance, or + | a callable that takes no arguments and returns the actual value to + | use. The learning rate. Defaults to `0.001`. + | rho: float, defaults to 0.9. Discounting factor for the old gradients. + | momentum: float, defaults to 0.0. If not 0.0, the optimizer tracks the + | momentum value, with a decay rate equal to `1 - momentum`. + | epsilon: A small constant for numerical stability. This epsilon is + | "epsilon hat" in the Kingma and Ba paper (in the formula just before + | Section 2.1), not the epsilon in Algorithm 1 of the paper. Defaults + | to 1e-7. + | centered: Boolean. If `True`, gradients are normalized by the estimated + | variance of the gradient; if False, by the uncentered second moment. + | Setting this to `True` may help with training, but is slightly more + | expensive in terms of computation and memory. Defaults to `False`. + | name: String. The name to use + | for momentum accumulator weights created by + | the optimizer. + | weight_decay: Float. If set, weight decay is applied. + | clipnorm: Float. If set, the gradient of each weight is individually + | clipped so that its norm is no higher than this value. + | clipvalue: Float. If set, the gradient of each weight is clipped to be + | no higher than this value. + | global_clipnorm: Float. If set, the gradient of all weights is clipped + | so that their global norm is no higher than this value. + | use_ema: Boolean, defaults to `False`. + | If `True`, exponential moving average + | (EMA) is applied. EMA consists of computing an exponential moving + | average of the weights of the model (as the weight values change + | after each training batch), and periodically overwriting the + | weights with their moving average. + | ema_momentum: Float, defaults to 0.99. Only used if `use_ema=True`. + | This is the momentum to use when computing + | the EMA of the model's weights: + | `new_average = ema_momentum * old_average + (1 - ema_momentum) * + | current_variable_value`. + | ema_overwrite_frequency: Int or None, defaults to None. Only used if + | `use_ema=True`. Every `ema_overwrite_frequency` steps of iterations, + | we overwrite the model variable by its moving average.
+ | If None, the optimizer + | does not overwrite model variables in the middle of training, + | and you need to explicitly overwrite the variables + | at the end of training by calling + | `optimizer.finalize_variable_values()` (which updates the model + | variables in-place). When using the built-in `fit()` training loop, + | this happens automatically after the last epoch, + | and you don't need to do anything. + | loss_scale_factor: Float or `None`. If a float, the scale factor will + | be multiplied by the loss before computing gradients, and the inverse + | of the scale factor will be multiplied by the gradients before + | updating variables. Useful for preventing underflow during + | mixed precision training. Alternatively, + | `keras.optimizers.LossScaleOptimizer` will + | automatically set a loss scale factor. + | gradient_accumulation_steps: Int or `None`. If an int, model & optimizer + | variables will not be updated at every step; instead they will be + | updated every `gradient_accumulation_steps` steps, using the average + | value of the gradients since the last update. This is known as + | "gradient accumulation". This can be useful + | when your batch size is very small, in order to reduce gradient + | noise at each update step. + | + | + | Example: + | + | >>> opt = keras.optimizers.RMSprop(learning_rate=0.1) + | >>> var1 = keras.backend.Variable(10.0) + | >>> loss = lambda: (var1 ** 2) / 2.0 # d(loss) / d(var1) = var1 + | >>> opt.minimize(loss, [var1]) + | >>> var1 + | 9.683772 + | + | Reference: + | + | - [Hinton, 2012]( + | http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) + | + | Method resolution order: + | RMSprop + | keras.src.optimizers.optimizer.Optimizer + | keras.src.backend.tensorflow.optimizer.TFOptimizer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.optimizers.base_optimizer.BaseOptimizer + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | learning_rate=0.001, + | rho=0.9, + | momentum=0.0, + | epsilon=1e-07, + | centered=False, + | weight_decay=None, + | clipnorm=None, + | clipvalue=None, + | global_clipnorm=None, + | use_ema=False, + | ema_momentum=0.99, + | ema_overwrite_frequency=None, + | loss_scale_factor=None, + | gradient_accumulation_steps=None, + | name='rmsprop', + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, var_list) + | + | get_config(self) + | Returns the config of the optimizer. + | + | An optimizer config is a Python dictionary (serializable) + | containing the configuration of an optimizer. + | The same optimizer can be reinstantiated later + | (without any saved state) from this configuration. + | + | Subclass optimizer should override this method to include other + | hyperparameters. + | + | Returns: + | Python dictionary. + | + | update_step( + | self, + | gradient, + | variable, + | learning_rate + | ) + | Update step given gradient and the associated model variable.
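Complementing the doctest above, a sketch of the centered variant described in the RMSprop docstring (the values are illustrative):

```python
import keras

# Centered RMSprop with momentum: normalizes by the estimated variance
# of the gradient, at some extra compute/memory cost.
optimizer = keras.optimizers.RMSprop(
    learning_rate=0.001, rho=0.9, momentum=0.9, centered=True
)
```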
+ | + diff --git a/.tether/man/optimizer_sgd.txt b/.tether/man/optimizer_sgd.txt new file mode 100644 index 0000000000..ec74522ba0 --- /dev/null +++ b/.tether/man/optimizer_sgd.txt @@ -0,0 +1,147 @@ +Help on class SGD in module keras.src.optimizers.sgd: + +class SGD(keras.src.optimizers.optimizer.Optimizer) + | SGD(learning_rate=0.01, momentum=0.0, nesterov=False, weight_decay=None, clipnorm=None, clipvalue=None, global_clipnorm=None, use_ema=False, ema_momentum=0.99, ema_overwrite_frequency=None, loss_scale_factor=None, gradient_accumulation_steps=None, name='SGD', **kwargs) + | + | Gradient descent (with momentum) optimizer. + | + | Update rule for parameter `w` with gradient `g` when `momentum` is 0: + | + | ```python + | w = w - learning_rate * g + | ``` + | + | Update rule when `momentum` is larger than 0: + | + | ```python + | velocity = momentum * velocity - learning_rate * g + | w = w + velocity + | ``` + | + | When `nesterov=True`, this rule becomes: + | + | ```python + | velocity = momentum * velocity - learning_rate * g + | w = w + momentum * velocity - learning_rate * g + | ``` + | + | Args: + | learning_rate: A float, a + | `keras.optimizers.schedules.LearningRateSchedule` instance, or + | a callable that takes no arguments and returns the actual value to + | use. The learning rate. Defaults to `0.01`. + | momentum: float hyperparameter >= 0 that accelerates gradient descent in + | the relevant direction and dampens oscillations. 0 is vanilla + | gradient descent. Defaults to `0.0`. + | nesterov: boolean. Whether to apply Nesterov momentum. + | Defaults to `False`. + | name: String. The name to use + | for momentum accumulator weights created by + | the optimizer. + | weight_decay: Float. If set, weight decay is applied. + | clipnorm: Float. If set, the gradient of each weight is individually + | clipped so that its norm is no higher than this value. + | clipvalue: Float. If set, the gradient of each weight is clipped to be + | no higher than this value. + | global_clipnorm: Float. If set, the gradient of all weights is clipped + | so that their global norm is no higher than this value. + | use_ema: Boolean, defaults to `False`. + | If `True`, exponential moving average + | (EMA) is applied. EMA consists of computing an exponential moving + | average of the weights of the model (as the weight values change + | after each training batch), and periodically overwriting the + | weights with their moving average. + | ema_momentum: Float, defaults to 0.99. Only used if `use_ema=True`. + | This is the momentum to use when computing + | the EMA of the model's weights: + | `new_average = ema_momentum * old_average + (1 - ema_momentum) * + | current_variable_value`. + | ema_overwrite_frequency: Int or None, defaults to None. Only used if + | `use_ema=True`. Every `ema_overwrite_frequency` steps of iterations, + | we overwrite the model variable by its moving average. + | If None, the optimizer + | does not overwrite model variables in the middle of training, + | and you need to explicitly overwrite the variables + | at the end of training by calling + | `optimizer.finalize_variable_values()` (which updates the model + | variables in-place). When using the built-in `fit()` training loop, + | this happens automatically after the last epoch, + | and you don't need to do anything. + | loss_scale_factor: Float or `None`. 
If a float, the scale factor will + | be multiplied by the loss before computing gradients, and the inverse + | of the scale factor will be multiplied by the gradients before + | updating variables. Useful for preventing underflow during + | mixed precision training. Alternatively, + | `keras.optimizers.LossScaleOptimizer` will + | automatically set a loss scale factor. + | gradient_accumulation_steps: Int or `None`. If an int, model & optimizer + | variables will not be updated at every step; instead they will be + | updated every `gradient_accumulation_steps` steps, using the average + | value of the gradients since the last update. This is known as + | "gradient accumulation". This can be useful + | when your batch size is very small, in order to reduce gradient + | noise at each update step. + | + | Method resolution order: + | SGD + | keras.src.optimizers.optimizer.Optimizer + | keras.src.backend.tensorflow.optimizer.TFOptimizer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.optimizers.base_optimizer.BaseOptimizer + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | learning_rate=0.01, + | momentum=0.0, + | nesterov=False, + | weight_decay=None, + | clipnorm=None, + | clipvalue=None, + | global_clipnorm=None, + | use_ema=False, + | ema_momentum=0.99, + | ema_overwrite_frequency=None, + | loss_scale_factor=None, + | gradient_accumulation_steps=None, + | name='SGD', + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, variables) + | Initialize optimizer variables. + | + | SGD optimizer has one variable `momentums`, only set if `self.momentum` + | is not 0. + | + | Args: + | var_list: list of model variables to build SGD variables on. + | + | get_config(self) + | Returns the config of the optimizer. + | + | An optimizer config is a Python dictionary (serializable) + | containing the configuration of an optimizer. + | The same optimizer can be reinstantiated later + | (without any saved state) from this configuration. + | + | Subclass optimizer should override this method to include other + | hyperparameters. + | + | Returns: + | Python dictionary. + | + | update_step( + | self, + | gradient, + | variable, + | learning_rate + | ) + | Update step given gradient and the associated model variable. + | + diff --git a/.tether/man/pack_x_y_sample_weight.txt b/.tether/man/pack_x_y_sample_weight.txt new file mode 100644 index 0000000000..e393f42890 --- /dev/null +++ b/.tether/man/pack_x_y_sample_weight.txt @@ -0,0 +1,32 @@ +__signature__ +keras.utils.pack_x_y_sample_weight( + x, + y=None, + sample_weight=None +) +__doc__ +Packs user-provided data into a tuple. + +This is a convenience utility for packing data into the tuple formats +that `Model.fit()` uses. + +Example: + +>>> x = ops.ones((10, 1)) +>>> data = pack_x_y_sample_weight(x) +>>> isinstance(data, ops.Tensor) +True +>>> y = ops.ones((10, 1)) +>>> data = pack_x_y_sample_weight(x, y) +>>> isinstance(data, tuple) +True +>>> x, y = data + +Args: + x: Features to pass to `Model`. + y: Ground-truth targets to pass to `Model`. + sample_weight: Sample weight for each element. + +Returns: + Tuple in the format used in `Model.fit()`.
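A short usage sketch for the three-argument case of `pack_x_y_sample_weight`, which returns the `(x, y, sample_weight)` tuple that `Model.fit()` unpacks internally (NumPy arrays used here to keep the example self-contained):

```python
import numpy as np
import keras

x = np.ones((10, 1))
y = np.zeros((10, 1))
sample_weight = np.full((10,), 0.5)

data = keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
x2, y2, sw2 = data  # round-trips as a plain 3-tuple
```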
+ diff --git a/.tether/man/pad_sequences.txt b/.tether/man/pad_sequences.txt new file mode 100644 index 0000000000..bc45a58b87 --- /dev/null +++ b/.tether/man/pad_sequences.txt @@ -0,0 +1,66 @@ +__signature__ +keras.utils.pad_sequences( + sequences, + maxlen=None, + dtype='int32', + padding='pre', + truncating='pre', + value=0.0 +) +__doc__ +Pads sequences to the same length. + +This function transforms a list (of length `num_samples`) +of sequences (lists of integers) +into a 2D NumPy array of shape `(num_samples, num_timesteps)`. +`num_timesteps` is either the `maxlen` argument if provided, +or the length of the longest sequence in the list. + +Sequences that are shorter than `num_timesteps` +are padded with `value` until they are `num_timesteps` long. + +Sequences longer than `num_timesteps` are truncated +so that they fit the desired length. + +The position where padding or truncation happens is determined by +the arguments `padding` and `truncating`, respectively. +Pre-padding or removing values from the beginning of the sequence is the +default. + +>>> sequence = [[1], [2, 3], [4, 5, 6]] +>>> keras.utils.pad_sequences(sequence) +array([[0, 0, 1], + [0, 2, 3], + [4, 5, 6]], dtype=int32) + +>>> keras.utils.pad_sequences(sequence, value=-1) +array([[-1, -1, 1], + [-1, 2, 3], + [ 4, 5, 6]], dtype=int32) + +>>> keras.utils.pad_sequences(sequence, padding='post') +array([[1, 0, 0], + [2, 3, 0], + [4, 5, 6]], dtype=int32) + +>>> keras.utils.pad_sequences(sequence, maxlen=2) +array([[0, 1], + [2, 3], + [5, 6]], dtype=int32) + +Args: + sequences: List of sequences (each sequence is a list of integers). + maxlen: Optional Int, maximum length of all sequences. If not provided, + sequences will be padded to the length of the longest individual + sequence. + dtype: (Optional, defaults to `"int32"`). Type of the output sequences. + To pad sequences with variable length strings, you can use `object`. + padding: String, "pre" or "post" (optional, defaults to `"pre"`): + pad either before or after each sequence. + truncating: String, "pre" or "post" (optional, defaults to `"pre"`): + remove values from sequences larger than + `maxlen`, either at the beginning or at the end of the sequences. + value: Float or String, padding value. (Optional, defaults to 0.) + +Returns: + NumPy array with shape `(len(sequences), maxlen)` diff --git a/.tether/man/plot.keras.src.models.model.Model.txt b/.tether/man/plot.keras.src.models.model.Model.txt new file mode 100644 index 0000000000..59598da22f --- /dev/null +++ b/.tether/man/plot.keras.src.models.model.Model.txt @@ -0,0 +1,38 @@ +__signature__ +keras.utils.model_to_dot( + model, + show_shapes=False, + show_dtype=False, + show_layer_names=True, + rankdir='TB', + expand_nested=False, + dpi=200, + subgraph=False, + show_layer_activations=False, + show_trainable=False, + **kwargs +) +__doc__ +Convert a Keras model to dot format. + +Args: + model: A Keras model instance. + show_shapes: whether to display shape information. + show_dtype: whether to display layer dtypes. + show_layer_names: whether to display layer names. + rankdir: `rankdir` argument passed to PyDot, + a string specifying the format of the plot: `"TB"` + creates a vertical plot; `"LR"` creates a horizontal plot. + expand_nested: whether to expand nested Functional models + into clusters. + dpi: Image resolution in dots per inch. + subgraph: whether to return a `pydot.Cluster` instance. + show_layer_activations: Display layer activations (only for layers that + have an `activation` property). 
+ show_trainable: whether to display if a layer is trainable. + +Returns: + A `pydot.Dot` instance representing the Keras model or + a `pydot.Cluster` instance representing nested model if + `subgraph=True`. + diff --git a/.tether/man/predict.keras.src.models.model.Model.txt b/.tether/man/predict.keras.src.models.model.Model.txt new file mode 100644 index 0000000000..ae4caab191 --- /dev/null +++ b/.tether/man/predict.keras.src.models.model.Model.txt @@ -0,0 +1,59 @@ +__signature__ +keras.Model.predict( + self, + x, + batch_size=None, + verbose='auto', + steps=None, + callbacks=None +) +__doc__ +Generates output predictions for the input samples. + +Computation is done in batches. This method is designed for batch +processing of large numbers of inputs. It is not intended for use inside +of loops that iterate over your data and process small numbers of inputs +at a time. + +For small numbers of inputs that fit in one batch, +directly use `__call__()` for faster execution, e.g., +`model(x)`, or `model(x, training=False)` if you have layers such as +`BatchNormalization` that behave differently during +inference. + +Note: See [this FAQ entry]( +https://keras.io/getting_started/faq/#whats-the-difference-between-model-methods-predict-and-call) +for more details about the difference between `Model` methods +`predict()` and `__call__()`. + +Args: + x: Input samples. It could be: + - A NumPy array (or array-like), or a list of arrays + (in case the model has multiple inputs). + - A tensor, or a list of tensors + (in case the model has multiple inputs). + - A `tf.data.Dataset`. + - A `keras.utils.PyDataset` instance. + batch_size: Integer or `None`. + Number of samples per batch. + If unspecified, `batch_size` will default to 32. + Do not specify the `batch_size` if your data is in the + form of dataset, generators, or `keras.utils.PyDataset` + instances (since they generate batches). + verbose: `"auto"`, 0, 1, or 2. Verbosity mode. + 0 = silent, 1 = progress bar, 2 = single line. + `"auto"` becomes 1 for most cases. Note that the progress bar + is not particularly useful when logged to a file, + so `verbose=2` is recommended when not running interactively + (e.g. in a production environment). Defaults to `"auto"`. + steps: Total number of steps (batches of samples) + before declaring the prediction round finished. + Ignored with the default value of `None`. + If `x` is a `tf.data.Dataset` and `steps` is `None`, + `predict()` will run until the input dataset is exhausted. + callbacks: List of `keras.callbacks.Callback` instances. + List of callbacks to apply during prediction. + +Returns: + NumPy array(s) of predictions. + diff --git a/.tether/man/predict_on_batch.txt b/.tether/man/predict_on_batch.txt new file mode 100644 index 0000000000..861213dc2e --- /dev/null +++ b/.tether/man/predict_on_batch.txt @@ -0,0 +1,11 @@ +__signature__ +keras.Model.predict_on_batch(self, x) +__doc__ +Returns predictions for a single batch of samples. + +Args: + x: Input data. It must be array-like. + +Returns: + NumPy array(s) of predictions. + diff --git a/.tether/man/quantize_weights.txt b/.tether/man/quantize_weights.txt new file mode 100644 index 0000000000..e1825477ce --- /dev/null +++ b/.tether/man/quantize_weights.txt @@ -0,0 +1,13 @@ +__signature__ +keras.Model.quantize(self, mode) +__doc__ +Quantize the weights of the model. + +Note that the model must be built first before calling this method. 
+`quantize` will recursively call `quantize(mode)` in all layers and +will be skipped for layers that don't implement the function. + +Args: + mode: The mode of the quantization. Only 'int8' is supported at this + time. + diff --git a/.tether/man/random_beta.txt b/.tether/man/random_beta.txt new file mode 100644 index 0000000000..c3b3284269 --- /dev/null +++ b/.tether/man/random_beta.txt @@ -0,0 +1,33 @@ +__signature__ +keras.random.beta( + shape, + alpha, + beta, + dtype=None, + seed=None +) +__doc__ +Draw samples from a Beta distribution. + +The values are drawn from a Beta distribution parametrized +by alpha and beta. + +Args: + shape: The shape of the random values to generate. + alpha: Float or an array of floats representing the first + parameter alpha. Must be broadcastable with `beta` and `shape`. + beta: Float or an array of floats representing the second + parameter beta. Must be broadcastable with `alpha` and `shape`. + dtype: Optional dtype of the tensor. Only floating point types are + supported. If not specified, `keras.config.floatx()` is used, + which defaults to `float32` unless you configured it otherwise (via + `keras.config.set_floatx(float_dtype)`). + seed: A Python integer or instance of + `keras.random.SeedGenerator`. + Used to make the behavior of the initializer + deterministic. Note that an initializer seeded with an integer + or None (unseeded) will produce the same random values + across multiple calls. To get different random values + across multiple calls, use as seed an instance + of `keras.random.SeedGenerator`. + diff --git a/.tether/man/random_binomial.txt b/.tether/man/random_binomial.txt new file mode 100644 index 0000000000..8847bc23aa --- /dev/null +++ b/.tether/man/random_binomial.txt @@ -0,0 +1,34 @@ +__signature__ +keras.random.binomial( + shape, + counts, + probabilities, + dtype=None, + seed=None +) +__doc__ +Draw samples from a Binomial distribution. + +The values are drawn from a Binomial distribution with +specified trial count and probability of success. + +Args: + shape: The shape of the random values to generate. + counts: A number or array of numbers representing the + number of trials. It must be broadcastable with `probabilities`. + probabilities: A float or array of floats representing the + probability of success of an individual event. + It must be broadcastable with `counts`. + dtype: Optional dtype of the tensor. Only floating point types are + supported. If not specified, `keras.config.floatx()` is used, + which defaults to `float32` unless you configured it otherwise (via + `keras.config.set_floatx(float_dtype)`). + seed: A Python integer or instance of + `keras.random.SeedGenerator`. + Used to make the behavior of the initializer + deterministic. Note that an initializer seeded with an integer + or None (unseeded) will produce the same random values + across multiple calls. To get different random values + across multiple calls, use as seed an instance + of `keras.random.SeedGenerator`. + diff --git a/.tether/man/random_categorical.txt b/.tether/man/random_categorical.txt new file mode 100644 index 0000000000..827b56f43a --- /dev/null +++ b/.tether/man/random_categorical.txt @@ -0,0 +1,39 @@ +__signature__ +keras.random.categorical( + logits, + num_samples, + dtype='int32', + seed=None +) +__doc__ +Draws samples from a categorical distribution. + +This function takes as input `logits`, a 2-D input tensor with shape +(batch_size, num_classes).
Each row of the input represents a categorical +distribution, with each column index containing the log-probability for a +given class. + +The function will output a 2-D tensor with shape (batch_size, num_samples), +where each row contains samples from the corresponding row in `logits`. +Each column index contains an independent sample drawn from the input +distribution. + +Args: + logits: 2-D Tensor with shape (batch_size, num_classes). Each row + should define a categorical distribution with the unnormalized + log-probabilities for all classes. + num_samples: Int, the number of independent samples to draw for each + row of the input. This will be the second dimension of the output + tensor's shape. + dtype: Optional dtype of the output tensor. + seed: A Python integer or instance of + `keras.random.SeedGenerator`. + Used to make the behavior of the initializer + deterministic. Note that an initializer seeded with an integer + or None (unseeded) will produce the same random values + across multiple calls. To get different random values + across multiple calls, use as seed an instance + of `keras.random.SeedGenerator`. + +Returns: + A 2-D tensor with shape (batch_size, num_samples). diff --git a/.tether/man/random_dropout.txt b/.tether/man/random_dropout.txt new file mode 100644 index 0000000000..b91d6a2adc --- /dev/null +++ b/.tether/man/random_dropout.txt @@ -0,0 +1,8 @@ +__signature__ +keras.random.dropout( + inputs, + rate, + noise_shape=None, + seed=None +) +__doc__ diff --git a/.tether/man/random_gamma.txt b/.tether/man/random_gamma.txt new file mode 100644 index 0000000000..b2e7941400 --- /dev/null +++ b/.tether/man/random_gamma.txt @@ -0,0 +1,25 @@ +__signature__ +keras.random.gamma( + shape, + alpha, + dtype=None, + seed=None +) +__doc__ +Draw random samples from the Gamma distribution. + +Args: + shape: The shape of the random values to generate. + alpha: Float, the parameter of the distribution. + dtype: Optional dtype of the tensor. Only floating point types are + supported. If not specified, `keras.config.floatx()` is used, + which defaults to `float32` unless you configured it otherwise (via + `keras.config.set_floatx(float_dtype)`). + seed: A Python integer or instance of + `keras.random.SeedGenerator`. + Used to make the behavior of the initializer + deterministic. Note that an initializer seeded with an integer + or None (unseeded) will produce the same random values + across multiple calls. To get different random values + across multiple calls, use as seed an instance + of `keras.random.SeedGenerator`. diff --git a/.tether/man/random_integer.txt b/.tether/man/random_integer.txt new file mode 100644 index 0000000000..aa05f10723 --- /dev/null +++ b/.tether/man/random_integer.txt @@ -0,0 +1,35 @@ +__signature__ +keras.random.randint( + shape, + minval, + maxval, + dtype='int32', + seed=None +) +__doc__ +Draw random integers from a uniform distribution. + +The generated values follow a uniform distribution in the range +`[minval, maxval)`. The lower bound `minval` is included in the range, +while the upper bound `maxval` is excluded. + +`dtype` must be an integer type. + +Args: + shape: The shape of the random values to generate. + minval: Int, defaults to 0. Lower bound of the range of + random values to generate (inclusive). + maxval: Int, defaults to 1. Upper bound of the range of + random values to generate (exclusive). + dtype: Optional dtype of the tensor. Only integer types are + supported.
If not specified, `"int32"` is used. + seed: A Python integer or instance of + `keras.random.SeedGenerator`. + Used to make the behavior of the initializer + deterministic. Note that an initializer seeded with an integer + or None (unseeded) will produce the same random values + across multiple calls. To get different random values + across multiple calls, use as seed an instance + of `keras.random.SeedGenerator`. diff --git a/.tether/man/random_normal.txt b/.tether/man/random_normal.txt new file mode 100644 index 0000000000..e64e3eea14 --- /dev/null +++ b/.tether/man/random_normal.txt @@ -0,0 +1,28 @@ +__signature__ +keras.random.normal( + shape, + mean=0.0, + stddev=1.0, + dtype=None, + seed=None +) +__doc__ +Draw random samples from a normal (Gaussian) distribution. + +Args: + shape: The shape of the random values to generate. + mean: Float, defaults to 0. Mean of the random values to generate. + stddev: Float, defaults to 1. Standard deviation of the random values + to generate. + dtype: Optional dtype of the tensor. Only floating point types are + supported. If not specified, `keras.config.floatx()` is used, + which defaults to `float32` unless you configured it otherwise (via + `keras.config.set_floatx(float_dtype)`). + seed: A Python integer or instance of + `keras.random.SeedGenerator`. + Used to make the behavior of the initializer + deterministic. Note that an initializer seeded with an integer + or None (unseeded) will produce the same random values + across multiple calls. To get different random values + across multiple calls, use as seed an instance + of `keras.random.SeedGenerator`. diff --git a/.tether/man/random_seed_generator.txt b/.tether/man/random_seed_generator.txt new file mode 100644 index 0000000000..e44a3d8c02 --- /dev/null +++ b/.tether/man/random_seed_generator.txt @@ -0,0 +1,70 @@ +Help on class SeedGenerator in module keras.src.random.seed_generator: + +class SeedGenerator(builtins.object) + | SeedGenerator( + | seed=None, + | name=None, + | **kwargs + | ) + | + | Generates variable seeds upon each call to a RNG-using function. + | + | In Keras, all RNG-using methods (such as `keras.random.normal()`) + | are stateless, meaning that if you pass an integer seed to them + | (such as `seed=42`), they will return the same values at each call. + | In order to get different values at each call, you must use a + | `SeedGenerator` instead as the seed argument. The `SeedGenerator` + | object is stateful. + | + | Example: + | + | ```python + | seed_gen = keras.random.SeedGenerator(seed=42) + | values = keras.random.normal(shape=(2, 3), seed=seed_gen) + | new_values = keras.random.normal(shape=(2, 3), seed=seed_gen) + | ``` + | + | Usage in a layer: + | + | ```python + | class Dropout(keras.Layer): + | def __init__(self, **kwargs): + | super().__init__(**kwargs) + | self.seed_generator = keras.random.SeedGenerator(1337) + | + | def call(self, x, training=False): + | if training: + | return keras.random.dropout( + | x, rate=0.5, seed=self.seed_generator + | ) + | return x + | ``` + | + | Methods defined here: + | + | __init__( + | self, + | seed=None, + | name=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature.
+ | + | get_config(self) + | + | next(self, ordered=True) + | + | ---------------------------------------------------------------------- + | Class methods defined here: + | + | from_config(config) from builtins.type + | + | ---------------------------------------------------------------------- + | Data descriptors defined here: + | + | __dict__ + | dictionary for instance variables + | + | __weakref__ + | list of weak references to the object + diff --git a/.tether/man/random_shuffle.txt b/.tether/man/random_shuffle.txt new file mode 100644 index 0000000000..25d7adeabd --- /dev/null +++ b/.tether/man/random_shuffle.txt @@ -0,0 +1,21 @@ +__signature__ +keras.random.shuffle( + x, + axis=0, + seed=None +) +__doc__ +Shuffle the elements of a tensor uniformly at random along an axis. + +Args: + x: The tensor to be shuffled. + axis: An integer specifying the axis along which to shuffle. Defaults to + `0`. + seed: A Python integer or instance of + `keras.random.SeedGenerator`. + Used to make the behavior of the initializer + deterministic. Note that an initializer seeded with an integer + or None (unseeded) will produce the same random values + across multiple calls. To get different random values + across multiple calls, use as seed an instance + of `keras.random.SeedGenerator`. diff --git a/.tether/man/random_truncated_normal.txt b/.tether/man/random_truncated_normal.txt new file mode 100644 index 0000000000..dc0dacb025 --- /dev/null +++ b/.tether/man/random_truncated_normal.txt @@ -0,0 +1,32 @@ +__signature__ +keras.random.truncated_normal( + shape, + mean=0.0, + stddev=1.0, + dtype=None, + seed=None +) +__doc__ +Draw samples from a truncated normal distribution. + +The values are drawn from a normal distribution with specified mean and +standard deviation, discarding and re-drawing any samples that are more +than two standard deviations from the mean. + +Args: + shape: The shape of the random values to generate. + mean: Float, defaults to 0. Mean of the random values to generate. + stddev: Float, defaults to 1. Standard deviation of the random values + to generate. + dtype: Optional dtype of the tensor. Only floating point types are + supported. If not specified, `keras.config.floatx()` is used, + which defaults to `float32` unless you configured it otherwise (via + `keras.config.set_floatx(float_dtype)`). + seed: A Python integer or instance of + `keras.random.SeedGenerator`. + Used to make the behavior of the initializer + deterministic. Note that an initializer seeded with an integer + or None (unseeded) will produce the same random values + across multiple calls. To get different random values + across multiple calls, use as seed an instance + of `keras.random.SeedGenerator`. diff --git a/.tether/man/random_uniform.txt b/.tether/man/random_uniform.txt new file mode 100644 index 0000000000..e7de9c1cf4 --- /dev/null +++ b/.tether/man/random_uniform.txt @@ -0,0 +1,35 @@ +__signature__ +keras.random.uniform( + shape, + minval=0.0, + maxval=1.0, + dtype=None, + seed=None +) +__doc__ +Draw samples from a uniform distribution. + +The generated values follow a uniform distribution in the range +`[minval, maxval)`. The lower bound `minval` is included in the range, +while the upper bound `maxval` is excluded. + +`dtype` must be a floating point type; the default range is `[0, 1)`. + +Args: + shape: The shape of the random values to generate. + minval: Float, defaults to 0. Lower bound of the range of + random values to generate (inclusive). + maxval: Float, defaults to 1.
Upper bound of the range of + random values to generate (exclusive). + dtype: Optional dtype of the tensor. Only floating point types are + supported. If not specified, `keras.config.floatx()` is used, + which defaults to `float32` unless you configured it otherwise (via + `keras.config.set_floatx(float_dtype)`). + seed: A Python integer or instance of + `keras.random.SeedGenerator`. + Used to make the behavior of the initializer + deterministic. Note that an initializer seeded with an integer + or None (unseeded) will produce the same random values + across multiple calls. To get different random values + across multiple calls, use as seed an instance + of `keras.random.SeedGenerator`. diff --git a/.tether/man/register_keras_serializable.txt b/.tether/man/register_keras_serializable.txt new file mode 100644 index 0000000000..a6beb980be --- /dev/null +++ b/.tether/man/register_keras_serializable.txt @@ -0,0 +1,41 @@ +__signature__ +keras.saving.register_keras_serializable(package='Custom', name=None) +__doc__ +Registers an object with the Keras serialization framework. + +This decorator injects the decorated class or function into the Keras custom +object dictionary, so that it can be serialized and deserialized without +needing an entry in the user-provided custom object dict. It also injects a +function that Keras will call to get the object's serializable string key. + +Note that to be serialized and deserialized, classes must implement the +`get_config()` method. Functions do not have this requirement. + +The object will be registered under the key `'package>name'` where `name` +defaults to the object name if not passed. + +Example: + +```python +# Note that `'my_package'` is used as the `package` argument here, and since +# the `name` argument is not provided, `'MyDense'` is used as the `name`. +@register_keras_serializable('my_package') +class MyDense(keras.layers.Dense): + pass + +assert get_registered_object('my_package>MyDense') == MyDense +assert get_registered_name(MyDense) == 'my_package>MyDense' +``` + +Args: + package: The package that this class belongs to. This is used for the + `key` (which is `"package>name"`) to identify the class. Note that + this is the first argument passed into the decorator. + name: The name to serialize this class under in this package. If not + provided or `None`, the class' name will be used (note that this is + the case when the decorator is used with only one argument, which + becomes the `package`). + +Returns: + A decorator that registers the decorated class with the passed names. + diff --git a/.tether/man/regularizer_l1.txt b/.tether/man/regularizer_l1.txt new file mode 100644 index 0000000000..7fa399d07c --- /dev/null +++ b/.tether/man/regularizer_l1.txt @@ -0,0 +1,50 @@ +Help on class L1 in module keras.src.regularizers.regularizers: + +class L1(Regularizer) + | L1(l1=0.01) + | + | A regularizer that applies an L1 regularization penalty. + | + | The L1 regularization penalty is computed as: + | `loss = l1 * reduce_sum(abs(x))` + | + | L1 may be passed to a layer as a string identifier: + | + | >>> dense = Dense(3, kernel_regularizer='l1') + | + | In this case, the default value used is `l1=0.01`. + | + | Arguments: + | l1: float, L1 regularization factor. + | + | Method resolution order: + | L1 + | Regularizer + | builtins.object + | + | Methods defined here: + | + | __call__(self, x) + | Compute a regularization penalty from an input tensor. + | + | __init__(self, l1=0.01) + | Initialize self. See help(type(self)) for accurate signature.
+ |
+ |  get_config(self)
+ |      Returns the config of the regularizer.
+ |
+ |      A regularizer config is a Python dictionary (serializable)
+ |      containing all configuration parameters of the regularizer.
+ |      The same regularizer can be reinstantiated later
+ |      (without any saved state) from this configuration.
+ |
+ |      This method is optional if you are just training and executing models,
+ |      exporting to and from SavedModels, or using weight checkpoints.
+ |
+ |      This method is required for Keras `model_to_estimator`, saving and
+ |      loading models to HDF5 formats, Keras model cloning, some visualization
+ |      utilities, and exporting models to and from JSON.
+ |
+ |      Returns:
+ |          Python dictionary.
+ |
diff --git a/.tether/man/regularizer_l1_l2.txt b/.tether/man/regularizer_l1_l2.txt
new file mode 100644
index 0000000000..4ec4a67a3d
--- /dev/null
+++ b/.tether/man/regularizer_l1_l2.txt
@@ -0,0 +1,58 @@
+Help on class L1L2 in module keras.src.regularizers.regularizers:
+
+class L1L2(Regularizer)
+ |  L1L2(l1=0.0, l2=0.0)
+ |
+ |  A regularizer that applies both L1 and L2 regularization penalties.
+ |
+ |  The L1 regularization penalty is computed as:
+ |  `loss = l1 * reduce_sum(abs(x))`
+ |
+ |  The L2 regularization penalty is computed as:
+ |  `loss = l2 * reduce_sum(square(x))`
+ |
+ |  L1L2 may be passed to a layer as a string identifier:
+ |
+ |  >>> dense = Dense(3, kernel_regularizer='l1_l2')
+ |
+ |  In this case, the default values used are `l1=0.01` and `l2=0.01`.
+ |
+ |  Arguments:
+ |      l1: float, L1 regularization factor.
+ |      l2: float, L2 regularization factor.
+ |
+ |  Method resolution order:
+ |      L1L2
+ |      Regularizer
+ |      builtins.object
+ |
+ |  Methods defined here:
+ |
+ |  __call__(self, x)
+ |      Compute a regularization penalty from an input tensor.
+ |
+ |  __init__(
+ |    self,
+ |    l1=0.0,
+ |    l2=0.0
+ |  )
+ |      Initialize self.  See help(type(self)) for accurate signature.
+ |
+ |  get_config(self)
+ |      Returns the config of the regularizer.
+ |
+ |      A regularizer config is a Python dictionary (serializable)
+ |      containing all configuration parameters of the regularizer.
+ |      The same regularizer can be reinstantiated later
+ |      (without any saved state) from this configuration.
+ |
+ |      This method is optional if you are just training and executing models,
+ |      exporting to and from SavedModels, or using weight checkpoints.
+ |
+ |      This method is required for Keras `model_to_estimator`, saving and
+ |      loading models to HDF5 formats, Keras model cloning, some visualization
+ |      utilities, and exporting models to and from JSON.
+ |
+ |      Returns:
+ |          Python dictionary.
+ |
diff --git a/.tether/man/regularizer_l2.txt b/.tether/man/regularizer_l2.txt
new file mode 100644
index 0000000000..2c80891d41
--- /dev/null
+++ b/.tether/man/regularizer_l2.txt
@@ -0,0 +1,50 @@
+Help on class L2 in module keras.src.regularizers.regularizers:
+
+class L2(Regularizer)
+ |  L2(l2=0.01)
+ |
+ |  A regularizer that applies an L2 regularization penalty.
+ |
+ |  The L2 regularization penalty is computed as:
+ |  `loss = l2 * reduce_sum(square(x))`
+ |
+ |  L2 may be passed to a layer as a string identifier:
+ |
+ |  >>> dense = Dense(3, kernel_regularizer='l2')
+ |
+ |  In this case, the default value used is `l2=0.01`.
+ |
+ |  Arguments:
+ |      l2: float, L2 regularization factor.
+ |
+ |  Method resolution order:
+ |      L2
+ |      Regularizer
+ |      builtins.object
+ |
+ |  Methods defined here:
+ |
+ |  __call__(self, x)
+ |      Compute a regularization penalty from an input tensor.
+ |
+ |  __init__(self, l2=0.01)
+ |      Initialize self.
See help(type(self)) for accurate signature.
+ |
+ |  get_config(self)
+ |      Returns the config of the regularizer.
+ |
+ |      A regularizer config is a Python dictionary (serializable)
+ |      containing all configuration parameters of the regularizer.
+ |      The same regularizer can be reinstantiated later
+ |      (without any saved state) from this configuration.
+ |
+ |      This method is optional if you are just training and executing models,
+ |      exporting to and from SavedModels, or using weight checkpoints.
+ |
+ |      This method is required for Keras `model_to_estimator`, saving and
+ |      loading models to HDF5 formats, Keras model cloning, some visualization
+ |      utilities, and exporting models to and from JSON.
+ |
+ |      Returns:
+ |          Python dictionary.
+ |
diff --git a/.tether/man/regularizer_orthogonal.txt b/.tether/man/regularizer_orthogonal.txt
new file mode 100644
index 0000000000..4dcc5c57d0
--- /dev/null
+++ b/.tether/man/regularizer_orthogonal.txt
@@ -0,0 +1,63 @@
+Help on class OrthogonalRegularizer in module keras.src.regularizers.regularizers:
+
+class OrthogonalRegularizer(Regularizer)
+ |  OrthogonalRegularizer(factor=0.01, mode='rows')
+ |
+ |  Regularizer that encourages input vectors to be orthogonal to each other.
+ |
+ |  It can be applied to either the rows of a matrix (`mode="rows"`) or its
+ |  columns (`mode="columns"`). When applied to a `Dense` kernel of shape
+ |  `(input_dim, units)`, rows mode will seek to make the feature vectors
+ |  (i.e. the basis of the output space) orthogonal to each other.
+ |
+ |  Arguments:
+ |      factor: Float. The regularization factor. The regularization penalty
+ |          will be proportional to `factor` times the mean of the dot products
+ |          between the L2-normalized rows (if `mode="rows"`, or columns if
+ |          `mode="columns"`) of the inputs, excluding the product of each
+ |          row/column with itself. Defaults to `0.01`.
+ |      mode: String, one of `{"rows", "columns"}`. Defaults to `"rows"`. In
+ |          rows mode, the regularization effect seeks to make the rows of the
+ |          input orthogonal to each other. In columns mode, it seeks to make
+ |          the columns of the input orthogonal to each other.
+ |
+ |  Example:
+ |
+ |  >>> regularizer = OrthogonalRegularizer(factor=0.01)
+ |  >>> layer = Dense(units=4, kernel_regularizer=regularizer)
+ |
+ |  Method resolution order:
+ |      OrthogonalRegularizer
+ |      Regularizer
+ |      builtins.object
+ |
+ |  Methods defined here:
+ |
+ |  __call__(self, inputs)
+ |      Compute a regularization penalty from an input tensor.
+ |
+ |  __init__(
+ |    self,
+ |    factor=0.01,
+ |    mode='rows'
+ |  )
+ |      Initialize self.  See help(type(self)) for accurate signature.
+ |
+ |  get_config(self)
+ |      Returns the config of the regularizer.
+ |
+ |      A regularizer config is a Python dictionary (serializable)
+ |      containing all configuration parameters of the regularizer.
+ |      The same regularizer can be reinstantiated later
+ |      (without any saved state) from this configuration.
+ |
+ |      This method is optional if you are just training and executing models,
+ |      exporting to and from SavedModels, or using weight checkpoints.
+ |
+ |      This method is required for Keras `model_to_estimator`, saving and
+ |      loading models to HDF5 formats, Keras model cloning, some visualization
+ |      utilities, and exporting models to and from JSON.
+ |
+ |      Returns:
+ |          Python dictionary.
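+ |
+ |  A short usage sketch (editor's addition, not part of the upstream
+ |  docstring), assuming the class is exposed as
+ |  `keras.regularizers.OrthogonalRegularizer`:
+ |
+ |  >>> import keras
+ |  >>> reg = keras.regularizers.OrthogonalRegularizer(factor=0.01, mode="rows")
+ |  >>> layer = keras.layers.Dense(units=4, kernel_regularizer=reg)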
+ |
diff --git a/.tether/man/rnn_cell_gru.txt b/.tether/man/rnn_cell_gru.txt
new file mode 100644
index 0000000000..be7352e8ff
--- /dev/null
+++ b/.tether/man/rnn_cell_gru.txt
@@ -0,0 +1,127 @@
+Help on class GRUCell in module keras.src.layers.rnn.gru:
+
+class GRUCell(keras.src.layers.layer.Layer, keras.src.layers.rnn.dropout_rnn_cell.DropoutRNNCell)
+ |  GRUCell(units, activation='tanh', recurrent_activation='sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0.0, recurrent_dropout=0.0, reset_after=True, seed=None, **kwargs)
+ |
+ |  Cell class for the GRU layer.
+ |
+ |  This class processes one step within the whole time sequence input, whereas
+ |  `keras.layers.GRU` processes the whole sequence.
+ |
+ |  Args:
+ |      units: Positive integer, dimensionality of the output space.
+ |      activation: Activation function to use. Default: hyperbolic tangent
+ |          (`tanh`). If you pass `None`, no activation is applied
+ |          (i.e. "linear" activation: `a(x) = x`).
+ |      recurrent_activation: Activation function to use for the recurrent step.
+ |          Default: sigmoid (`sigmoid`). If you pass `None`, no activation is
+ |          applied (i.e. "linear" activation: `a(x) = x`).
+ |      use_bias: Boolean (default `True`), whether the layer
+ |          should use a bias vector.
+ |      kernel_initializer: Initializer for the `kernel` weights matrix,
+ |          used for the linear transformation of the inputs. Default:
+ |          `"glorot_uniform"`.
+ |      recurrent_initializer: Initializer for the `recurrent_kernel`
+ |          weights matrix, used for the linear transformation
+ |          of the recurrent state. Default: `"orthogonal"`.
+ |      bias_initializer: Initializer for the bias vector. Default: `"zeros"`.
+ |      kernel_regularizer: Regularizer function applied to the `kernel` weights
+ |          matrix. Default: `None`.
+ |      recurrent_regularizer: Regularizer function applied to the
+ |          `recurrent_kernel` weights matrix. Default: `None`.
+ |      bias_regularizer: Regularizer function applied to the bias vector.
+ |          Default: `None`.
+ |      kernel_constraint: Constraint function applied to the `kernel` weights
+ |          matrix. Default: `None`.
+ |      recurrent_constraint: Constraint function applied to the
+ |          `recurrent_kernel` weights matrix. Default: `None`.
+ |      bias_constraint: Constraint function applied to the bias vector.
+ |          Default: `None`.
+ |      dropout: Float between 0 and 1. Fraction of the units to drop for the
+ |          linear transformation of the inputs. Default: 0.
+ |      recurrent_dropout: Float between 0 and 1. Fraction of the units to drop
+ |          for the linear transformation of the recurrent state. Default: 0.
+ |      reset_after: GRU convention (whether to apply reset gate after or
+ |          before matrix multiplication). False = "before",
+ |          True = "after" (default and cuDNN compatible).
+ |      seed: Random seed for dropout.
+ |
+ |  Call arguments:
+ |      inputs: A 2D tensor, with shape `(batch, features)`.
+ |      states: A 2D tensor with shape `(batch, units)`, which is the state
+ |          from the previous time step.
+ |      training: Python boolean indicating whether the layer should behave in
+ |          training mode or in inference mode. Only relevant when `dropout` or
+ |          `recurrent_dropout` is used.
+ |
+ |  Example:
+ |
+ |  >>> inputs = np.random.random((32, 10, 8))
+ |  >>> rnn = keras.layers.RNN(keras.layers.GRUCell(4))
+ |  >>> output = rnn(inputs)
+ |  >>> output.shape
+ |  (32, 4)
+ |  >>> rnn = keras.layers.RNN(
+ |  ...    keras.layers.GRUCell(4),
+ |  ...    return_sequences=True,
+ |  ...    return_state=True)
+ |  >>> whole_sequence_output, final_state = rnn(inputs)
+ |  >>> whole_sequence_output.shape
+ |  (32, 10, 4)
+ |  >>> final_state.shape
+ |  (32, 4)
+ |
+ |  Method resolution order:
+ |      GRUCell
+ |      keras.src.layers.layer.Layer
+ |      keras.src.backend.tensorflow.layer.TFLayer
+ |      keras.src.backend.tensorflow.trackable.KerasAutoTrackable
+ |      tensorflow.python.trackable.autotrackable.AutoTrackable
+ |      tensorflow.python.trackable.base.Trackable
+ |      keras.src.ops.operation.Operation
+ |      keras.src.layers.rnn.dropout_rnn_cell.DropoutRNNCell
+ |      builtins.object
+ |
+ |  Methods defined here:
+ |
+ |  __init__(
+ |    self,
+ |    units,
+ |    activation='tanh',
+ |    recurrent_activation='sigmoid',
+ |    use_bias=True,
+ |    kernel_initializer='glorot_uniform',
+ |    recurrent_initializer='orthogonal',
+ |    bias_initializer='zeros',
+ |    kernel_regularizer=None,
+ |    recurrent_regularizer=None,
+ |    bias_regularizer=None,
+ |    kernel_constraint=None,
+ |    recurrent_constraint=None,
+ |    bias_constraint=None,
+ |    dropout=0.0,
+ |    recurrent_dropout=0.0,
+ |    reset_after=True,
+ |    seed=None,
+ |    **kwargs
+ |  )
+ |      Initialize self.  See help(type(self)) for accurate signature.
+ |
+ |  build(self, input_shape)
+ |
+ |  call(
+ |    self,
+ |    inputs,
+ |    states,
+ |    training=False
+ |  )
+ |
+ |  get_config(self)
+ |      Returns the config of the object.
+ |
+ |      An object config is a Python dictionary (serializable)
+ |      containing the information needed to re-instantiate it.
+ |
+ |  get_initial_state(self, batch_size=None)
+
diff --git a/.tether/man/rnn_cell_lstm.txt b/.tether/man/rnn_cell_lstm.txt
new file mode 100644
index 0000000000..70e9b08aa8
--- /dev/null
+++ b/.tether/man/rnn_cell_lstm.txt
@@ -0,0 +1,129 @@
+Help on class LSTMCell in module keras.src.layers.rnn.lstm:
+
+class LSTMCell(keras.src.layers.layer.Layer, keras.src.layers.rnn.dropout_rnn_cell.DropoutRNNCell)
+ |  LSTMCell(units, activation='tanh', recurrent_activation='sigmoid', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', unit_forget_bias=True, kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0.0, recurrent_dropout=0.0, seed=None, **kwargs)
+ |
+ |  Cell class for the LSTM layer.
+ |
+ |  This class processes one step within the whole time sequence input, whereas
+ |  `keras.layers.LSTM` processes the whole sequence.
+ |
+ |  Args:
+ |      units: Positive integer, dimensionality of the output space.
+ |      activation: Activation function to use. Default: hyperbolic tangent
+ |          (`tanh`). If you pass `None`, no activation is applied
+ |          (i.e. "linear" activation: `a(x) = x`).
+ |      recurrent_activation: Activation function to use for the recurrent step.
+ |          Default: sigmoid (`sigmoid`). If you pass `None`, no activation is
+ |          applied (i.e. "linear" activation: `a(x) = x`).
+ |      use_bias: Boolean (default `True`), whether the layer
+ |          should use a bias vector.
+ |      kernel_initializer: Initializer for the `kernel` weights matrix,
+ |          used for the linear transformation of the inputs. Default:
+ |          `"glorot_uniform"`.
+ | recurrent_initializer: Initializer for the `recurrent_kernel` + | weights matrix, used for the linear transformation + | of the recurrent state. Default: `"orthogonal"`. + | bias_initializer: Initializer for the bias vector. Default: `"zeros"`. + | unit_forget_bias: Boolean (default `True`). If `True`, + | add 1 to the bias of the forget gate at initialization. + | Setting it to `True` will also force `bias_initializer="zeros"`. + | This is recommended in [Jozefowicz et al.]( + | https://github.com/mlresearch/v37/blob/gh-pages/jozefowicz15.pdf) + | kernel_regularizer: Regularizer function applied to the `kernel` weights + | matrix. Default: `None`. + | recurrent_regularizer: Regularizer function applied to the + | `recurrent_kernel` weights matrix. Default: `None`. + | bias_regularizer: Regularizer function applied to the bias vector. + | Default: `None`. + | kernel_constraint: Constraint function applied to the `kernel` weights + | matrix. Default: `None`. + | recurrent_constraint: Constraint function applied to the + | `recurrent_kernel` weights matrix. Default: `None`. + | bias_constraint: Constraint function applied to the bias vector. + | Default: `None`. + | dropout: Float between 0 and 1. Fraction of the units to drop for the + | linear transformation of the inputs. Default: 0. + | recurrent_dropout: Float between 0 and 1. Fraction of the units to drop + | for the linear transformation of the recurrent state. Default: 0. + | seed: Random seed for dropout. + | + | Call arguments: + | inputs: A 2D tensor, with shape `(batch, features)`. + | states: A 2D tensor with shape `(batch, units)`, which is the state + | from the previous time step. + | training: Python boolean indicating whether the layer should behave in + | training mode or in inference mode. Only relevant when `dropout` or + | `recurrent_dropout` is used. + | + | Example: + | + | >>> inputs = np.random.random((32, 10, 8)) + | >>> rnn = keras.layers.RNN(keras.layers.LSTMCell(4)) + | >>> output = rnn(inputs) + | >>> output.shape + | (32, 4) + | >>> rnn = keras.layers.RNN( + | ... keras.layers.LSTMCell(4), + | ... return_sequences=True, + | ... return_state=True) + | >>> whole_sequence_output, final_state = rnn(inputs) + | >>> whole_sequence_output.shape + | (32, 10, 4) + | >>> final_state.shape + | (32, 4) + | + | Method resolution order: + | LSTMCell + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | keras.src.layers.rnn.dropout_rnn_cell.DropoutRNNCell + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | units, + | activation='tanh', + | recurrent_activation='sigmoid', + | use_bias=True, + | kernel_initializer='glorot_uniform', + | recurrent_initializer='orthogonal', + | bias_initializer='zeros', + | unit_forget_bias=True, + | kernel_regularizer=None, + | recurrent_regularizer=None, + | bias_regularizer=None, + | kernel_constraint=None, + | recurrent_constraint=None, + | bias_constraint=None, + | dropout=0.0, + | recurrent_dropout=0.0, + | seed=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, input_shape) + | + | call( + | self, + | inputs, + | states, + | training=False + | ) + | + | get_config(self) + | Returns the config of the object. 
+ |
+ |      An object config is a Python dictionary (serializable)
+ |      containing the information needed to re-instantiate it.
+ |
+ |  get_initial_state(self, batch_size=None)
+
diff --git a/.tether/man/rnn_cell_simple.txt b/.tether/man/rnn_cell_simple.txt
new file mode 100644
index 0000000000..8bcca18efb
--- /dev/null
+++ b/.tether/man/rnn_cell_simple.txt
@@ -0,0 +1,119 @@
+Help on class SimpleRNNCell in module keras.src.layers.rnn.simple_rnn:
+
+class SimpleRNNCell(keras.src.layers.layer.Layer, keras.src.layers.rnn.dropout_rnn_cell.DropoutRNNCell)
+ |  SimpleRNNCell(units, activation='tanh', use_bias=True, kernel_initializer='glorot_uniform', recurrent_initializer='orthogonal', bias_initializer='zeros', kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, kernel_constraint=None, recurrent_constraint=None, bias_constraint=None, dropout=0.0, recurrent_dropout=0.0, seed=None, **kwargs)
+ |
+ |  Cell class for SimpleRNN.
+ |
+ |  This class processes one step within the whole time sequence input, whereas
+ |  `keras.layers.SimpleRNN` processes the whole sequence.
+ |
+ |  Args:
+ |      units: Positive integer, dimensionality of the output space.
+ |      activation: Activation function to use.
+ |          Default: hyperbolic tangent (`tanh`).
+ |          If you pass `None`, no activation is applied
+ |          (i.e. "linear" activation: `a(x) = x`).
+ |      use_bias: Boolean (default `True`), whether the layer
+ |          should use a bias vector.
+ |      kernel_initializer: Initializer for the `kernel` weights matrix,
+ |          used for the linear transformation of the inputs. Default:
+ |          `"glorot_uniform"`.
+ |      recurrent_initializer: Initializer for the `recurrent_kernel`
+ |          weights matrix, used for the linear transformation
+ |          of the recurrent state. Default: `"orthogonal"`.
+ |      bias_initializer: Initializer for the bias vector. Default: `"zeros"`.
+ |      kernel_regularizer: Regularizer function applied to the `kernel` weights
+ |          matrix. Default: `None`.
+ |      recurrent_regularizer: Regularizer function applied to the
+ |          `recurrent_kernel` weights matrix. Default: `None`.
+ |      bias_regularizer: Regularizer function applied to the bias vector.
+ |          Default: `None`.
+ |      kernel_constraint: Constraint function applied to the `kernel` weights
+ |          matrix. Default: `None`.
+ |      recurrent_constraint: Constraint function applied to the
+ |          `recurrent_kernel` weights matrix. Default: `None`.
+ |      bias_constraint: Constraint function applied to the bias vector.
+ |          Default: `None`.
+ |      dropout: Float between 0 and 1. Fraction of the units to drop for the
+ |          linear transformation of the inputs. Default: 0.
+ |      recurrent_dropout: Float between 0 and 1. Fraction of the units to drop
+ |          for the linear transformation of the recurrent state. Default: 0.
+ |      seed: Random seed for dropout.
+ |
+ |  Call arguments:
+ |      sequence: A 2D tensor, with shape `(batch, features)`.
+ |      states: A 2D tensor with shape `(batch, units)`, which is the state
+ |          from the previous time step.
+ |      training: Python boolean indicating whether the layer should behave in
+ |          training mode or in inference mode. Only relevant when `dropout` or
+ |          `recurrent_dropout` is used.
+ |
+ |  Example:
+ |
+ |  ```python
+ |  inputs = np.random.random([32, 10, 8]).astype(np.float32)
+ |  rnn = keras.layers.RNN(keras.layers.SimpleRNNCell(4))
+ |  output = rnn(inputs)  # The output has shape `(32, 4)`.
+ |  rnn = keras.layers.RNN(
+ |      keras.layers.SimpleRNNCell(4),
+ |      return_sequences=True,
+ |      return_state=True
+ |  )
+ |  # whole_sequence_output has shape `(32, 10, 4)`.
+ | # final_state has shape `(32, 4)`. + | whole_sequence_output, final_state = rnn(inputs) + | ``` + | + | Method resolution order: + | SimpleRNNCell + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | keras.src.layers.rnn.dropout_rnn_cell.DropoutRNNCell + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | units, + | activation='tanh', + | use_bias=True, + | kernel_initializer='glorot_uniform', + | recurrent_initializer='orthogonal', + | bias_initializer='zeros', + | kernel_regularizer=None, + | recurrent_regularizer=None, + | bias_regularizer=None, + | kernel_constraint=None, + | recurrent_constraint=None, + | bias_constraint=None, + | dropout=0.0, + | recurrent_dropout=0.0, + | seed=None, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, input_shape) + | + | call( + | self, + | sequence, + | states, + | training=False + | ) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + | get_initial_state(self, batch_size=None) + | + diff --git a/.tether/man/rnn_cells_stack.txt b/.tether/man/rnn_cells_stack.txt new file mode 100644 index 0000000000..485995e431 --- /dev/null +++ b/.tether/man/rnn_cells_stack.txt @@ -0,0 +1,91 @@ +Help on class StackedRNNCells in module keras.src.layers.rnn.stacked_rnn_cells: + +class StackedRNNCells(keras.src.layers.layer.Layer) + | StackedRNNCells(cells, **kwargs) + | + | Wrapper allowing a stack of RNN cells to behave as a single cell. + | + | Used to implement efficient stacked RNNs. + | + | Args: + | cells: List of RNN cell instances. + | + | Example: + | + | ```python + | batch_size = 3 + | sentence_length = 5 + | num_features = 2 + | new_shape = (batch_size, sentence_length, num_features) + | x = np.reshape(np.arange(30), new_shape) + | + | rnn_cells = [keras.layers.LSTMCell(128) for _ in range(2)] + | stacked_lstm = keras.layers.StackedRNNCells(rnn_cells) + | lstm_layer = keras.layers.RNN(stacked_lstm) + | + | result = lstm_layer(x) + | ``` + | + | Method resolution order: + | StackedRNNCells + | keras.src.layers.layer.Layer + | keras.src.backend.tensorflow.layer.TFLayer + | keras.src.backend.tensorflow.trackable.KerasAutoTrackable + | tensorflow.python.trackable.autotrackable.AutoTrackable + | tensorflow.python.trackable.base.Trackable + | keras.src.ops.operation.Operation + | builtins.object + | + | Methods defined here: + | + | __init__( + | self, + | cells, + | **kwargs + | ) + | Initialize self. See help(type(self)) for accurate signature. + | + | build(self, input_shape) + | + | call( + | self, + | inputs, + | states, + | training=False, + | **kwargs + | ) + | + | get_config(self) + | Returns the config of the object. + | + | An object config is a Python dictionary (serializable) + | containing the information needed to re-instantiate it. + | + | get_initial_state(self, batch_size=None) + | + | ---------------------------------------------------------------------- + | Class methods defined here: + | + | from_config(config, custom_objects=None) from builtins.type + | Creates a layer from its config. 
+ | + | This method is the reverse of `get_config`, + | capable of instantiating the same layer from the config + | dictionary. It does not handle layer connectivity + | (handled by Network), nor weights (handled by `set_weights`). + | + | Args: + | config: A Python dictionary, typically the + | output of get_config. + | + | Returns: + | A layer instance. + | + | ---------------------------------------------------------------------- + | Readonly properties defined here: + | + | output_size + | + | state_size + | + diff --git a/.tether/man/save_model.txt b/.tether/man/save_model.txt new file mode 100644 index 0000000000..dc7489d0cc --- /dev/null +++ b/.tether/man/save_model.txt @@ -0,0 +1,40 @@ +__signature__ +keras.saving.save_model( + model, + filepath, + overwrite=True, + **kwargs +) +__doc__ +Saves a model as a `.keras` file. + +Args: + model: Keras model instance to be saved. + filepath: `str` or `pathlib.Path` object. Path where to save the model. + overwrite: Whether we should overwrite any existing model at the target + location, or instead ask the user via an interactive prompt. + +Example: + +```python +model = keras.Sequential( + [ + keras.layers.Dense(5, input_shape=(3,)), + keras.layers.Softmax(), + ], +) +model.save("model.keras") +loaded_model = keras.saving.load_model("model.keras") +x = keras.random.uniform((10, 3)) +assert np.allclose(model.predict(x), loaded_model.predict(x)) +``` + +Note that `model.save()` is an alias for `keras.saving.save_model()`. + +The saved `.keras` file contains: + +- The model's configuration (architecture) +- The model's weights +- The model's optimizer's state (if any) + +Thus models can be reinstantiated in the exact same state. diff --git a/.tether/man/save_model_config.txt b/.tether/man/save_model_config.txt new file mode 100644 index 0000000000..f7079c7e70 --- /dev/null +++ b/.tether/man/save_model_config.txt @@ -0,0 +1,15 @@ +__signature__ +keras.Model.to_json(self, **kwargs) +__doc__ +Returns a JSON string containing the network configuration. + +To load a network from a JSON save file, use +`keras.models.model_from_json(json_string, custom_objects={...})`. + +Args: + **kwargs: Additional keyword arguments to be passed to + `json.dumps()`. + +Returns: + A JSON string. + diff --git a/.tether/man/save_model_weights.txt b/.tether/man/save_model_weights.txt new file mode 100644 index 0000000000..2820c7a22e --- /dev/null +++ b/.tether/man/save_model_weights.txt @@ -0,0 +1,15 @@ +__signature__ +keras.Model.save_weights( + self, + filepath, + overwrite=True +) +__doc__ +Saves all layer weights to a `.weights.h5` file. + +Args: + filepath: `str` or `pathlib.Path` object. + Path where to save the model. Must end in `.weights.h5`. + overwrite: Whether we should overwrite any existing model + at the target location, or instead ask the user + via an interactive prompt. diff --git a/.tether/man/set_random_seed.txt b/.tether/man/set_random_seed.txt new file mode 100644 index 0000000000..0baf9087c1 --- /dev/null +++ b/.tether/man/set_random_seed.txt @@ -0,0 +1,35 @@ +__signature__ +keras.utils.set_random_seed(seed) +__doc__ +Sets all random seeds (Python, NumPy, and backend framework, e.g. TF). + +You can use this utility to make almost any Keras program fully +deterministic. Some limitations apply in cases where network communications +are involved (e.g. parameter server distribution), which creates additional +sources of randomness, or when certain non-deterministic cuDNN ops are +involved. 
+ +Calling this utility is equivalent to the following: + +```python +import random +random.seed(seed) + +import numpy as np +np.random.seed(seed) + +import tensorflow as tf # Only if TF is installed +tf.random.set_seed(seed) + +import torch # Only if the backend is 'torch' +torch.manual_seed(seed) +``` + +Note that the TensorFlow seed is set even if you're not using TensorFlow +as your backend framework, since many workflows leverage `tf.data` +pipelines (which feature random shuffling). Likewise many workflows +might leverage NumPy APIs. + +Arguments: + seed: Integer, the random seed to use. + diff --git a/.tether/man/split_dataset.txt b/.tether/man/split_dataset.txt new file mode 100644 index 0000000000..0af1a9a352 --- /dev/null +++ b/.tether/man/split_dataset.txt @@ -0,0 +1,41 @@ +__signature__ +keras.utils.split_dataset( + dataset, + left_size=None, + right_size=None, + shuffle=False, + seed=None +) +__doc__ +Splits a dataset into a left half and a right half (e.g. train / test). + +Args: + dataset: + A `tf.data.Dataset`, a `torch.utils.data.Dataset` object, + or a list/tuple of arrays with the same length. + left_size: If float (in the range `[0, 1]`), it signifies + the fraction of the data to pack in the left dataset. If integer, it + signifies the number of samples to pack in the left dataset. If + `None`, defaults to the complement to `right_size`. + Defaults to `None`. + right_size: If float (in the range `[0, 1]`), it signifies + the fraction of the data to pack in the right dataset. + If integer, it signifies the number of samples to pack + in the right dataset. + If `None`, defaults to the complement to `left_size`. + Defaults to `None`. + shuffle: Boolean, whether to shuffle the data before splitting it. + seed: A random seed for shuffling. + +Returns: + A tuple of two `tf.data.Dataset` objects: + the left and right splits. + +Example: + +>>> data = np.random.random(size=(1000, 4)) +>>> left_ds, right_ds = keras.utils.split_dataset(data, left_size=0.8) +>>> int(left_ds.cardinality()) +800 +>>> int(right_ds.cardinality()) +200 diff --git a/.tether/man/standardize_dtype.txt b/.tether/man/standardize_dtype.txt new file mode 100644 index 0000000000..9cd72f85e4 --- /dev/null +++ b/.tether/man/standardize_dtype.txt @@ -0,0 +1,4 @@ +__signature__ +keras.utils.standardize_dtype(dtype) +__doc__ + diff --git a/.tether/man/test_on_batch.txt b/.tether/man/test_on_batch.txt new file mode 100644 index 0000000000..d4c4b1c976 --- /dev/null +++ b/.tether/man/test_on_batch.txt @@ -0,0 +1,29 @@ +__signature__ +keras.Model.test_on_batch( + self, + x, + y=None, + sample_weight=None, + return_dict=False +) +__doc__ +Test the model on a single batch of samples. + +Args: + x: Input data. Must be array-like. + y: Target data. Must be array-like. + sample_weight: Optional array of the same length as x, containing + weights to apply to the model's loss for each sample. + In the case of temporal data, you can pass a 2D array + with shape `(samples, sequence_length)`, to apply a different + weight to every timestep of every sample. + return_dict: If `True`, loss and metric results are returned as a + dict, with each key being the name of the metric. If `False`, + they are returned as a list. + +Returns: + A scalar loss value (when no metrics and `return_dict=False`), + a list of loss and metric values + (if there are metrics and `return_dict=False`), or a dict of + metric and loss values (if `return_dict=True`). 
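+
+Editor's sketch (not part of the upstream docstring): a minimal,
+self-contained call, assuming a toy regression model:
+
+```python
+import numpy as np
+import keras
+
+inputs = keras.Input(shape=(8,))
+outputs = keras.layers.Dense(1)(inputs)
+model = keras.Model(inputs, outputs)
+model.compile(optimizer="adam", loss="mse", metrics=["mae"])
+
+x = np.random.random((16, 8))
+y = np.random.random((16, 1))
+# Returns a dict like {'loss': ..., 'mae': ...} because return_dict=True.
+results = model.test_on_batch(x, y, return_dict=True)
+```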
+
diff --git a/.tether/man/text_dataset_from_directory.txt b/.tether/man/text_dataset_from_directory.txt
new file mode 100644
index 0000000000..1e3e05f72f
--- /dev/null
+++ b/.tether/man/text_dataset_from_directory.txt
@@ -0,0 +1,103 @@
+__signature__
+keras.utils.text_dataset_from_directory(
+    directory,
+    labels='inferred',
+    label_mode='int',
+    class_names=None,
+    batch_size=32,
+    max_length=None,
+    shuffle=True,
+    seed=None,
+    validation_split=None,
+    subset=None,
+    follow_links=False,
+    verbose=True
+)
+__doc__
+Generates a `tf.data.Dataset` from text files in a directory.
+
+If your directory structure is:
+
+```
+main_directory/
+...class_a/
+......a_text_1.txt
+......a_text_2.txt
+...class_b/
+......b_text_1.txt
+......b_text_2.txt
+```
+
+Then calling `text_dataset_from_directory(main_directory,
+labels='inferred')` will return a `tf.data.Dataset` that yields batches of
+texts from the subdirectories `class_a` and `class_b`, together with labels
+0 and 1 (0 corresponding to `class_a` and 1 corresponding to `class_b`).
+
+Only `.txt` files are supported at this time.
+
+Args:
+    directory: Directory where the data is located.
+        If `labels` is `"inferred"`, it should contain
+        subdirectories, each containing text files for a class.
+        Otherwise, the directory structure is ignored.
+    labels: Either `"inferred"`
+        (labels are generated from the directory structure),
+        `None` (no labels),
+        or a list/tuple of integer labels of the same size as the number of
+        text files found in the directory. Labels should be sorted according
+        to the alphanumeric order of the text file paths
+        (obtained via `os.walk(directory)` in Python).
+    label_mode: String describing the encoding of `labels`. Options are:
+        - `"int"`: means that the labels are encoded as integers
+            (e.g. for `sparse_categorical_crossentropy` loss).
+        - `"categorical"` means that the labels are
+            encoded as a categorical vector
+            (e.g. for `categorical_crossentropy` loss).
+        - `"binary"` means that the labels (there can be only 2)
+            are encoded as `float32` scalars with values 0 or 1
+            (e.g. for `binary_crossentropy`).
+        - `None` (no labels).
+    class_names: Only valid if `labels` is `"inferred"`.
+        This is the explicit list of class names
+        (must match names of subdirectories). Used to control the order
+        of the classes (otherwise alphanumerical order is used).
+    batch_size: Size of the batches of data. Defaults to 32.
+        If `None`, the data will not be batched
+        (the dataset will yield individual samples).
+    max_length: Maximum size of a text string. Texts longer than this will
+        be truncated to `max_length`.
+    shuffle: Whether to shuffle the data. Defaults to `True`.
+        If set to `False`, sorts the data in alphanumeric order.
+    seed: Optional random seed for shuffling and transformations.
+    validation_split: Optional float between 0 and 1,
+        fraction of data to reserve for validation.
+    subset: Subset of the data to return.
+        One of `"training"`, `"validation"` or `"both"`.
+        Only used if `validation_split` is set.
+        When `subset="both"`, the utility returns a tuple of two datasets
+        (the training and validation datasets respectively).
+    follow_links: Whether to visit subdirectories pointed to by symlinks.
+        Defaults to `False`.
+    verbose: Whether to display information on the number of classes and
+        the number of files found. Defaults to `True`.
+
+Returns:
+
+A `tf.data.Dataset` object.
+
+- If `label_mode` is `None`, it yields `string` tensors of shape
+    `(batch_size,)`, containing the contents of a batch of text files.
+- Otherwise, it yields a tuple `(texts, labels)`, where `texts`
+    has shape `(batch_size,)` and `labels` follows the format described
+    below.
+
+Rules regarding labels format:
+
+- if `label_mode` is `int`, the labels are an `int32` tensor of shape
+    `(batch_size,)`.
+- if `label_mode` is `binary`, the labels are a `float32` tensor of
+    1s and 0s of shape `(batch_size, 1)`.
+- if `label_mode` is `categorical`, the labels are a `float32` tensor
+    of shape `(batch_size, num_classes)`, representing a one-hot
+    encoding of the class index.
+
diff --git a/.tether/man/timeseries_dataset_from_array.txt b/.tether/man/timeseries_dataset_from_array.txt
new file mode 100644
index 0000000000..f31c19f6db
--- /dev/null
+++ b/.tether/man/timeseries_dataset_from_array.txt
@@ -0,0 +1,124 @@
+__signature__
+keras.utils.timeseries_dataset_from_array(
+    data,
+    targets,
+    sequence_length,
+    sequence_stride=1,
+    sampling_rate=1,
+    batch_size=128,
+    shuffle=False,
+    seed=None,
+    start_index=None,
+    end_index=None
+)
+__doc__
+Creates a dataset of sliding windows over a timeseries provided as array.
+
+This function takes in a sequence of data-points gathered at
+equal intervals, along with time series parameters such as
+length of the sequences/windows, spacing between two sequences/windows, etc.,
+to produce batches of timeseries inputs and targets.
+
+Args:
+    data: Numpy array or eager tensor
+        containing consecutive data points (timesteps).
+        Axis 0 is expected to be the time dimension.
+    targets: Targets corresponding to timesteps in `data`.
+        `targets[i]` should be the target
+        corresponding to the window that starts at index `i`
+        (see example 2 below).
+        Pass `None` if you don't have target data (in this case the dataset
+        will only yield the input data).
+    sequence_length: Length of the output sequences
+        (in number of timesteps).
+    sequence_stride: Period between successive output sequences.
+        For stride `s`, output samples would
+        start at index `data[i]`, `data[i + s]`, `data[i + 2 * s]`, etc.
+    sampling_rate: Period between successive individual timesteps
+        within sequences. For rate `r`, timesteps
+        `data[i], data[i + r], ... data[i + sequence_length]`
+        are used for creating a sample sequence.
+    batch_size: Number of timeseries samples in each batch
+        (except maybe the last one). If `None`, the data will not be batched
+        (the dataset will yield individual samples).
+    shuffle: Whether to shuffle output samples,
+        or instead draw them in chronological order.
+    seed: Optional int; random seed for shuffling.
+    start_index: Optional int; data points earlier (exclusive)
+        than `start_index` will not be used
+        in the output sequences. This is useful to reserve part of the
+        data for test or validation.
+    end_index: Optional int; data points later (exclusive) than `end_index`
+        will not be used in the output sequences.
+        This is useful to reserve part of the data for test or validation.
+
+Returns:
+
+A `tf.data.Dataset` instance. If `targets` was passed, the dataset yields
+a tuple `(batch_of_sequences, batch_of_targets)`. If not, the dataset yields
+only `batch_of_sequences`.
+
+Example 1:
+
+Consider indices `[0, 1, ... 98]`.
+With `sequence_length=10, sampling_rate=2, sequence_stride=3`,
+`shuffle=False`, the dataset will yield batches of sequences
+composed of the following indices:
+
+```
+First sequence:  [0  2  4  6  8 10 12 14 16 18]
+Second sequence: [3  5  7  9 11 13 15 17 19 21]
+Third sequence:  [6  8 10 12 14 16 18 20 22 24]
+...
+Last sequence:   [78 80 82 84 86 88 90 92 94 96]
+```
+
+In this case the last 2 data points are discarded since no full sequence
+can be generated to include them (the next sequence would have started
+at index 81, and thus its last step would have gone over 98).
+
+Example 2: Temporal regression.
+
+Consider an array `data` of scalar values, of shape `(steps,)`.
+To generate a dataset that uses the past 10
+timesteps to predict the next timestep, you would use:
+
+```python
+input_data = data[:-10]
+targets = data[10:]
+dataset = timeseries_dataset_from_array(
+    input_data, targets, sequence_length=10)
+for batch in dataset:
+    inputs, targets = batch
+    assert np.array_equal(inputs[0], data[:10])  # First sequence: steps [0-9]
+    # Corresponding target: step 10
+    assert np.array_equal(targets[0], data[10])
+    break
+```
+
+Example 3: Temporal regression for many-to-many architectures.
+
+Consider two arrays of scalar values `X` and `Y`,
+both of shape `(100,)`. The resulting dataset should consist of samples with
+20 timesteps each. The samples should not overlap.
+To generate a dataset that uses the current timestep
+to predict the corresponding target timestep, you would use:
+
+```python
+X = np.arange(100)
+Y = X*2
+
+sample_length = 20
+input_dataset = timeseries_dataset_from_array(
+    X, None, sequence_length=sample_length, sequence_stride=sample_length)
+target_dataset = timeseries_dataset_from_array(
+    Y, None, sequence_length=sample_length, sequence_stride=sample_length)
+
+for batch in zip(input_dataset, target_dataset):
+    inputs, targets = batch
+    assert np.array_equal(inputs[0], X[:sample_length])
+
+    # second sample equals output timesteps 20-40
+    assert np.array_equal(targets[1], Y[sample_length:2*sample_length])
+    break
+```
diff --git a/.tether/man/to_categorical.txt b/.tether/man/to_categorical.txt
new file mode 100644
index 0000000000..ca6dbad257
--- /dev/null
+++ b/.tether/man/to_categorical.txt
@@ -0,0 +1,39 @@
+__signature__
+keras.utils.to_categorical(x, num_classes=None)
+__doc__
+Converts a class vector (integers) to binary class matrix.
+
+E.g. for use with `categorical_crossentropy`.
+
+Args:
+    x: Array-like with class values to be converted into a matrix
+        (integers from 0 to `num_classes - 1`).
+    num_classes: Total number of classes. If `None`, this would be inferred
+        as `max(x) + 1`. Defaults to `None`.
+
+Returns:
+    A binary matrix representation of the input as a NumPy array. The class
+    axis is placed last.
+
+Example:
+
+>>> a = keras.utils.to_categorical([0, 1, 2, 3], num_classes=4)
+>>> print(a)
+[[1. 0. 0. 0.]
+ [0. 1. 0. 0.]
+ [0. 0. 1. 0.]
+ [0. 0. 0. 1.]]
+
+>>> b = np.array([.9, .04, .03, .03,
+...               .3, .45, .15, .13,
+...               .04, .01, .94, .05,
+...               .12, .21, .5, .17]).reshape(4, 4)
+>>> loss = keras.ops.categorical_crossentropy(a, b)
+>>> print(np.around(loss, 5))
+[0.10536 0.82807 0.1011  1.77196]
+
+>>> loss = keras.ops.categorical_crossentropy(a, a)
+>>> print(np.around(loss, 5))
+[0. 0. 0. 0.]
+
diff --git a/.tether/man/train_on_batch.txt b/.tether/man/train_on_batch.txt
new file mode 100644
index 0000000000..e7e97eb8cf
--- /dev/null
+++ b/.tether/man/train_on_batch.txt
@@ -0,0 +1,38 @@
+__signature__
+keras.Model.train_on_batch(
+    self,
+    x,
+    y=None,
+    sample_weight=None,
+    class_weight=None,
+    return_dict=False
+)
+__doc__
+Runs a single gradient update on a single batch of data.
+
+Args:
+    x: Input data. Must be array-like.
+    y: Target data. Must be array-like.
+ sample_weight: Optional array of the same length as x, containing + weights to apply to the model's loss for each sample. + In the case of temporal data, you can pass a 2D array + with shape `(samples, sequence_length)`, to apply a different + weight to every timestep of every sample. + class_weight: Optional dictionary mapping class indices (integers) + to a weight (float) to apply to the model's loss for the samples + from this class during training. This can be useful to tell the + model to "pay more attention" to samples from an + under-represented class. When `class_weight` is specified + and targets have a rank of 2 or greater, either `y` must + be one-hot encoded, or an explicit final dimension of 1 + must be included for sparse class labels. + return_dict: If `True`, loss and metric results are returned as a + dict, with each key being the name of the metric. If `False`, + they are returned as a list. + +Returns: + A scalar loss value (when no metrics and `return_dict=False`), + a list of loss and metric values + (if there are metrics and `return_dict=False`), or a dict of + metric and loss values (if `return_dict=True`). + diff --git a/.tether/man/unpack_x_y_sample_weight.txt b/.tether/man/unpack_x_y_sample_weight.txt new file mode 100644 index 0000000000..c1588cfd04 --- /dev/null +++ b/.tether/man/unpack_x_y_sample_weight.txt @@ -0,0 +1,27 @@ +__signature__ +keras.utils.unpack_x_y_sample_weight(data) +__doc__ +Unpacks user-provided data tuple. + +This is a convenience utility to be used when overriding +`Model.train_step`, `Model.test_step`, or `Model.predict_step`. +This utility makes it easy to support data of the form `(x,)`, +`(x, y)`, or `(x, y, sample_weight)`. + +Example: + +>>> features_batch = ops.ones((10, 5)) +>>> labels_batch = ops.zeros((10, 5)) +>>> data = (features_batch, labels_batch) +>>> # `y` and `sample_weight` will default to `None` if not provided. +>>> x, y, sample_weight = unpack_x_y_sample_weight(data) +>>> sample_weight is None +True + +Args: + data: A tuple of the form `(x,)`, `(x, y)`, or `(x, y, sample_weight)`. + +Returns: + The unpacked tuple, with `None`s for `y` and `sample_weight` if they are + not provided. + diff --git a/.tether/vignettes-src/custom_train_step_in_jax.Rmd b/.tether/vignettes-src/custom_train_step_in_jax.Rmd new file mode 100644 index 0000000000..2f57238c66 --- /dev/null +++ b/.tether/vignettes-src/custom_train_step_in_jax.Rmd @@ -0,0 +1,350 @@ +--- +title: Customizing what happens in `fit()` with JAX +author: '[fchollet](https://twitter.com/fchollet)' +date-created: 2023/06/27 +last-modified: 2023/06/27 +description: Overriding the training step of the Model class with JAX. +accelerator: GPU +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras/guides/custom_train_step_in_jax.py +--- + +## Introduction + +When you're doing supervised learning, you can use `fit()` and everything works +smoothly. + +When you need to take control of every little detail, you can write your own training +loop entirely from scratch. + +But what if you need a custom training algorithm, but you still want to benefit from +the convenient features of `fit()`, such as callbacks, built-in distribution support, +or step fusing? + +A core principle of Keras is **progressive disclosure of complexity**. You should +always be able to get into lower-level workflows in a gradual way. You shouldn't fall +off a cliff if the high-level functionality doesn't exactly match your use case. 
You +should be able to gain more control over the small details while retaining a +commensurate amount of high-level convenience. + +When you need to customize what `fit()` does, you should **override the training step +function of the `Model` class**. This is the function that is called by `fit()` for +every batch of data. You will then be able to call `fit()` as usual -- and it will be +running your own learning algorithm. + +Note that this pattern does not prevent you from building models with the Functional +API. You can do this whether you're building `Sequential` models, Functional API +models, or subclassed models. + +Let's see how that works. + +## Setup + +```python +import os + +# This guide can only be run with the JAX backend. +os.environ["KERAS_BACKEND"] = "jax" + +import jax +import keras +import numpy as np +``` + +## A first simple example + +Let's start from a simple example: + +- We create a new class that subclasses `keras.Model`. +- We implement a fully-stateless `compute_loss_and_updates()` method +to compute the loss as well as the updated values for the non-trainable +variables of the model. Internally, it calls `stateless_call()` and +the built-in `compute_loss()`. +- We implement a fully-stateless `train_step()` method to compute current +metric values (including the loss) as well as updated values for the +trainable variables, the optimizer variables, and the metric variables. + +Note that you can also take into account the `sample_weight` argument by: + +- Unpacking the data as `x, y, sample_weight = data` +- Passing `sample_weight` to `compute_loss()` +- Passing `sample_weight` alongside `y` and `y_pred` +to metrics in `stateless_update_state()` + +```python +class CustomModel(keras.Model): + def compute_loss_and_updates( + self, + trainable_variables, + non_trainable_variables, + x, + y, + training=False, + ): + y_pred, non_trainable_variables = self.stateless_call( + trainable_variables, + non_trainable_variables, + x, + training=training, + ) + loss = self.compute_loss(x, y, y_pred) + return loss, (y_pred, non_trainable_variables) + + def train_step(self, state, data): + ( + trainable_variables, + non_trainable_variables, + optimizer_variables, + metrics_variables, + ) = state + x, y = data + + # Get the gradient function. + grad_fn = jax.value_and_grad( + self.compute_loss_and_updates, has_aux=True + ) + + # Compute the gradients. + (loss, (y_pred, non_trainable_variables)), grads = grad_fn( + trainable_variables, + non_trainable_variables, + x, + y, + training=True, + ) + + # Update trainable variables and optimizer variables. + ( + trainable_variables, + optimizer_variables, + ) = self.optimizer.stateless_apply( + optimizer_variables, grads, trainable_variables + ) + + # Update metrics. + new_metrics_vars = [] + for metric in self.metrics: + this_metric_vars = metrics_variables[ + len(new_metrics_vars) : len(new_metrics_vars) + + len(metric.variables) + ] + if metric.name == "loss": + this_metric_vars = metric.stateless_update_state( + this_metric_vars, loss + ) + else: + this_metric_vars = metric.stateless_update_state( + this_metric_vars, y, y_pred + ) + logs = metric.stateless_result(this_metric_vars) + new_metrics_vars += this_metric_vars + + # Return metric logs and updated state variables. 
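+        # (Editor's note) Keep the entries of `state` in the same order that
+        # `fit()` passed them in: trainable variables, non-trainable variables,
+        # optimizer variables, then metric variables.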
+ state = ( + trainable_variables, + non_trainable_variables, + optimizer_variables, + new_metrics_vars, + ) + return logs, state +``` + +Let's try this out: + +```python +# Construct and compile an instance of CustomModel +inputs = keras.Input(shape=(32,)) +outputs = keras.layers.Dense(1)(inputs) +model = CustomModel(inputs, outputs) +model.compile(optimizer="adam", loss="mse", metrics=["mae"]) + +# Just use `fit` as usual +x = np.random.random((1000, 32)) +y = np.random.random((1000, 1)) +model.fit(x, y, epochs=3) +``` + +## Going lower-level + +Naturally, you could just skip passing a loss function in `compile()`, and instead do +everything *manually* in `train_step`. Likewise for metrics. + +Here's a lower-level example, that only uses `compile()` to configure the optimizer: + +```python +class CustomModel(keras.Model): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.loss_tracker = keras.metrics.Mean(name="loss") + self.mae_metric = keras.metrics.MeanAbsoluteError(name="mae") + self.loss_fn = keras.losses.MeanSquaredError() + + def compute_loss_and_updates( + self, + trainable_variables, + non_trainable_variables, + x, + y, + training=False, + ): + y_pred, non_trainable_variables = self.stateless_call( + trainable_variables, + non_trainable_variables, + x, + training=training, + ) + loss = self.loss_fn(y, y_pred) + return loss, (y_pred, non_trainable_variables) + + def train_step(self, state, data): + ( + trainable_variables, + non_trainable_variables, + optimizer_variables, + metrics_variables, + ) = state + x, y = data + + # Get the gradient function. + grad_fn = jax.value_and_grad( + self.compute_loss_and_updates, has_aux=True + ) + + # Compute the gradients. + (loss, (y_pred, non_trainable_variables)), grads = grad_fn( + trainable_variables, + non_trainable_variables, + x, + y, + training=True, + ) + + # Update trainable variables and optimizer variables. + ( + trainable_variables, + optimizer_variables, + ) = self.optimizer.stateless_apply( + optimizer_variables, grads, trainable_variables + ) + + # Update metrics. + loss_tracker_vars = metrics_variables[ + : len(self.loss_tracker.variables) + ] + mae_metric_vars = metrics_variables[len(self.loss_tracker.variables) :] + + loss_tracker_vars = self.loss_tracker.stateless_update_state( + loss_tracker_vars, loss + ) + mae_metric_vars = self.mae_metric.stateless_update_state( + mae_metric_vars, y, y_pred + ) + + logs = {} + logs[self.loss_tracker.name] = self.loss_tracker.stateless_result( + loss_tracker_vars + ) + logs[self.mae_metric.name] = self.mae_metric.stateless_result( + mae_metric_vars + ) + + new_metrics_vars = loss_tracker_vars + mae_metric_vars + + # Return metric logs and updated state variables. + state = ( + trainable_variables, + non_trainable_variables, + optimizer_variables, + new_metrics_vars, + ) + return logs, state + + @property + def metrics(self): + # We list our `Metric` objects here so that `reset_states()` can be + # called automatically at the start of each epoch + # or at the start of `evaluate()`. + return [self.loss_tracker, self.mae_metric] + + +# Construct an instance of CustomModel +inputs = keras.Input(shape=(32,)) +outputs = keras.layers.Dense(1)(inputs) +model = CustomModel(inputs, outputs) + +# We don't pass a loss or metrics here. +model.compile(optimizer="adam") + +# Just use `fit` as usual -- you can use callbacks, etc. 
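+# (Editor's note) For example, assuming you want early stopping:
+#     model.fit(x, y, epochs=5,
+#               callbacks=[keras.callbacks.EarlyStopping(monitor="loss", patience=2)])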
+x = np.random.random((1000, 32)) +y = np.random.random((1000, 1)) +model.fit(x, y, epochs=5) +``` + +## Providing your own evaluation step + +What if you want to do the same for calls to `model.evaluate()`? Then you would +override `test_step` in exactly the same way. Here's what it looks like: + +```python +class CustomModel(keras.Model): + def test_step(self, state, data): + # Unpack the data. + x, y = data + ( + trainable_variables, + non_trainable_variables, + metrics_variables, + ) = state + + # Compute predictions and loss. + y_pred, non_trainable_variables = self.stateless_call( + trainable_variables, + non_trainable_variables, + x, + training=False, + ) + loss = self.compute_loss(x, y, y_pred) + + # Update metrics. + new_metrics_vars = [] + for metric in self.metrics: + this_metric_vars = metrics_variables[ + len(new_metrics_vars) : len(new_metrics_vars) + + len(metric.variables) + ] + if metric.name == "loss": + this_metric_vars = metric.stateless_update_state( + this_metric_vars, loss + ) + else: + this_metric_vars = metric.stateless_update_state( + this_metric_vars, y, y_pred + ) + logs = metric.stateless_result(this_metric_vars) + new_metrics_vars += this_metric_vars + + # Return metric logs and updated state variables. + state = ( + trainable_variables, + non_trainable_variables, + new_metrics_vars, + ) + return logs, state + + +# Construct an instance of CustomModel +inputs = keras.Input(shape=(32,)) +outputs = keras.layers.Dense(1)(inputs) +model = CustomModel(inputs, outputs) +model.compile(loss="mse", metrics=["mae"]) + +# Evaluate with our custom test_step +x = np.random.random((1000, 32)) +y = np.random.random((1000, 1)) +model.evaluate(x, y) +``` + +That's it! + diff --git a/.tether/vignettes-src/custom_train_step_in_tensorflow.Rmd b/.tether/vignettes-src/custom_train_step_in_tensorflow.Rmd new file mode 100644 index 0000000000..a621c2b356 --- /dev/null +++ b/.tether/vignettes-src/custom_train_step_in_tensorflow.Rmd @@ -0,0 +1,451 @@ +--- +title: Customizing what happens in `fit()` with TensorFlow +author: '[fchollet](https://twitter.com/fchollet)' +date-created: 2020/04/15 +last-modified: 2023/06/27 +description: Overriding the training step of the Model class with TensorFlow. +accelerator: GPU +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras-io/guides/custom_train_step_in_tensorflow.py +--- + +## Introduction + +When you're doing supervised learning, you can use `fit()` and everything works +smoothly. + +When you need to take control of every little detail, you can write your own training +loop entirely from scratch. + +But what if you need a custom training algorithm, but you still want to benefit from +the convenient features of `fit()`, such as callbacks, built-in distribution support, +or step fusing? + +A core principle of Keras is **progressive disclosure of complexity**. You should +always be able to get into lower-level workflows in a gradual way. You shouldn't fall +off a cliff if the high-level functionality doesn't exactly match your use case. You +should be able to gain more control over the small details while retaining a +commensurate amount of high-level convenience. + +When you need to customize what `fit()` does, you should **override the training step +function of the `Model` class**. This is the function that is called by `fit()` for +every batch of data. You will then be able to call `fit()` as usual -- and it will be +running your own learning algorithm. 
+
+Note that this pattern does not prevent you from building models with the Functional
+API. You can do this whether you're building `Sequential` models, Functional API
+models, or subclassed models.
+
+Let's see how that works.
+
+## Setup
+
+```python
+import os
+
+# This guide can only be run with the TF backend.
+os.environ["KERAS_BACKEND"] = "tensorflow"
+
+import tensorflow as tf
+import keras
+from keras import layers
+import numpy as np
+```
+
+## A first simple example
+
+Let's start from a simple example:
+
+- We create a new class that subclasses `keras.Model`.
+- We just override the method `train_step(self, data)`.
+- We return a dictionary mapping metric names (including the loss) to their current
+value.
+
+The input argument `data` is what gets passed to fit as training data:
+
+- If you pass NumPy arrays, by calling `fit(x, y, ...)`, then `data` will be the tuple
+`(x, y)`
+- If you pass a `tf.data.Dataset`, by calling `fit(dataset, ...)`, then `data` will be
+what gets yielded by `dataset` at each batch.
+
+In the body of the `train_step()` method, we implement a regular training update,
+similar to what you are already familiar with. Importantly, **we compute the loss via
+`self.compute_loss()`**, which wraps the loss function(s) that were passed to
+`compile()`.
+
+Similarly, we call `metric.update_state(y, y_pred)` on metrics from `self.metrics`,
+to update the state of the metrics that were passed in `compile()`,
+and we query results from `self.metrics` at the end to retrieve their current value.
+
+```python
+class CustomModel(keras.Model):
+    def train_step(self, data):
+        # Unpack the data. Its structure depends on your model and
+        # on what you pass to `fit()`.
+        x, y = data
+
+        with tf.GradientTape() as tape:
+            y_pred = self(x, training=True)  # Forward pass
+            # Compute the loss value
+            # (the loss function is configured in `compile()`)
+            loss = self.compute_loss(y=y, y_pred=y_pred)
+
+        # Compute gradients
+        trainable_vars = self.trainable_variables
+        gradients = tape.gradient(loss, trainable_vars)
+
+        # Update weights
+        self.optimizer.apply(gradients, trainable_vars)
+
+        # Update metrics (includes the metric that tracks the loss)
+        for metric in self.metrics:
+            if metric.name == "loss":
+                metric.update_state(loss)
+            else:
+                metric.update_state(y, y_pred)
+
+        # Return a dict mapping metric names to current value
+        return {m.name: m.result() for m in self.metrics}
+```
+
+Let's try this out:
+
+```python
+# Construct and compile an instance of CustomModel
+inputs = keras.Input(shape=(32,))
+outputs = keras.layers.Dense(1)(inputs)
+model = CustomModel(inputs, outputs)
+model.compile(optimizer="adam", loss="mse", metrics=["mae"])
+
+# Just use `fit` as usual
+x = np.random.random((1000, 32))
+y = np.random.random((1000, 1))
+model.fit(x, y, epochs=3)
+```
+
+## Going lower-level
+
+Naturally, you could just skip passing a loss function in `compile()`, and instead do
+everything *manually* in `train_step`. Likewise for metrics.
+
+Here's a lower-level example, that only uses `compile()` to configure the optimizer:
+
+- We start by creating `Metric` instances to track our loss and a MAE score (in `__init__()`).
+- We implement a custom `train_step()` that updates the state of these metrics
+(by calling `update_state()` on them), then queries them (via `result()`) to return their current average value,
+to be displayed by the progress bar and to be passed to any callback.
+- Note that we would need to call `reset_states()` on our metrics between each epoch!
Otherwise +calling `result()` would return an average since the start of training, whereas we usually work +with per-epoch averages. Thankfully, the framework can do that for us: just list any metric +you want to reset in the `metrics` property of the model. The model will call `reset_states()` +on any object listed here at the beginning of each `fit()` epoch or at the beginning of a call to +`evaluate()`. + +```python +class CustomModel(keras.Model): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.loss_tracker = keras.metrics.Mean(name="loss") + self.mae_metric = keras.metrics.MeanAbsoluteError(name="mae") + self.loss_fn = keras.losses.MeanSquaredError() + + def train_step(self, data): + x, y = data + + with tf.GradientTape() as tape: + y_pred = self(x, training=True) # Forward pass + # Compute our own loss + loss = self.loss_fn(y, y_pred) + + # Compute gradients + trainable_vars = self.trainable_variables + gradients = tape.gradient(loss, trainable_vars) + + # Update weights + self.optimizer.apply(gradients, trainable_vars) + + # Compute our own metrics + self.loss_tracker.update_state(loss) + self.mae_metric.update_state(y, y_pred) + return { + "loss": self.loss_tracker.result(), + "mae": self.mae_metric.result(), + } + + @property + def metrics(self): + # We list our `Metric` objects here so that `reset_states()` can be + # called automatically at the start of each epoch + # or at the start of `evaluate()`. + return [self.loss_tracker, self.mae_metric] + + +# Construct an instance of CustomModel +inputs = keras.Input(shape=(32,)) +outputs = keras.layers.Dense(1)(inputs) +model = CustomModel(inputs, outputs) + +# We don't pass a loss or metrics here. +model.compile(optimizer="adam") + +# Just use `fit` as usual -- you can use callbacks, etc. +x = np.random.random((1000, 32)) +y = np.random.random((1000, 1)) +model.fit(x, y, epochs=5) +``` + +## Supporting `sample_weight` & `class_weight` + +You may have noticed that our first basic example didn't make any mention of sample +weighting. If you want to support the `fit()` arguments `sample_weight` and +`class_weight`, you'd simply do the following: + +- Unpack `sample_weight` from the `data` argument +- Pass it to `compute_loss` & `update_state` (of course, you could also just apply +it manually if you don't rely on `compile()` for losses & metrics) +- That's it. + +```python +class CustomModel(keras.Model): + def train_step(self, data): + # Unpack the data. Its structure depends on your model and + # on what you pass to `fit()`. + if len(data) == 3: + x, y, sample_weight = data + else: + sample_weight = None + x, y = data + + with tf.GradientTape() as tape: + y_pred = self(x, training=True) # Forward pass + # Compute the loss value. + # The loss function is configured in `compile()`. + loss = self.compute_loss( + y=y, + y_pred=y_pred, + sample_weight=sample_weight, + ) + + # Compute gradients + trainable_vars = self.trainable_variables + gradients = tape.gradient(loss, trainable_vars) + + # Update weights + self.optimizer.apply(gradients, trainable_vars) + + # Update the metrics. + # Metrics are configured in `compile()`. + for metric in self.metrics: + if metric.name == "loss": + metric.update_state(loss) + else: + metric.update_state(y, y_pred, sample_weight=sample_weight) + + # Return a dict mapping metric names to current value. + # Note that it will include the loss (tracked in self.metrics). 
+        return {m.name: m.result() for m in self.metrics}
+
+
+# Construct and compile an instance of CustomModel
+inputs = keras.Input(shape=(32,))
+outputs = keras.layers.Dense(1)(inputs)
+model = CustomModel(inputs, outputs)
+model.compile(optimizer="adam", loss="mse", metrics=["mae"])
+
+# You can now use sample_weight argument
+x = np.random.random((1000, 32))
+y = np.random.random((1000, 1))
+sw = np.random.random((1000, 1))
+model.fit(x, y, sample_weight=sw, epochs=3)
+```
+
+## Providing your own evaluation step
+
+What if you want to do the same for calls to `model.evaluate()`? Then you would
+override `test_step` in exactly the same way. Here's what it looks like:
+
+```python
+class CustomModel(keras.Model):
+    def test_step(self, data):
+        # Unpack the data
+        x, y = data
+        # Compute predictions
+        y_pred = self(x, training=False)
+        # Compute the loss
+        loss = self.compute_loss(y=y, y_pred=y_pred)
+        # Update the metrics.
+        for metric in self.metrics:
+            if metric.name == "loss":
+                metric.update_state(loss)
+            else:
+                metric.update_state(y, y_pred)
+        # Return a dict mapping metric names to current value.
+        # Note that it will include the loss (tracked in self.metrics).
+        return {m.name: m.result() for m in self.metrics}
+
+
+# Construct an instance of CustomModel
+inputs = keras.Input(shape=(32,))
+outputs = keras.layers.Dense(1)(inputs)
+model = CustomModel(inputs, outputs)
+model.compile(loss="mse", metrics=["mae"])
+
+# Evaluate with our custom test_step
+x = np.random.random((1000, 32))
+y = np.random.random((1000, 1))
+model.evaluate(x, y)
+```
+
+## Wrapping up: an end-to-end GAN example
+
+Let's walk through an end-to-end example that leverages everything you just learned.
+
+Let's consider:
+
+- A generator network meant to generate 28x28x1 images.
+- A discriminator network meant to classify 28x28x1 images into two classes ("fake" and
+"real").
+- One optimizer for each.
+- A loss function to train the discriminator.
+ +```python +# Create the discriminator +discriminator = keras.Sequential( + [ + keras.Input(shape=(28, 28, 1)), + layers.Conv2D(64, (3, 3), strides=(2, 2), padding="same"), + layers.LeakyReLU(negative_slope=0.2), + layers.Conv2D(128, (3, 3), strides=(2, 2), padding="same"), + layers.LeakyReLU(negative_slope=0.2), + layers.GlobalMaxPooling2D(), + layers.Dense(1), + ], + name="discriminator", +) + +# Create the generator +latent_dim = 128 +generator = keras.Sequential( + [ + keras.Input(shape=(latent_dim,)), + # We want to generate 128 coefficients to reshape into a 7x7x128 map + layers.Dense(7 * 7 * 128), + layers.LeakyReLU(negative_slope=0.2), + layers.Reshape((7, 7, 128)), + layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding="same"), + layers.LeakyReLU(negative_slope=0.2), + layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding="same"), + layers.LeakyReLU(negative_slope=0.2), + layers.Conv2D(1, (7, 7), padding="same", activation="sigmoid"), + ], + name="generator", +) +``` + +Here's a feature-complete GAN class, overriding `compile()` to use its own signature, +and implementing the entire GAN algorithm in 17 lines in `train_step`: + +```python +class GAN(keras.Model): + def __init__(self, discriminator, generator, latent_dim): + super().__init__() + self.discriminator = discriminator + self.generator = generator + self.latent_dim = latent_dim + self.d_loss_tracker = keras.metrics.Mean(name="d_loss") + self.g_loss_tracker = keras.metrics.Mean(name="g_loss") + self.seed_generator = keras.random.SeedGenerator(1337) + + @property + def metrics(self): + return [self.d_loss_tracker, self.g_loss_tracker] + + def compile(self, d_optimizer, g_optimizer, loss_fn): + super().compile() + self.d_optimizer = d_optimizer + self.g_optimizer = g_optimizer + self.loss_fn = loss_fn + + def train_step(self, real_images): + if isinstance(real_images, tuple): + real_images = real_images[0] + # Sample random points in the latent space + batch_size = tf.shape(real_images)[0] + random_latent_vectors = keras.random.normal( + shape=(batch_size, self.latent_dim), seed=self.seed_generator + ) + + # Decode them to fake images + generated_images = self.generator(random_latent_vectors) + + # Combine them with real images + combined_images = tf.concat([generated_images, real_images], axis=0) + + # Assemble labels discriminating real from fake images + labels = tf.concat( + [tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0 + ) + # Add random noise to the labels - important trick! + labels += 0.05 * keras.random.uniform( + tf.shape(labels), seed=self.seed_generator + ) + + # Train the discriminator + with tf.GradientTape() as tape: + predictions = self.discriminator(combined_images) + d_loss = self.loss_fn(labels, predictions) + grads = tape.gradient(d_loss, self.discriminator.trainable_weights) + self.d_optimizer.apply(grads, self.discriminator.trainable_weights) + + # Sample random points in the latent space + random_latent_vectors = keras.random.normal( + shape=(batch_size, self.latent_dim), seed=self.seed_generator + ) + + # Assemble labels that say "all real images" + misleading_labels = tf.zeros((batch_size, 1)) + + # Train the generator (note that we should *not* update the weights + # of the discriminator)! 
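+        # (Only the generator's weights are passed to tape.gradient() below,
+        # so the discriminator receives no gradient update during this step.)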
+ with tf.GradientTape() as tape: + predictions = self.discriminator(self.generator(random_latent_vectors)) + g_loss = self.loss_fn(misleading_labels, predictions) + grads = tape.gradient(g_loss, self.generator.trainable_weights) + self.g_optimizer.apply(grads, self.generator.trainable_weights) + + # Update metrics and return their value. + self.d_loss_tracker.update_state(d_loss) + self.g_loss_tracker.update_state(g_loss) + return { + "d_loss": self.d_loss_tracker.result(), + "g_loss": self.g_loss_tracker.result(), + } +``` + +Let's test-drive it: + +```python +# Prepare the dataset. We use both the training & test MNIST digits. +batch_size = 64 +(x_train, _), (x_test, _) = keras.datasets.mnist.load_data() +all_digits = np.concatenate([x_train, x_test]) +all_digits = all_digits.astype("float32") / 255.0 +all_digits = np.reshape(all_digits, (-1, 28, 28, 1)) +dataset = tf.data.Dataset.from_tensor_slices(all_digits) +dataset = dataset.shuffle(buffer_size=1024).batch(batch_size) + +gan = GAN(discriminator=discriminator, generator=generator, latent_dim=latent_dim) +gan.compile( + d_optimizer=keras.optimizers.Adam(learning_rate=0.0003), + g_optimizer=keras.optimizers.Adam(learning_rate=0.0003), + loss_fn=keras.losses.BinaryCrossentropy(from_logits=True), +) + +# To limit the execution time, we only train on 100 batches. You can train on +# the entire dataset. You will need about 20 epochs to get nice results. +gan.fit(dataset.take(100), epochs=1) +``` + +The ideas behind deep learning are simple, so why should their implementation be painful? + diff --git a/.tether/vignettes-src/custom_train_step_in_torch.Rmd b/.tether/vignettes-src/custom_train_step_in_torch.Rmd new file mode 100644 index 0000000000..73be089eaf --- /dev/null +++ b/.tether/vignettes-src/custom_train_step_in_torch.Rmd @@ -0,0 +1,484 @@ +--- +title: Customizing what happens in `fit()` with PyTorch +author: '[fchollet](https://twitter.com/fchollet)' +date-created: 2023/06/27 +last-modified: 2023/06/27 +description: Overriding the training step of the Model class with PyTorch. +accelerator: GPU +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras/guides/custom_train_step_in_torch.py +--- + +## Introduction + +When you're doing supervised learning, you can use `fit()` and everything works +smoothly. + +When you need to take control of every little detail, you can write your own training +loop entirely from scratch. + +But what if you need a custom training algorithm, but you still want to benefit from +the convenient features of `fit()`, such as callbacks, built-in distribution support, +or step fusing? + +A core principle of Keras is **progressive disclosure of complexity**. You should +always be able to get into lower-level workflows in a gradual way. You shouldn't fall +off a cliff if the high-level functionality doesn't exactly match your use case. You +should be able to gain more control over the small details while retaining a +commensurate amount of high-level convenience. + +When you need to customize what `fit()` does, you should **override the training step +function of the `Model` class**. This is the function that is called by `fit()` for +every batch of data. You will then be able to call `fit()` as usual -- and it will be +running your own learning algorithm. + +Note that this pattern does not prevent you from building models with the Functional +API. 
You can do this whether you're building `Sequential` models, Functional API +models, or subclassed models. + +Let's see how that works. + +## Setup + +```python +import os + +# This guide can only be run with the torch backend. +os.environ["KERAS_BACKEND"] = "torch" + +import torch +import keras +from keras import layers +import numpy as np +``` + +## A first simple example + +Let's start from a simple example: + +- We create a new class that subclasses `keras.Model`. +- We just override the method `train_step(self, data)`. +- We return a dictionary mapping metric names (including the loss) to their current +value. + +The input argument `data` is what gets passed to fit as training data: + +- If you pass NumPy arrays, by calling `fit(x, y, ...)`, then `data` will be the tuple +`(x, y)` +- If you pass a `torch.utils.data.DataLoader` or a `tf.data.Dataset`, +by calling `fit(dataset, ...)`, then `data` will be what gets yielded +by `dataset` at each batch. + +In the body of the `train_step()` method, we implement a regular training update, +similar to what you are already familiar with. Importantly, **we compute the loss via +`self.compute_loss()`**, which wraps the loss(es) function(s) that were passed to +`compile()`. + +Similarly, we call `metric.update_state(y, y_pred)` on metrics from `self.metrics`, +to update the state of the metrics that were passed in `compile()`, +and we query results from `self.metrics` at the end to retrieve their current value. + +```python +class CustomModel(keras.Model): + def train_step(self, data): + # Unpack the data. Its structure depends on your model and + # on what you pass to `fit()`. + x, y = data + + # Call torch.nn.Module.zero_grad() to clear the leftover gradients + # for the weights from the previous train step. + self.zero_grad() + + # Compute loss + y_pred = self(x, training=True) # Forward pass + loss = self.compute_loss(y=y, y_pred=y_pred) + + # Call torch.Tensor.backward() on the loss to compute gradients + # for the weights. + loss.backward() + + trainable_weights = [v for v in self.trainable_weights] + gradients = [v.value.grad for v in trainable_weights] + + # Update weights + with torch.no_grad(): + self.optimizer.apply(gradients, trainable_weights) + + # Update metrics (includes the metric that tracks the loss) + for metric in self.metrics: + if metric.name == "loss": + metric.update_state(loss) + else: + metric.update_state(y, y_pred) + + # Return a dict mapping metric names to current value + # Note that it will include the loss (tracked in self.metrics). + return {m.name: m.result() for m in self.metrics} +``` + +Let's try this out: + +```python +# Construct and compile an instance of CustomModel +inputs = keras.Input(shape=(32,)) +outputs = keras.layers.Dense(1)(inputs) +model = CustomModel(inputs, outputs) +model.compile(optimizer="adam", loss="mse", metrics=["mae"]) + +# Just use `fit` as usual +x = np.random.random((1000, 32)) +y = np.random.random((1000, 1)) +model.fit(x, y, epochs=3) +``` + +## Going lower-level + +Naturally, you could just skip passing a loss function in `compile()`, and instead do +everything *manually* in `train_step`. Likewise for metrics. + +Here's a lower-level example, that only uses `compile()` to configure the optimizer: + +- We start by creating `Metric` instances to track our loss and a MAE score (in `__init__()`). 
+
+- We implement a custom `train_step()` that updates the state of these metrics
+(by calling `update_state()` on them), then query them (via `result()`) to return their current average value,
+to be displayed by the progress bar and to be passed to any callback.
+- Note that we would need to call `reset_states()` on our metrics between each epoch! Otherwise
+calling `result()` would return an average since the start of training, whereas we usually work
+with per-epoch averages. Thankfully, the framework can do that for us: just list any metric
+you want to reset in the `metrics` property of the model. The model will call `reset_states()`
+on any object listed here at the beginning of each `fit()` epoch or at the beginning of a call to
+`evaluate()`.
+
+```python
+class CustomModel(keras.Model):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.loss_tracker = keras.metrics.Mean(name="loss")
+        self.mae_metric = keras.metrics.MeanAbsoluteError(name="mae")
+        self.loss_fn = keras.losses.MeanSquaredError()
+
+    def train_step(self, data):
+        x, y = data
+
+        # Call torch.nn.Module.zero_grad() to clear the leftover gradients
+        # for the weights from the previous train step.
+        self.zero_grad()
+
+        # Compute loss
+        y_pred = self(x, training=True)  # Forward pass
+        loss = self.loss_fn(y, y_pred)
+
+        # Call torch.Tensor.backward() on the loss to compute gradients
+        # for the weights.
+        loss.backward()
+
+        trainable_weights = [v for v in self.trainable_weights]
+        gradients = [v.value.grad for v in trainable_weights]
+
+        # Update weights
+        with torch.no_grad():
+            self.optimizer.apply(gradients, trainable_weights)
+
+        # Compute our own metrics
+        self.loss_tracker.update_state(loss)
+        self.mae_metric.update_state(y, y_pred)
+        return {
+            "loss": self.loss_tracker.result(),
+            "mae": self.mae_metric.result(),
+        }
+
+    @property
+    def metrics(self):
+        # We list our `Metric` objects here so that `reset_states()` can be
+        # called automatically at the start of each epoch
+        # or at the start of `evaluate()`.
+        return [self.loss_tracker, self.mae_metric]
+
+
+# Construct an instance of CustomModel
+inputs = keras.Input(shape=(32,))
+outputs = keras.layers.Dense(1)(inputs)
+model = CustomModel(inputs, outputs)
+
+# We don't pass a loss or metrics here.
+model.compile(optimizer="adam")
+
+# Just use `fit` as usual -- you can use callbacks, etc.
+x = np.random.random((1000, 32))
+y = np.random.random((1000, 1))
+model.fit(x, y, epochs=5)
+```
+
+## Supporting `sample_weight` & `class_weight`
+
+You may have noticed that our first basic example didn't make any mention of sample
+weighting. If you want to support the `fit()` arguments `sample_weight` and
+`class_weight`, you'd simply do the following:
+
+- Unpack `sample_weight` from the `data` argument
+- Pass it to `compute_loss` & `update_state` (of course, you could also just apply
+it manually if you don't rely on `compile()` for losses & metrics)
+- That's it.
+
+```python
+class CustomModel(keras.Model):
+    def train_step(self, data):
+        # Unpack the data. Its structure depends on your model and
+        # on what you pass to `fit()`.
+        if len(data) == 3:
+            x, y, sample_weight = data
+        else:
+            sample_weight = None
+            x, y = data
+
+        # Call torch.nn.Module.zero_grad() to clear the leftover gradients
+        # for the weights from the previous train step.
+        self.zero_grad()
+
+        # Compute loss
+        y_pred = self(x, training=True)  # Forward pass
+        loss = self.compute_loss(
+            y=y,
+            y_pred=y_pred,
+            sample_weight=sample_weight,
+        )
+
+        # Call torch.Tensor.backward() on the loss to compute gradients
+        # for the weights.
+        loss.backward()
+
+        trainable_weights = [v for v in self.trainable_weights]
+        gradients = [v.value.grad for v in trainable_weights]
+
+        # Update weights
+        with torch.no_grad():
+            self.optimizer.apply(gradients, trainable_weights)
+
+        # Update metrics (includes the metric that tracks the loss)
+        for metric in self.metrics:
+            if metric.name == "loss":
+                metric.update_state(loss)
+            else:
+                metric.update_state(y, y_pred, sample_weight=sample_weight)
+
+        # Return a dict mapping metric names to current value
+        # Note that it will include the loss (tracked in self.metrics).
+        return {m.name: m.result() for m in self.metrics}
+
+
+# Construct and compile an instance of CustomModel
+inputs = keras.Input(shape=(32,))
+outputs = keras.layers.Dense(1)(inputs)
+model = CustomModel(inputs, outputs)
+model.compile(optimizer="adam", loss="mse", metrics=["mae"])
+
+# You can now use sample_weight argument
+x = np.random.random((1000, 32))
+y = np.random.random((1000, 1))
+sw = np.random.random((1000, 1))
+model.fit(x, y, sample_weight=sw, epochs=3)
+```
+
+## Providing your own evaluation step
+
+What if you want to do the same for calls to `model.evaluate()`? Then you would
+override `test_step` in exactly the same way. Here's what it looks like:
+
+```python
+class CustomModel(keras.Model):
+    def test_step(self, data):
+        # Unpack the data
+        x, y = data
+        # Compute predictions
+        y_pred = self(x, training=False)
+        # Compute the loss
+        loss = self.compute_loss(y=y, y_pred=y_pred)
+        # Update the metrics.
+        for metric in self.metrics:
+            if metric.name == "loss":
+                metric.update_state(loss)
+            else:
+                metric.update_state(y, y_pred)
+        # Return a dict mapping metric names to current value.
+        # Note that it will include the loss (tracked in self.metrics).
+        return {m.name: m.result() for m in self.metrics}
+
+
+# Construct an instance of CustomModel
+inputs = keras.Input(shape=(32,))
+outputs = keras.layers.Dense(1)(inputs)
+model = CustomModel(inputs, outputs)
+model.compile(loss="mse", metrics=["mae"])
+
+# Evaluate with our custom test_step
+x = np.random.random((1000, 32))
+y = np.random.random((1000, 1))
+model.evaluate(x, y)
+```
+
+## Wrapping up: an end-to-end GAN example
+
+Let's walk through an end-to-end example that leverages everything you just learned.
+
+Let's consider:
+
+- A generator network meant to generate 28x28x1 images.
+- A discriminator network meant to classify 28x28x1 images into two classes ("fake" and
+"real").
+- One optimizer for each.
+- A loss function to train the discriminator.
+ +```python +# Create the discriminator +discriminator = keras.Sequential( + [ + keras.Input(shape=(28, 28, 1)), + layers.Conv2D(64, (3, 3), strides=(2, 2), padding="same"), + layers.LeakyReLU(negative_slope=0.2), + layers.Conv2D(128, (3, 3), strides=(2, 2), padding="same"), + layers.LeakyReLU(negative_slope=0.2), + layers.GlobalMaxPooling2D(), + layers.Dense(1), + ], + name="discriminator", +) + +# Create the generator +latent_dim = 128 +generator = keras.Sequential( + [ + keras.Input(shape=(latent_dim,)), + # We want to generate 128 coefficients to reshape into a 7x7x128 map + layers.Dense(7 * 7 * 128), + layers.LeakyReLU(negative_slope=0.2), + layers.Reshape((7, 7, 128)), + layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding="same"), + layers.LeakyReLU(negative_slope=0.2), + layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding="same"), + layers.LeakyReLU(negative_slope=0.2), + layers.Conv2D(1, (7, 7), padding="same", activation="sigmoid"), + ], + name="generator", +) +``` + +Here's a feature-complete GAN class, overriding `compile()` to use its own signature, +and implementing the entire GAN algorithm in 17 lines in `train_step`: + +```python +class GAN(keras.Model): + def __init__(self, discriminator, generator, latent_dim): + super().__init__() + self.discriminator = discriminator + self.generator = generator + self.latent_dim = latent_dim + self.d_loss_tracker = keras.metrics.Mean(name="d_loss") + self.g_loss_tracker = keras.metrics.Mean(name="g_loss") + self.seed_generator = keras.random.SeedGenerator(1337) + self.built = True + + @property + def metrics(self): + return [self.d_loss_tracker, self.g_loss_tracker] + + def compile(self, d_optimizer, g_optimizer, loss_fn): + super().compile() + self.d_optimizer = d_optimizer + self.g_optimizer = g_optimizer + self.loss_fn = loss_fn + + def train_step(self, real_images): + if isinstance(real_images, tuple): + real_images = real_images[0] + # Sample random points in the latent space + batch_size = real_images.shape[0] + random_latent_vectors = keras.random.normal( + shape=(batch_size, self.latent_dim), seed=self.seed_generator + ) + + # Decode them to fake images + generated_images = self.generator(random_latent_vectors) + + # Combine them with real images + real_images = torch.tensor(real_images) + combined_images = torch.concat([generated_images, real_images], axis=0) + + # Assemble labels discriminating real from fake images + labels = torch.concat( + [torch.ones((batch_size, 1)), torch.zeros((batch_size, 1))], axis=0 + ) + # Add random noise to the labels - important trick! + labels += 0.05 * keras.random.uniform( + labels.shape, seed=self.seed_generator + ) + + # Train the discriminator + self.zero_grad() + predictions = self.discriminator(combined_images) + d_loss = self.loss_fn(labels, predictions) + d_loss.backward() + grads = [v.value.grad for v in self.discriminator.trainable_weights] + with torch.no_grad(): + self.d_optimizer.apply(grads, self.discriminator.trainable_weights) + + # Sample random points in the latent space + random_latent_vectors = keras.random.normal( + shape=(batch_size, self.latent_dim), seed=self.seed_generator + ) + + # Assemble labels that say "all real images" + misleading_labels = torch.zeros((batch_size, 1)) + + # Train the generator (note that we should *not* update the weights + # of the discriminator)! 
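+        # backward() below populates gradients for every weight that
+        # contributed to g_loss (including the discriminator's), but only
+        # the generator's gradients are fetched and applied, so the
+        # discriminator's weights are not updated in this step.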
+        self.zero_grad()
+        predictions = self.discriminator(self.generator(random_latent_vectors))
+        g_loss = self.loss_fn(misleading_labels, predictions)
+        g_loss.backward()
+        grads = [v.value.grad for v in self.generator.trainable_weights]
+        with torch.no_grad():
+            self.g_optimizer.apply(grads, self.generator.trainable_weights)
+
+        # Update metrics and return their value.
+        self.d_loss_tracker.update_state(d_loss)
+        self.g_loss_tracker.update_state(g_loss)
+        return {
+            "d_loss": self.d_loss_tracker.result(),
+            "g_loss": self.g_loss_tracker.result(),
+        }
+```
+
+Let's test-drive it:
+
+```python
+# Prepare the dataset. We use both the training & test MNIST digits.
+batch_size = 64
+(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
+all_digits = np.concatenate([x_train, x_test])
+all_digits = all_digits.astype("float32") / 255.0
+all_digits = np.reshape(all_digits, (-1, 28, 28, 1))
+
+# Create a TensorDataset
+dataset = torch.utils.data.TensorDataset(
+    torch.from_numpy(all_digits), torch.from_numpy(all_digits)
+)
+# Create a DataLoader
+dataloader = torch.utils.data.DataLoader(
+    dataset, batch_size=batch_size, shuffle=True
+)
+
+gan = GAN(
+    discriminator=discriminator, generator=generator, latent_dim=latent_dim
+)
+gan.compile(
+    d_optimizer=keras.optimizers.Adam(learning_rate=0.0003),
+    g_optimizer=keras.optimizers.Adam(learning_rate=0.0003),
+    loss_fn=keras.losses.BinaryCrossentropy(from_logits=True),
+)
+
+gan.fit(dataloader, epochs=1)
+```
+
+The ideas behind deep learning are simple, so why should their implementation be painful?
+
diff --git a/.tether/vignettes-src/customizing_saving_and_serialization.Rmd b/.tether/vignettes-src/customizing_saving_and_serialization.Rmd
new file mode 100644
index 0000000000..c2f744a3f7
--- /dev/null
+++ b/.tether/vignettes-src/customizing_saving_and_serialization.Rmd
@@ -0,0 +1,328 @@
+---
+title: Customizing Saving and Serialization
+author: Neel Kovelamudi
+date-created: 2023/03/15
+last-modified: 2023/03/15
+description: A more advanced guide on customizing saving for your layers and models.
+accelerator: None
+output: rmarkdown::html_vignette
+knit: ({source(here::here("tools/knit.R")); knit_vignette})
+tether: ~/github/keras-team/keras-io/guides/customizing_saving_and_serialization.py
+---
+
+## Introduction
+
+This guide covers advanced methods that can be customized in Keras saving. For most
+users, the methods outlined in the primary
+[Serialize, save, and export guide](https://keras.io/guides/serialization_and_saving)
+are sufficient.
+
+### APIs
+We will cover the following APIs:
+
+- `save_assets()` and `load_assets()`
+- `save_own_variables()` and `load_own_variables()`
+- `get_build_config()` and `build_from_config()`
+- `get_compile_config()` and `compile_from_config()`
+
+When restoring a model, these get executed in the following order:
+
+- `build_from_config()`
+- `compile_from_config()`
+- `load_own_variables()`
+- `load_assets()`
+
+## Setup
+
+```python
+import os
+import numpy as np
+import keras
+```
+
+## State saving customization
+
+These methods determine how the state of your model's layers is saved when calling
+`model.save()`. You can override them to take full control of the state saving process.
+
+### `save_own_variables()` and `load_own_variables()`
+
+These methods save and load the state variables of the layer when `model.save()` and
+`keras.models.load_model()` are called, respectively. By default, the state variables
+saved and loaded are the weights of the layer (both trainable and non-trainable). Here is
+the default implementation of `save_own_variables()`:
+
+```python
+def save_own_variables(self, store):
+    all_vars = self._trainable_weights + self._non_trainable_weights
+    for i, v in enumerate(all_vars):
+        store[f"{i}"] = v.numpy()
+```
+
+The store used by these methods is a dictionary that can be populated with the layer
+variables. Let's take a look at an example customizing this.
+
+**Example:**
+
+```python
+@keras.utils.register_keras_serializable(package="my_custom_package")
+class LayerWithCustomVariable(keras.layers.Dense):
+    def __init__(self, units, **kwargs):
+        super().__init__(units, **kwargs)
+        self.my_variable = keras.Variable(
+            np.random.random((units,)), name="my_variable", dtype="float32"
+        )
+
+    def save_own_variables(self, store):
+        super().save_own_variables(store)
+        # Stores the value of the variable upon saving
+        store["variables"] = self.my_variable.numpy()
+
+    def load_own_variables(self, store):
+        # Assigns the value of the variable upon loading
+        self.my_variable.assign(store["variables"])
+        # Load the remaining weights
+        for i, v in enumerate(self.weights):
+            v.assign(store[f"{i}"])
+        # Note: You must specify how all variables (including layer weights)
+        # are loaded in `load_own_variables()`.
+
+    def call(self, inputs):
+        dense_out = super().call(inputs)
+        return dense_out + self.my_variable
+
+
+model = keras.Sequential([LayerWithCustomVariable(1)])
+
+ref_input = np.random.random((8, 10))
+ref_output = np.random.random((8, 10))
+model.compile(optimizer="adam", loss="mean_squared_error")
+model.fit(ref_input, ref_output)
+
+model.save("custom_vars_model.keras")
+restored_model = keras.models.load_model("custom_vars_model.keras")
+
+np.testing.assert_allclose(
+    model.layers[0].my_variable.numpy(),
+    restored_model.layers[0].my_variable.numpy(),
+)
+```
+
+### `save_assets()` and `load_assets()`
+
+These methods can be added to your model class definition to store and load any
+additional information that your model needs.
+
+For example, NLP domain layers such as TextVectorization layers and IndexLookup layers
+may need to store their associated vocabulary (or lookup table) in a text file upon
+saving.
+
+Let's take a look at the basics of this workflow with a simple file `assets.txt`.
+
+**Example:**
+
+```python
+@keras.saving.register_keras_serializable(package="my_custom_package")
+class LayerWithCustomAssets(keras.layers.Dense):
+    def __init__(self, vocab=None, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.vocab = vocab
+
+    def save_assets(self, inner_path):
+        # Writes the vocab (sentence) to text file at save time.
+        with open(os.path.join(inner_path, "vocabulary.txt"), "w") as f:
+            f.write(self.vocab)
+
+    def load_assets(self, inner_path):
+        # Reads the vocab (sentence) from text file at load time.
+        with open(os.path.join(inner_path, "vocabulary.txt"), "r") as f:
+            text = f.read()
+        self.vocab = text.replace("<unk>", "little")
+
+
+model = keras.Sequential(
+    [LayerWithCustomAssets(vocab="Mary had a <unk> lamb.", units=5)]
+)
+
+x = np.random.random((10, 10))
+y = model(x)
+
+model.save("custom_assets_model.keras")
+restored_model = keras.models.load_model("custom_assets_model.keras")
+
+np.testing.assert_string_equal(
+    restored_model.layers[0].vocab, "Mary had a little lamb."
+)
+```
+
+## `build` and `compile` saving customization
+
+### `get_build_config()` and `build_from_config()`
+
+These methods work together to save the layer's built states and restore them upon
+loading.
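+
+For orientation, the default behavior is roughly equivalent to the sketch below.
+(This is an illustration, not the exact Keras source; the `_build_input_shape`
+attribute name is an assumption made here for demonstration purposes.)
+
+```python
+def get_build_config(self):
+    # Record the input shape the layer was built with (assumed attribute name).
+    return {"input_shape": self._build_input_shape}
+
+
+def build_from_config(self, config):
+    # Re-run build() with the stored shape at load time.
+    self.build(config["input_shape"])
+```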
+ +By default, this only includes a build config dictionary with the layer's input shape, +but overriding these methods can be used to include further Variables and Lookup Tables +that can be useful to restore for your built model. + +**Example:** + +```python +@keras.saving.register_keras_serializable(package="my_custom_package") +class LayerWithCustomBuild(keras.layers.Layer): + def __init__(self, units=32, **kwargs): + super().__init__(**kwargs) + self.units = units + + def call(self, inputs): + return keras.ops.matmul(inputs, self.w) + self.b + + def get_config(self): + return dict(units=self.units, **super().get_config()) + + def build(self, input_shape, layer_init): + # Note the overriding of `build()` to add an extra argument. + # Therefore, we will need to manually call build with `layer_init` argument + # before the first execution of `call()`. + super().build(input_shape) + self._input_shape = input_shape + self.w = self.add_weight( + shape=(input_shape[-1], self.units), + initializer=layer_init, + trainable=True, + ) + self.b = self.add_weight( + shape=(self.units,), + initializer=layer_init, + trainable=True, + ) + self.layer_init = layer_init + + def get_build_config(self): + build_config = { + "layer_init": self.layer_init, + "input_shape": self._input_shape, + } # Stores our initializer for `build()` + return build_config + + def build_from_config(self, config): + # Calls `build()` with the parameters at loading time + self.build(config["input_shape"], config["layer_init"]) + + +custom_layer = LayerWithCustomBuild(units=16) +custom_layer.build(input_shape=(8,), layer_init="random_normal") + +model = keras.Sequential( + [ + custom_layer, + keras.layers.Dense(1, activation="sigmoid"), + ] +) + +x = np.random.random((16, 8)) +y = model(x) + +model.save("custom_build_model.keras") +restored_model = keras.models.load_model("custom_build_model.keras") + +np.testing.assert_equal(restored_model.layers[0].layer_init, "random_normal") +np.testing.assert_equal(restored_model.built, True) +``` + +### `get_compile_config()` and `compile_from_config()` + +These methods work together to save the information with which the model was compiled +(optimizers, losses, etc.) and restore and re-compile the model with this information. + +Overriding these methods can be useful for compiling the restored model with custom +optimizers, custom losses, etc., as these will need to be deserialized prior to calling +`model.compile` in `compile_from_config()`. + +Let's take a look at an example of this. 
+
+**Example:**
+
+```python
+@keras.saving.register_keras_serializable(package="my_custom_package")
+def small_square_sum_loss(y_true, y_pred):
+    loss = keras.ops.square(y_pred - y_true)
+    loss = loss / 10.0
+    loss = keras.ops.sum(loss, axis=1)
+    return loss
+
+
+@keras.saving.register_keras_serializable(package="my_custom_package")
+def mean_pred(y_true, y_pred):
+    return keras.ops.mean(y_pred)
+
+
+@keras.saving.register_keras_serializable(package="my_custom_package")
+class ModelWithCustomCompile(keras.Model):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.dense1 = keras.layers.Dense(8, activation="relu")
+        self.dense2 = keras.layers.Dense(4, activation="softmax")
+
+    def call(self, inputs):
+        x = self.dense1(inputs)
+        return self.dense2(x)
+
+    def compile(self, optimizer, loss_fn, metrics):
+        super().compile(optimizer=optimizer, loss=loss_fn, metrics=metrics)
+        self.model_optimizer = optimizer
+        self.loss_fn = loss_fn
+        self.loss_metrics = metrics
+
+    def get_compile_config(self):
+        # These parameters will be serialized at saving time.
+        return {
+            "model_optimizer": self.model_optimizer,
+            "loss_fn": self.loss_fn,
+            "metric": self.loss_metrics,
+        }
+
+    def compile_from_config(self, config):
+        # Deserializes the compile parameters (important, since many are custom)
+        optimizer = keras.utils.deserialize_keras_object(config["model_optimizer"])
+        loss_fn = keras.utils.deserialize_keras_object(config["loss_fn"])
+        metrics = keras.utils.deserialize_keras_object(config["metric"])
+
+        # Calls compile with the deserialized parameters
+        self.compile(optimizer=optimizer, loss_fn=loss_fn, metrics=metrics)
+
+
+model = ModelWithCustomCompile()
+model.compile(
+    optimizer="SGD", loss_fn=small_square_sum_loss, metrics=["accuracy", mean_pred]
+)
+
+x = np.random.random((4, 8))
+y = np.random.random((4,))
+
+model.fit(x, y)
+
+model.save("custom_compile_model.keras")
+restored_model = keras.models.load_model("custom_compile_model.keras")
+
+np.testing.assert_equal(model.model_optimizer, restored_model.model_optimizer)
+np.testing.assert_equal(model.loss_fn, restored_model.loss_fn)
+np.testing.assert_equal(model.loss_metrics, restored_model.loss_metrics)
+```
+
+## Conclusion
+
+Using the methods learned in this tutorial allows for a wide variety of use cases,
+allowing the saving and loading of complex models with exotic assets and state
+elements. To recap:
+
+- `save_own_variables` and `load_own_variables` determine how your states are saved
+and loaded.
+- `save_assets` and `load_assets` can be added to store and load any additional
+information your model needs.
+- `get_build_config` and `build_from_config` save and restore the model's built
+states.
+- `get_compile_config` and `compile_from_config` save and restore the model's
+compiled states.
+
diff --git a/.tether/vignettes-src/customizing_what_happens_in_fit.Rmd b/.tether/vignettes-src/customizing_what_happens_in_fit.Rmd
new file mode 100644
index 0000000000..8528d96b4f
--- /dev/null
+++ b/.tether/vignettes-src/customizing_what_happens_in_fit.Rmd
@@ -0,0 +1,432 @@
+---
+title: Customizing what happens in `fit()`
+author: '[fchollet](https://twitter.com/fchollet)'
+date-created: 2020/04/15
+last-modified: 2023/06/14
+description: Complete guide to overriding the training step of the Model class.
+accelerator: GPU
+output: rmarkdown::html_vignette
+knit: ({source(here::here("tools/knit.R")); knit_vignette})
+---
+
+## Introduction
+
+When you're doing supervised learning, you can use `fit()` and everything works
+smoothly.
+ +When you need to write your own training loop from scratch, you can use the +`GradientTape` and take control of every little detail. + +But what if you need a custom training algorithm, but you still want to benefit from +the convenient features of `fit()`, such as callbacks, built-in distribution support, +or step fusing? + +A core principle of Keras is **progressive disclosure of complexity**. You should +always be able to get into lower-level workflows in a gradual way. You shouldn't fall +off a cliff if the high-level functionality doesn't exactly match your use case. You +should be able to gain more control over the small details while retaining a +commensurate amount of high-level convenience. + +When you need to customize what `fit()` does, you should **override the training step +function of the `Model` class**. This is the function that is called by `fit()` for +every batch of data. You will then be able to call `fit()` as usual -- and it will be +running your own learning algorithm. + +Note that this pattern does not prevent you from building models with the Functional +API. You can do this whether you're building `Sequential` models, Functional API +models, or subclassed models. + +Let's see how that works. + +## Setup + +Requires TensorFlow 2.8 or later. + +```python +import tensorflow as tf +import keras +``` + +## A first simple example + +Let's start from a simple example: + +- We create a new class that subclasses `keras.Model`. +- We just override the method `train_step(self, data)`. +- We return a dictionary mapping metric names (including the loss) to their current +value. + +The input argument `data` is what gets passed to fit as training data: + +- If you pass Numpy arrays, by calling `fit(x, y, ...)`, then `data` will be the tuple +`(x, y)` +- If you pass a `tf.data.Dataset`, by calling `fit(dataset, ...)`, then `data` will be +what gets yielded by `dataset` at each batch. + +In the body of the `train_step` method, we implement a regular training update, +similar to what you are already familiar with. Importantly, **we compute the loss via +`self.compute_loss()`**, which wraps the loss(es) function(s) that were passed to +`compile()`. + +Similarly, we call `metric.update_state(y, y_pred)` on metrics from `self.metrics`, +to update the state of the metrics that were passed in `compile()`, +and we query results from `self.metrics` at the end to retrieve their current value. + +```python +class CustomModel(keras.Model): + def train_step(self, data): + # Unpack the data. Its structure depends on your model and + # on what you pass to `fit()`. 
+        x, y = data
+
+        with tf.GradientTape() as tape:
+            y_pred = self(x, training=True)  # Forward pass
+            # Compute the loss value
+            # (the loss function is configured in `compile()`)
+            loss = self.compute_loss(y=y, y_pred=y_pred)
+
+        # Compute gradients
+        trainable_vars = self.trainable_variables
+        gradients = tape.gradient(loss, trainable_vars)
+        # Update weights
+        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
+        # Update metrics (includes the metric that tracks the loss)
+        for metric in self.metrics:
+            if metric.name == "loss":
+                metric.update_state(loss)
+            else:
+                metric.update_state(y, y_pred)
+        # Return a dict mapping metric names to current value
+        return {m.name: m.result() for m in self.metrics}
+```
+
+Let's try this out:
+
+```python
+import numpy as np
+
+# Construct and compile an instance of CustomModel
+inputs = keras.Input(shape=(32,))
+outputs = keras.layers.Dense(1)(inputs)
+model = CustomModel(inputs, outputs)
+model.compile(optimizer="adam", loss="mse", metrics=["mae"])
+
+# Just use `fit` as usual
+x = np.random.random((1000, 32))
+y = np.random.random((1000, 1))
+model.fit(x, y, epochs=3)
+```
+
+## Going lower-level
+
+Naturally, you could just skip passing a loss function in `compile()`, and instead do
+everything *manually* in `train_step`. Likewise for metrics.
+
+Here's a lower-level example, that only uses `compile()` to configure the optimizer:
+
+- We start by creating `Metric` instances to track our loss and a MAE score (in `__init__()`).
+- We implement a custom `train_step()` that updates the state of these metrics
+(by calling `update_state()` on them), then query them (via `result()`) to return their current average value,
+to be displayed by the progress bar and to be passed to any callback.
+- Note that we would need to call `reset_states()` on our metrics between each epoch! Otherwise
+calling `result()` would return an average since the start of training, whereas we usually work
+with per-epoch averages. Thankfully, the framework can do that for us: just list any metric
+you want to reset in the `metrics` property of the model. The model will call `reset_states()`
+on any object listed here at the beginning of each `fit()` epoch or at the beginning of a call to
+`evaluate()`.
+
+```python
+class CustomModel(keras.Model):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.loss_tracker = keras.metrics.Mean(name="loss")
+        self.mae_metric = keras.metrics.MeanAbsoluteError(name="mae")
+
+    def train_step(self, data):
+        x, y = data
+
+        with tf.GradientTape() as tape:
+            y_pred = self(x, training=True)  # Forward pass
+            # Compute our own loss
+            loss = keras.losses.mean_squared_error(y, y_pred)
+
+        # Compute gradients
+        trainable_vars = self.trainable_variables
+        gradients = tape.gradient(loss, trainable_vars)
+
+        # Update weights
+        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
+
+        # Compute our own metrics
+        self.loss_tracker.update_state(loss)
+        self.mae_metric.update_state(y, y_pred)
+        return {"loss": self.loss_tracker.result(), "mae": self.mae_metric.result()}
+
+    @property
+    def metrics(self):
+        # We list our `Metric` objects here so that `reset_states()` can be
+        # called automatically at the start of each epoch
+        # or at the start of `evaluate()`.
+        # If you don't implement this property, you have to call
+        # `reset_states()` yourself at the time of your choosing.
+        return [self.loss_tracker, self.mae_metric]
+
+
+# Construct an instance of CustomModel
+inputs = keras.Input(shape=(32,))
+outputs = keras.layers.Dense(1)(inputs)
+model = CustomModel(inputs, outputs)
+
+# We don't pass a loss or metrics here.
+model.compile(optimizer="adam")
+
+# Just use `fit` as usual -- you can use callbacks, etc.
+x = np.random.random((1000, 32))
+y = np.random.random((1000, 1))
+model.fit(x, y, epochs=5)
+```
+
+## Supporting `sample_weight` & `class_weight`
+
+You may have noticed that our first basic example didn't make any mention of sample
+weighting. If you want to support the `fit()` arguments `sample_weight` and
+`class_weight`, you'd simply do the following:
+
+- Unpack `sample_weight` from the `data` argument
+- Pass it to `compute_loss` & `update_state` (of course, you could also just apply
+it manually if you don't rely on `compile()` for losses & metrics)
+- That's it.
+
+```python
+class CustomModel(keras.Model):
+    def train_step(self, data):
+        # Unpack the data. Its structure depends on your model and
+        # on what you pass to `fit()`.
+        if len(data) == 3:
+            x, y, sample_weight = data
+        else:
+            sample_weight = None
+            x, y = data
+
+        with tf.GradientTape() as tape:
+            y_pred = self(x, training=True)  # Forward pass
+            # Compute the loss value.
+            # The loss function is configured in `compile()`.
+            loss = self.compute_loss(
+                y=y,
+                y_pred=y_pred,
+                sample_weight=sample_weight,
+            )
+
+        # Compute gradients
+        trainable_vars = self.trainable_variables
+        gradients = tape.gradient(loss, trainable_vars)
+
+        # Update weights
+        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
+
+        # Update the metrics.
+        # Metrics are configured in `compile()`.
+        for metric in self.metrics:
+            if metric.name == "loss":
+                metric.update_state(loss)
+            else:
+                metric.update_state(y, y_pred, sample_weight=sample_weight)
+
+        # Return a dict mapping metric names to current value.
+        # Note that it will include the loss (tracked in self.metrics).
+ return {m.name: m.result() for m in self.metrics} + + +# Construct an instance of CustomModel +inputs = keras.Input(shape=(32,)) +outputs = keras.layers.Dense(1)(inputs) +model = CustomModel(inputs, outputs) +model.compile(loss="mse", metrics=["mae"]) + +# Evaluate with our custom test_step +x = np.random.random((1000, 32)) +y = np.random.random((1000, 1)) +model.evaluate(x, y) +``` + +## Wrapping up: an end-to-end GAN example + +Let's walk through an end-to-end example that leverages everything you just learned. + +Let's consider: + +- A generator network meant to generate 28x28x1 images. +- A discriminator network meant to classify 28x28x1 images into two classes ("fake" and +"real"). +- One optimizer for each. +- A loss function to train the discriminator. + +```python +from tensorflow.keras import layers + +# Create the discriminator +discriminator = keras.Sequential( + [ + keras.Input(shape=(28, 28, 1)), + layers.Conv2D(64, (3, 3), strides=(2, 2), padding="same"), + layers.LeakyReLU(alpha=0.2), + layers.Conv2D(128, (3, 3), strides=(2, 2), padding="same"), + layers.LeakyReLU(alpha=0.2), + layers.GlobalMaxPooling2D(), + layers.Dense(1), + ], + name="discriminator", +) + +# Create the generator +latent_dim = 128 +generator = keras.Sequential( + [ + keras.Input(shape=(latent_dim,)), + # We want to generate 128 coefficients to reshape into a 7x7x128 map + layers.Dense(7 * 7 * 128), + layers.LeakyReLU(alpha=0.2), + layers.Reshape((7, 7, 128)), + layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding="same"), + layers.LeakyReLU(alpha=0.2), + layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding="same"), + layers.LeakyReLU(alpha=0.2), + layers.Conv2D(1, (7, 7), padding="same", activation="sigmoid"), + ], + name="generator", +) +``` + +Here's a feature-complete GAN class, overriding `compile()` to use its own signature, +and implementing the entire GAN algorithm in 17 lines in `train_step`: + +```python +class GAN(keras.Model): + def __init__(self, discriminator, generator, latent_dim): + super().__init__() + self.discriminator = discriminator + self.generator = generator + self.latent_dim = latent_dim + self.d_loss_tracker = keras.metrics.Mean(name="d_loss") + self.g_loss_tracker = keras.metrics.Mean(name="g_loss") + + def compile(self, d_optimizer, g_optimizer, loss_fn): + super().compile() + self.d_optimizer = d_optimizer + self.g_optimizer = g_optimizer + self.loss_fn = loss_fn + + def train_step(self, real_images): + if isinstance(real_images, tuple): + real_images = real_images[0] + # Sample random points in the latent space + batch_size = tf.shape(real_images)[0] + random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim)) + + # Decode them to fake images + generated_images = self.generator(random_latent_vectors) + + # Combine them with real images + combined_images = tf.concat([generated_images, real_images], axis=0) + + # Assemble labels discriminating real from fake images + labels = tf.concat( + [tf.ones((batch_size, 1)), tf.zeros((batch_size, 1))], axis=0 + ) + # Add random noise to the labels - important trick! 
+        labels += 0.05 * tf.random.uniform(tf.shape(labels))
+
+        # Train the discriminator
+        with tf.GradientTape() as tape:
+            predictions = self.discriminator(combined_images)
+            d_loss = self.loss_fn(labels, predictions)
+        grads = tape.gradient(d_loss, self.discriminator.trainable_weights)
+        self.d_optimizer.apply_gradients(
+            zip(grads, self.discriminator.trainable_weights)
+        )
+
+        # Sample random points in the latent space
+        random_latent_vectors = tf.random.normal(shape=(batch_size, self.latent_dim))
+
+        # Assemble labels that say "all real images"
+        misleading_labels = tf.zeros((batch_size, 1))
+
+        # Train the generator (note that we should *not* update the weights
+        # of the discriminator)!
+        with tf.GradientTape() as tape:
+            predictions = self.discriminator(self.generator(random_latent_vectors))
+            g_loss = self.loss_fn(misleading_labels, predictions)
+        grads = tape.gradient(g_loss, self.generator.trainable_weights)
+        self.g_optimizer.apply_gradients(zip(grads, self.generator.trainable_weights))
+
+        # Update metrics and return their value.
+        self.d_loss_tracker.update_state(d_loss)
+        self.g_loss_tracker.update_state(g_loss)
+        return {
+            "d_loss": self.d_loss_tracker.result(),
+            "g_loss": self.g_loss_tracker.result(),
+        }
+```
+
+Let's test-drive it:
+
+```python
+# Prepare the dataset. We use both the training & test MNIST digits.
+batch_size = 64
+(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
+all_digits = np.concatenate([x_train, x_test])
+all_digits = all_digits.astype("float32") / 255.0
+all_digits = np.reshape(all_digits, (-1, 28, 28, 1))
+dataset = tf.data.Dataset.from_tensor_slices(all_digits)
+dataset = dataset.shuffle(buffer_size=1024).batch(batch_size)
+
+gan = GAN(discriminator=discriminator, generator=generator, latent_dim=latent_dim)
+gan.compile(
+    d_optimizer=keras.optimizers.Adam(learning_rate=0.0003),
+    g_optimizer=keras.optimizers.Adam(learning_rate=0.0003),
+    loss_fn=keras.losses.BinaryCrossentropy(from_logits=True),
+)
+
+# To limit the execution time, we only train on 100 batches. You can train on
+# the entire dataset. You will need about 20 epochs to get nice results.
+gan.fit(dataset.take(100), epochs=1)
+```
+
+The ideas behind deep learning are simple, so why should their implementation be painful?
diff --git a/.tether/vignettes-src/distributed_training.Rmd b/.tether/vignettes-src/distributed_training.Rmd
new file mode 100644
index 0000000000..024879df83
--- /dev/null
+++ b/.tether/vignettes-src/distributed_training.Rmd
@@ -0,0 +1,446 @@
+---
+title: Multi-GPU and distributed training
+author: '[fchollet](https://twitter.com/fchollet)'
+date-created: 2020/04/28
+last-modified: 2020/04/29
+description: Guide to multi-GPU & distributed training for Keras models.
+accelerator: GPU
+output: rmarkdown::html_vignette
+knit: ({source(here::here("tools/knit.R")); knit_vignette})
+---
+
+## Introduction
+
+There are generally two ways to distribute computation across multiple devices:
+
+**Data parallelism**, where a single model gets replicated on multiple devices or
+multiple machines. Each of them processes different batches of data, then they merge
+their results. There exist many variants of this setup that differ in how the different
+model replicas merge results, in whether they stay in sync at every batch or whether they
+are more loosely coupled, etc.
+
+**Model parallelism**, where different parts of a single model run on different devices,
+processing a single batch of data together.
This works best with models that have a +naturally-parallel architecture, such as models that feature multiple branches. + +This guide focuses on data parallelism, in particular **synchronous data parallelism**, +where the different replicas of the model stay in sync after each batch they process. +Synchronicity keeps the model convergence behavior identical to what you would see for +single-device training. + +Specifically, this guide teaches you how to use the `tf.distribute` API to train Keras +models on multiple GPUs, with minimal changes to your code, in the following two setups: + +- On multiple GPUs (typically 2 to 8) installed on a single machine (single host, +multi-device training). This is the most common setup for researchers and small-scale +industry workflows. +- On a cluster of many machines, each hosting one or multiple GPUs (multi-worker +distributed training). This is a good setup for large-scale industry workflows, e.g. +training high-resolution image classification models on tens of millions of images using +20-100 GPUs. + +## Setup + +```python +import tensorflow as tf +import keras +``` + +## Single-host, multi-device synchronous training + +In this setup, you have one machine with several GPUs on it (typically 2 to 8). Each +device will run a copy of your model (called a **replica**). For simplicity, in what +follows, we'll assume we're dealing with 8 GPUs, at no loss of generality. + +**How it works** + +At each step of training: + +- The current batch of data (called **global batch**) is split into 8 different +sub-batches (called **local batches**). For instance, if the global batch has 512 +samples, each of the 8 local batches will have 64 samples. +- Each of the 8 replicas independently processes a local batch: they run a forward pass, +then a backward pass, outputting the gradient of the weights with respect to the loss of +the model on the local batch. +- The weight updates originating from local gradients are efficiently merged across the 8 +replicas. Because this is done at the end of every step, the replicas always stay in +sync. + +In practice, the process of synchronously updating the weights of the model replicas is +handled at the level of each individual weight variable. This is done through a **mirrored +variable** object. + +**How to use it** + +To do single-host, multi-device synchronous training with a Keras model, you would use +the [`tf.distribute.MirroredStrategy` API]( + https://www.tensorflow.org/api_docs/python/tf/distribute/MirroredStrategy). +Here's how it works: + +- Instantiate a `MirroredStrategy`, optionally configuring which specific devices you +want to use (by default the strategy will use all GPUs available). +- Use the strategy object to open a scope, and within this scope, create all the Keras +objects you need that contain variables. Typically, that means **creating & compiling the +model** inside the distribution scope. +- Train the model via `fit()` as usual. + +Importantly, we recommend that you use `tf.data.Dataset` objects to load data +in a multi-device or distributed workflow. + +Schematically, it looks like this: + +```python +# Create a MirroredStrategy. +strategy = tf.distribute.MirroredStrategy() +print('Number of devices: {}'.format(strategy.num_replicas_in_sync)) + +# Open a strategy scope. +with strategy.scope(): + # Everything that creates variables should be under the strategy scope. + # In general this is only model construction & `compile()`. + model = Model(...) + model.compile(...) 
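+    # Variables created inside the scope are mirrored across the replicas;
+    # anything created outside the scope behaves as in single-device training.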
+
+# Train the model on all available devices.
+model.fit(train_dataset, validation_data=val_dataset, ...)
+
+# Test the model on all available devices.
+model.evaluate(test_dataset)
+```
+
+Here's a simple end-to-end runnable example:
+
+```python
+def get_compiled_model():
+    # Make a simple 2-layer densely-connected neural network.
+    inputs = keras.Input(shape=(784,))
+    x = keras.layers.Dense(256, activation="relu")(inputs)
+    x = keras.layers.Dense(256, activation="relu")(x)
+    outputs = keras.layers.Dense(10)(x)
+    model = keras.Model(inputs, outputs)
+    model.compile(
+        optimizer=keras.optimizers.Adam(),
+        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+        metrics=[keras.metrics.SparseCategoricalAccuracy()],
+    )
+    return model
+
+
+def get_dataset():
+    batch_size = 32
+    num_val_samples = 10000
+
+    # Return the MNIST dataset in the form of a `tf.data.Dataset`.
+    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
+
+    # Preprocess the data (these are Numpy arrays)
+    x_train = x_train.reshape(-1, 784).astype("float32") / 255
+    x_test = x_test.reshape(-1, 784).astype("float32") / 255
+    y_train = y_train.astype("float32")
+    y_test = y_test.astype("float32")
+
+    # Reserve num_val_samples samples for validation
+    x_val = x_train[-num_val_samples:]
+    y_val = y_train[-num_val_samples:]
+    x_train = x_train[:-num_val_samples]
+    y_train = y_train[:-num_val_samples]
+    return (
+        tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size),
+        tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(batch_size),
+        tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size),
+    )
+
+
+# Create a MirroredStrategy.
+strategy = tf.distribute.MirroredStrategy()
+print("Number of devices: {}".format(strategy.num_replicas_in_sync))
+
+# Open a strategy scope.
+with strategy.scope():
+    # Everything that creates variables should be under the strategy scope.
+    # In general this is only model construction & `compile()`.
+    model = get_compiled_model()
+
+# Train the model on all available devices.
+train_dataset, val_dataset, test_dataset = get_dataset()
+model.fit(train_dataset, epochs=2, validation_data=val_dataset)
+
+# Test the model on all available devices.
+model.evaluate(test_dataset)
+```
+
+## Using callbacks to ensure fault tolerance
+
+When using distributed training, you should always make sure you have a strategy to
+recover from failure (fault tolerance). The simplest way to handle this is to pass a
+`ModelCheckpoint` callback to `fit()`, to save your model
+at regular intervals (e.g. every 100 batches or every epoch). You can then restart
+training from your saved model.
+
+Here's a simple example:
+
+```python
+import os
+from tensorflow import keras
+
+# Prepare a directory to store all the checkpoints.
+checkpoint_dir = "./ckpt"
+if not os.path.exists(checkpoint_dir):
+    os.makedirs(checkpoint_dir)
+
+
+def make_or_restore_model():
+    # Either restore the latest model, or create a fresh one
+    # if there is no checkpoint available.
+    checkpoints = [checkpoint_dir + "/" + name for name in os.listdir(checkpoint_dir)]
+    if checkpoints:
+        latest_checkpoint = max(checkpoints, key=os.path.getctime)
+        print("Restoring from", latest_checkpoint)
+        return keras.models.load_model(latest_checkpoint)
+    print("Creating a new model")
+    return get_compiled_model()
+
+
+def run_training(epochs=1):
+    # Create a MirroredStrategy.
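+    # (Note: both creating a fresh model and restoring one from a checkpoint
+    # should happen inside the strategy scope opened below, so that the
+    # variables are created as mirrored variables.)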
+    strategy = tf.distribute.MirroredStrategy()
+
+    # Open a strategy scope and create/restore the model
+    with strategy.scope():
+        model = make_or_restore_model()
+
+    callbacks = [
+        # This callback saves a SavedModel every epoch
+        # We include the current epoch in the folder name.
+        keras.callbacks.ModelCheckpoint(
+            filepath=checkpoint_dir + "/ckpt-{epoch}", save_freq="epoch"
+        )
+    ]
+    model.fit(
+        train_dataset,
+        epochs=epochs,
+        callbacks=callbacks,
+        validation_data=val_dataset,
+        verbose=2,
+    )
+
+
+# Running the first time creates the model
+run_training(epochs=1)
+
+# Calling the same function again will resume from where we left off
+run_training(epochs=1)
+```
+
+## `tf.data` performance tips
+
+When doing distributed training, the efficiency with which you load data can often become
+critical. Here are a few tips to make sure your `tf.data` pipelines
+run as fast as possible.
+
+**Note about dataset batching**
+
+When creating your dataset, make sure it is batched with the global batch size.
+For instance, if each of your 8 GPUs is capable of running a batch of 64 samples, you
+can use a global batch size of 512.
+
+**Calling `dataset.cache()`**
+
+If you call `.cache()` on a dataset, its data will be cached after running through the
+first iteration over the data. Every subsequent iteration will use the cached data. The
+cache can be in memory (default) or in a local file you specify.
+
+This can improve performance when:
+
+- Your data is not expected to change from iteration to iteration
+- You are reading data from a remote distributed filesystem
+- You are reading data from local disk, but your data would fit in memory and your
+workflow is significantly IO-bound (e.g. reading & decoding image files).
+
+**Calling `dataset.prefetch(buffer_size)`**
+
+You should almost always call `.prefetch(buffer_size)` after creating a dataset. It means
+your data pipeline will run asynchronously from your model,
+with new samples being preprocessed and stored in a buffer while the current batch
+samples are used to train the model. The next batch will be prefetched in GPU memory by
+the time the current batch is over.
+
+## Multi-worker distributed synchronous training
+
+**How it works**
+
+In this setup, you have multiple machines (called **workers**), each with one or several
+GPUs on them. Much like what happens for single-host training,
+each available GPU will run one model replica, and the value of the variables of each
+replica is kept in sync after each batch.
+
+Importantly, the current implementation assumes that all workers have the same number of
+GPUs (homogeneous cluster).
+
+**How to use it**
+
+1. Set up a cluster (we provide pointers below).
+2. Set up an appropriate `TF_CONFIG` environment variable on each worker. This tells the
+worker what its role is and how to communicate with its peers.
+3. On each worker, run your model construction & compilation code within the scope of a
+[`MultiWorkerMirroredStrategy` object](
+    https://www.tensorflow.org/api_docs/python/tf/distribute/experimental/MultiWorkerMirroredStrategy),
+similarly to what we did for single-host training.
+4. Run evaluation code on a designated evaluator machine.
+
+**Setting up a cluster**
+
+First, set up a cluster (collective of machines). Each machine individually should be
+set up so that it is able to run your model (typically, each machine will run the same
+Docker image) and to access your data source (e.g. GCS).
+
+Cluster management is beyond the scope of this guide.
+[Here is a document](
+    https://cloud.google.com/ai-platform/training/docs/distributed-training-containers)
+to help you get started.
+You can also take a look at [Kubeflow](https://www.kubeflow.org/).
+
+**Setting up the `TF_CONFIG` environment variable**
+
+While the code running on each worker is almost the same as the code used in the
+single-host workflow (except with a different `tf.distribute` strategy object), one
+significant difference between the single-host workflow and the multi-worker workflow is
+that you need to set a `TF_CONFIG` environment variable on each machine running in your
+cluster.
+
+The `TF_CONFIG` environment variable is a JSON string that specifies:
+
+- The cluster configuration, which is the list of addresses & ports of the machines that
+make up the cluster
+- The worker's "task", which is the role that this specific machine has to play within
+the cluster.
+
+One example of TF_CONFIG is:
+
+```
+os.environ['TF_CONFIG'] = json.dumps({
+    'cluster': {
+        'worker': ["localhost:12345", "localhost:23456"]
+    },
+    'task': {'type': 'worker', 'index': 0}
+})
+```
+
+In the multi-worker synchronous training setup, valid roles (task types) for the machines
+are "worker" and "evaluator".
+
+For example, if you have 8 machines with 4 GPUs each, you could have 7 workers and one
+evaluator.
+
+- The workers train the model, each one processing sub-batches of a global batch.
+- One of the workers (worker 0) will serve as "chief", a particular kind of worker that
+is responsible for saving logs and checkpoints for later reuse (typically to a Cloud
+storage location).
+- The evaluator runs a continuous loop that loads the latest checkpoint saved by the
+chief worker, runs evaluation on it (asynchronously from the other workers) and writes
+evaluation logs (e.g. TensorBoard logs).
+
+
+**Running code on each worker**
+
+You would run training code on each worker (including the chief) and evaluation code on
+the evaluator.
+
+The training code is basically the same as what you would use in the single-host setup,
+except using `MultiWorkerMirroredStrategy` instead of `MirroredStrategy`.
+
+Each worker would run the same code (minus the difference explained in the note below),
+including the same callbacks.
+
+**Note:** Callbacks that save model checkpoints or logs should save to a different
+directory for each worker. It is standard practice that all workers should save to local
+disk (which is typically temporary), **except worker 0**, which would save TensorBoard
+logs and checkpoints to a Cloud storage location for later access & reuse.
+
+The evaluator would simply use `MirroredStrategy` (since it runs on a single machine and
+does not need to communicate with other machines) and call `model.evaluate()`. It would
+load the latest checkpoint that the chief worker saved to the Cloud storage location, and
+would save evaluation logs to the same location as the chief logs.
+
+### Example: code running in a multi-worker setup
+
+On the chief (worker 0):
+
+```python
+# Set TF_CONFIG
+os.environ['TF_CONFIG'] = json.dumps({
+    'cluster': {
+        'worker': ["localhost:12345", "localhost:23456"]
+    },
+    'task': {'type': 'worker', 'index': 0}
+})
+
+
+# Open a strategy scope and create/restore the model.
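+# (Descriptive note: the strategy reads TF_CONFIG when it is constructed,
+# to discover its peers and set up cross-worker gradient all-reduce.)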
+strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() +with strategy.scope(): + model = make_or_restore_model() + +callbacks = [ + # This callback saves a SavedModel every 100 batches + keras.callbacks.ModelCheckpoint(filepath='path/to/cloud/location/ckpt', + save_freq=100), + keras.callbacks.TensorBoard('path/to/cloud/location/tb/') +] +model.fit(train_dataset, + callbacks=callbacks, + ...) +``` + +On other workers: + +```python +# Set TF_CONFIG +worker_index = 1 # For instance +os.environ['TF_CONFIG'] = json.dumps({ + 'cluster': { + 'worker': ["localhost:12345", "localhost:23456"] + }, + 'task': {'type': 'worker', 'index': worker_index} +}) + + +# Open a strategy scope and create/restore the model. +# You can restore from the checkpoint saved by the chief. +strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() +with strategy.scope(): + model = make_or_restore_model() + +callbacks = [ + keras.callbacks.ModelCheckpoint(filepath='local/path/ckpt', save_freq=100), + keras.callbacks.TensorBoard('local/path/tb/') +] +model.fit(train_dataset, + callbacks=callbacks, + ...) +``` + +On the evaluator: + +```python +strategy = tf.distribute.MirroredStrategy() +with strategy.scope(): + model = make_or_restore_model() # Restore from the checkpoint saved by the chief. + +results = model.evaluate(val_dataset) +# Then, log the results on a shared location, write TensorBoard logs, etc +``` + +### Further reading + + +1. [TensorFlow distributed training guide]( + https://www.tensorflow.org/guide/distributed_training) +2. [Tutorial on multi-worker training with Keras]( + https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras) +3. [MirroredStrategy docs]( + https://www.tensorflow.org/api_docs/python/tf/distribute/MirroredStrategy) +4. [MultiWorkerMirroredStrategy docs]( + https://www.tensorflow.org/api_docs/python/tf/distribute/experimental/MultiWorkerMirroredStrategy) +5. [Distributed training in tf.keras with Weights & Biases]( + https://towardsdatascience.com/distributed-training-in-tf-keras-with-w-b-ccf021f9322e) diff --git a/.tether/vignettes-src/distributed_training_with_jax.Rmd b/.tether/vignettes-src/distributed_training_with_jax.Rmd new file mode 100644 index 0000000000..abe53e079f --- /dev/null +++ b/.tether/vignettes-src/distributed_training_with_jax.Rmd @@ -0,0 +1,271 @@ +--- +title: Multi-GPU distributed training with JAX +author: '[fchollet](https://twitter.com/fchollet)' +date-created: 2023/07/11 +last-modified: 2023/07/11 +description: Guide to multi-GPU/TPU training for Keras models with JAX. +accelerator: GPU +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras/guides/distributed_training_with_jax.py +--- + +## Introduction + +There are generally two ways to distribute computation across multiple devices: + +**Data parallelism**, where a single model gets replicated on multiple devices or +multiple machines. Each of them processes different batches of data, then they merge +their results. There exist many variants of this setup, that differ in how the different +model replicas merge results, in whether they stay in sync at every batch or whether they +are more loosely coupled, etc. + +**Model parallelism**, where different parts of a single model run on different devices, +processing a single batch of data together. This works best with models that have a +naturally-parallel architecture, such as models that feature multiple branches. 
+
+This guide focuses on data parallelism, in particular **synchronous data parallelism**,
+where the different replicas of the model stay in sync after each batch they process.
+Synchronicity keeps the model convergence behavior identical to what you would see for
+single-device training.
+
+Specifically, this guide teaches you how to use `jax.sharding` APIs to train Keras
+models, with minimal changes to your code, on multiple GPUs or TPUs (typically 2 to 16)
+installed on a single machine (single host, multi-device training). This is the
+most common setup for researchers and small-scale industry workflows.
+
+## Setup
+
+Let's start by defining the function that creates the model that we will train,
+and the function that creates the dataset we will train on (MNIST in this case).
+
+```python
+import os
+
+os.environ["KERAS_BACKEND"] = "jax"
+
+import jax
+import numpy as np
+import tensorflow as tf
+import keras
+
+from jax.experimental import mesh_utils
+from jax.sharding import Mesh
+from jax.sharding import NamedSharding
+from jax.sharding import PartitionSpec as P
+
+
+def get_model():
+    # Make a simple convnet with batch normalization and dropout.
+    inputs = keras.Input(shape=(28, 28, 1))
+    x = keras.layers.Rescaling(1.0 / 255.0)(inputs)
+    x = keras.layers.Conv2D(
+        filters=12, kernel_size=3, padding="same", use_bias=False
+    )(x)
+    x = keras.layers.BatchNormalization(scale=False, center=True)(x)
+    x = keras.layers.ReLU()(x)
+    x = keras.layers.Conv2D(
+        filters=24,
+        kernel_size=6,
+        use_bias=False,
+        strides=2,
+    )(x)
+    x = keras.layers.BatchNormalization(scale=False, center=True)(x)
+    x = keras.layers.ReLU()(x)
+    x = keras.layers.Conv2D(
+        filters=32,
+        kernel_size=6,
+        padding="same",
+        strides=2,
+        name="large_k",
+    )(x)
+    x = keras.layers.BatchNormalization(scale=False, center=True)(x)
+    x = keras.layers.ReLU()(x)
+    x = keras.layers.GlobalAveragePooling2D()(x)
+    x = keras.layers.Dense(256, activation="relu")(x)
+    x = keras.layers.Dropout(0.5)(x)
+    outputs = keras.layers.Dense(10)(x)
+    model = keras.Model(inputs, outputs)
+    return model
+
+
+def get_datasets():
+    # Load the data and split it between train and test sets
+    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
+
+    # Scale images to the [0, 1] range
+    x_train = x_train.astype("float32")
+    x_test = x_test.astype("float32")
+    # Make sure images have shape (28, 28, 1)
+    x_train = np.expand_dims(x_train, -1)
+    x_test = np.expand_dims(x_test, -1)
+    print("x_train shape:", x_train.shape)
+    print(x_train.shape[0], "train samples")
+    print(x_test.shape[0], "test samples")
+
+    # Create TF Datasets
+    train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
+    eval_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))
+    return train_data, eval_data
+```
+
+## Single-host, multi-device synchronous training
+
+In this setup, you have one machine with several GPUs or TPUs on it (typically 2 to 16).
+Each device will run a copy of your model (called a **replica**). For simplicity, in
+what follows, we'll assume we're dealing with 8 GPUs, at no loss of generality.
+
+**How it works**
+
+At each step of training:
+
+- The current batch of data (called **global batch**) is split into 8 different
+  sub-batches (called **local batches**). For instance, if the global batch has 512
+  samples, each of the 8 local batches will have 64 samples.
+- Each of the 8 replicas independently processes a local batch: they run a forward pass,
+  then a backward pass, outputting the gradient of the weights with respect to the loss of
+  the model on the local batch.
+- The weight updates originating from local gradients are efficiently merged across the 8
+  replicas. Because this is done at the end of every step, the replicas always stay in
+  sync.
+
+In practice, the process of synchronously updating the weights of the model replicas is
+handled at the level of each individual weight variable. This is done by using
+a `jax.sharding.NamedSharding` that is configured to replicate the variables.
+
+**How to use it**
+
+To do single-host, multi-device synchronous training with a Keras model, you
+would use the `jax.sharding` features. Here's how it works:
+
+- We first create a device mesh using `mesh_utils.create_device_mesh`.
+- We use `jax.sharding.Mesh`, `jax.sharding.NamedSharding` and
+  `jax.sharding.PartitionSpec` to define how to partition JAX arrays.
+  - We specify that we want to replicate the model and optimizer variables
+    across all devices by using a spec with no axis.
+  - We specify that we want to shard the data across devices by using a spec
+    that splits along the batch dimension.
+- We use `jax.device_put` to replicate the model and optimizer variables across
+  devices. This happens once at the beginning.
+- In the training loop, for each batch that we process, we use `jax.device_put`
+  to split the batch across devices before invoking the train step.
+
+Here's the flow, where each step is split into its own utility function:
+
+```python
+# Config
+num_epochs = 2
+batch_size = 64
+
+train_data, eval_data = get_datasets()
+train_data = train_data.batch(batch_size, drop_remainder=True)
+
+model = get_model()
+optimizer = keras.optimizers.Adam(1e-3)
+loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
+
+# Initialize all state with .build()
+(one_batch, one_batch_labels) = next(iter(train_data))
+model.build(one_batch)
+optimizer.build(model.trainable_variables)
+
+
+# This is the loss function that will be differentiated.
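+# It is a pure function of its inputs (all model state is passed in and
+# returned explicitly), which is what lets JAX transform it with
+# `jax.value_and_grad` and `jax.jit` below.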
+# Keras provides a pure functional forward pass: model.stateless_call +def compute_loss(trainable_variables, non_trainable_variables, x, y): + y_pred, updated_non_trainable_variables = model.stateless_call( + trainable_variables, non_trainable_variables, x + ) + loss_value = loss(y, y_pred) + return loss_value, updated_non_trainable_variables + + +# Function to compute gradients +compute_gradients = jax.value_and_grad(compute_loss, has_aux=True) + + +# Training step, Keras provides a pure functional optimizer.stateless_apply +@jax.jit +def train_step(train_state, x, y): + ( + trainable_variables, + non_trainable_variables, + optimizer_variables, + ) = train_state + (loss_value, non_trainable_variables), grads = compute_gradients( + trainable_variables, non_trainable_variables, x, y + ) + + trainable_variables, optimizer_variables = optimizer.stateless_apply( + optimizer_variables, grads, trainable_variables + ) + + return loss_value, ( + trainable_variables, + non_trainable_variables, + optimizer_variables, + ) + + +# Replicate the model and optimizer variable on all devices +def get_replicated_train_state(devices): + # All variables will be replicated on all devices + var_mesh = Mesh(devices, axis_names=("_")) + # In NamedSharding, axes not mentioned are replicated (all axes here) + var_replication = NamedSharding(var_mesh, P()) + + # Apply the distribution settings to the model variables + trainable_variables = jax.device_put( + model.trainable_variables, var_replication + ) + non_trainable_variables = jax.device_put( + model.non_trainable_variables, var_replication + ) + optimizer_variables = jax.device_put(optimizer.variables, var_replication) + + # Combine all state in a tuple + return (trainable_variables, non_trainable_variables, optimizer_variables) + + +num_devices = len(jax.local_devices()) +print(f"Running on {num_devices} devices: {jax.local_devices()}") +devices = mesh_utils.create_device_mesh((num_devices,)) + +# Data will be split along the batch axis +data_mesh = Mesh(devices, axis_names=("batch",)) # naming axes of the mesh +data_sharding = NamedSharding( + data_mesh, + P( + "batch", + ), +) # naming axes of the sharded partition + +# Display data sharding +x, y = next(iter(train_data)) +sharded_x = jax.device_put(x.numpy(), data_sharding) +print("Data sharding") +jax.debug.visualize_array_sharding(jax.numpy.reshape(sharded_x, [-1, 28 * 28])) + +train_state = get_replicated_train_state(devices) + +# Custom training loop +for epoch in range(num_epochs): + data_iter = iter(train_data) + for data in data_iter: + x, y = data + sharded_x = jax.device_put(x.numpy(), data_sharding) + loss_value, train_state = train_step(train_state, sharded_x, y.numpy()) + print("Epoch", epoch, "loss:", loss_value) + +# Post-processing model state update to write them back into the model +trainable_variables, non_trainable_variables, optimizer_variables = train_state +for variable, value in zip(model.trainable_variables, trainable_variables): + variable.assign(value) +for variable, value in zip( + model.non_trainable_variables, non_trainable_variables +): + variable.assign(value) +``` + +That's it! 
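+
+As an optional sanity check (a minimal sketch, not part of the original guide,
+assuming the `model`, `loss`, and `eval_data` objects defined above are still in
+scope): once the updated values have been written back, the model behaves like
+any other Keras model, so the standard high-level API works again.
+
+```python
+# Evaluate the freshly synced weights on the held-out split.
+model.compile(loss=loss, metrics=["sparse_categorical_accuracy"])
+print(model.evaluate(eval_data.batch(64), return_dict=True))
+```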
+
diff --git a/.tether/vignettes-src/distributed_training_with_tensorflow.Rmd b/.tether/vignettes-src/distributed_training_with_tensorflow.Rmd
new file mode 100644
index 0000000000..7fb94e8896
--- /dev/null
+++ b/.tether/vignettes-src/distributed_training_with_tensorflow.Rmd
@@ -0,0 +1,268 @@
+---
+title: Multi-GPU distributed training with TensorFlow
+author: '[fchollet](https://twitter.com/fchollet)'
+date-created: 2020/04/28
+last-modified: 2023/06/29
+description: Guide to multi-GPU training for Keras models with TensorFlow.
+accelerator: GPU
+output: rmarkdown::html_vignette
+knit: ({source(here::here("tools/knit.R")); knit_vignette})
+tether: ~/github/keras-team/keras-io/guides/distributed_training_with_tensorflow.py
+---
+
+## Introduction
+
+There are generally two ways to distribute computation across multiple devices:
+
+**Data parallelism**, where a single model gets replicated on multiple devices or
+multiple machines. Each of them processes different batches of data, then they merge
+their results. There exist many variants of this setup, that differ in how the different
+model replicas merge results, in whether they stay in sync at every batch or whether they
+are more loosely coupled, etc.
+
+**Model parallelism**, where different parts of a single model run on different devices,
+processing a single batch of data together. This works best with models that have a
+naturally-parallel architecture, such as models that feature multiple branches.
+
+This guide focuses on data parallelism, in particular **synchronous data parallelism**,
+where the different replicas of the model stay in sync after each batch they process.
+Synchronicity keeps the model convergence behavior identical to what you would see for
+single-device training.
+
+Specifically, this guide teaches you how to use the `tf.distribute` API to train Keras
+models, with minimal changes to your code,
+on multiple GPUs (typically 2 to 16) installed on a single machine (single host,
+multi-device training). This is the most common setup for researchers and small-scale
+industry workflows.
+
+## Setup
+
+```python
+import os
+
+os.environ["KERAS_BACKEND"] = "tensorflow"
+
+import tensorflow as tf
+import keras
+```
+
+## Single-host, multi-device synchronous training
+
+In this setup, you have one machine with several GPUs on it (typically 2 to 16). Each
+device will run a copy of your model (called a **replica**). For simplicity, in what
+follows, we'll assume we're dealing with 8 GPUs, at no loss of generality.
+
+**How it works**
+
+At each step of training:
+
+- The current batch of data (called **global batch**) is split into 8 different
+sub-batches (called **local batches**). For instance, if the global batch has 512
+samples, each of the 8 local batches will have 64 samples.
+- Each of the 8 replicas independently processes a local batch: they run a forward pass,
+then a backward pass, outputting the gradient of the weights with respect to the loss of
+the model on the local batch.
+- The weight updates originating from local gradients are efficiently merged across the 8
+replicas. Because this is done at the end of every step, the replicas always stay in
+sync.
+
+In practice, the process of synchronously updating the weights of the model replicas is
+handled at the level of each individual weight variable. This is done through a **mirrored
+variable** object.
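+
+To make the idea concrete, here is a tiny illustrative sketch (not part of
+the original guide): any variable created inside a `MirroredStrategy` scope
+becomes a mirrored variable, with one copy per replica that the strategy
+keeps in sync.
+
+```python
+strategy = tf.distribute.MirroredStrategy()
+with strategy.scope():
+    v = tf.Variable(1.0)  # created once per replica, kept in sync
+print(type(v).__name__)  # a distributed (mirrored) variable wrapper
+```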
+ +**How to use it** + +To do single-host, multi-device synchronous training with a Keras model, you would use +the [`tf.distribute.MirroredStrategy` API]( + https://www.tensorflow.org/api_docs/python/tf/distribute/MirroredStrategy). +Here's how it works: + +- Instantiate a `MirroredStrategy`, optionally configuring which specific devices you +want to use (by default the strategy will use all GPUs available). +- Use the strategy object to open a scope, and within this scope, create all the Keras +objects you need that contain variables. Typically, that means **creating & compiling the +model** inside the distribution scope. In some cases, the first call to `fit()` may also +create variables, so it's a good idea to put your `fit()` call in the scope as well. +- Train the model via `fit()` as usual. + +Importantly, we recommend that you use `tf.data.Dataset` objects to load data +in a multi-device or distributed workflow. + +Schematically, it looks like this: + +```python +# Create a MirroredStrategy. +strategy = tf.distribute.MirroredStrategy() +print('Number of devices: {}'.format(strategy.num_replicas_in_sync)) + +# Open a strategy scope. +with strategy.scope(): + # Everything that creates variables should be under the strategy scope. + # In general this is only model construction & `compile()`. + model = Model(...) + model.compile(...) + + # Train the model on all available devices. + model.fit(train_dataset, validation_data=val_dataset, ...) + + # Test the model on all available devices. + model.evaluate(test_dataset) +``` + +Here's a simple end-to-end runnable example: + +```python +def get_compiled_model(): + # Make a simple 2-layer densely-connected neural network. + inputs = keras.Input(shape=(784,)) + x = keras.layers.Dense(256, activation="relu")(inputs) + x = keras.layers.Dense(256, activation="relu")(x) + outputs = keras.layers.Dense(10)(x) + model = keras.Model(inputs, outputs) + model.compile( + optimizer=keras.optimizers.Adam(), + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + metrics=[keras.metrics.SparseCategoricalAccuracy()], + ) + return model + + +def get_dataset(): + batch_size = 32 + num_val_samples = 10000 + + # Return the MNIST dataset in the form of a `tf.data.Dataset`. + (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() + + # Preprocess the data (these are Numpy arrays) + x_train = x_train.reshape(-1, 784).astype("float32") / 255 + x_test = x_test.reshape(-1, 784).astype("float32") / 255 + y_train = y_train.astype("float32") + y_test = y_test.astype("float32") + + # Reserve num_val_samples samples for validation + x_val = x_train[-num_val_samples:] + y_val = y_train[-num_val_samples:] + x_train = x_train[:-num_val_samples] + y_train = y_train[:-num_val_samples] + return ( + tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(batch_size), + tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(batch_size), + tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size), + ) + + +# Create a MirroredStrategy. +strategy = tf.distribute.MirroredStrategy() +print("Number of devices: {}".format(strategy.num_replicas_in_sync)) + +# Open a strategy scope. +with strategy.scope(): + # Everything that creates variables should be under the strategy scope. + # In general this is only model construction & `compile()`. + model = get_compiled_model() + + # Train the model on all available devices. 
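+    # (fit() automatically splits each global batch across the replicas.)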
+    train_dataset, val_dataset, test_dataset = get_dataset()
+    model.fit(train_dataset, epochs=2, validation_data=val_dataset)
+
+    # Test the model on all available devices.
+    model.evaluate(test_dataset)
+```
+
+## Using callbacks to ensure fault tolerance
+
+When using distributed training, you should always make sure you have a strategy to
+recover from failure (fault tolerance). The simplest way to handle this is to pass a
+`ModelCheckpoint` callback to `fit()`, to save your model
+at regular intervals (e.g. every 100 batches or every epoch). You can then restart
+training from your saved model.
+
+Here's a simple example:
+
+```python
+# Prepare a directory to store all the checkpoints.
+checkpoint_dir = "./ckpt"
+if not os.path.exists(checkpoint_dir):
+    os.makedirs(checkpoint_dir)
+
+
+def make_or_restore_model():
+    # Either restore the latest model, or create a fresh one
+    # if there is no checkpoint available.
+    checkpoints = [checkpoint_dir + "/" + name for name in os.listdir(checkpoint_dir)]
+    if checkpoints:
+        latest_checkpoint = max(checkpoints, key=os.path.getctime)
+        print("Restoring from", latest_checkpoint)
+        return keras.models.load_model(latest_checkpoint)
+    print("Creating a new model")
+    return get_compiled_model()
+
+
+def run_training(epochs=1):
+    # Create a MirroredStrategy.
+    strategy = tf.distribute.MirroredStrategy()
+
+    # Open a strategy scope and create/restore the model
+    with strategy.scope():
+        model = make_or_restore_model()
+
+    callbacks = [
+        # This callback saves the model every epoch
+        # We include the current epoch in the folder name.
+        keras.callbacks.ModelCheckpoint(
+            filepath=checkpoint_dir + "/ckpt-{epoch}.keras",
+            save_freq="epoch",
+        )
+    ]
+    model.fit(
+        train_dataset,
+        epochs=epochs,
+        callbacks=callbacks,
+        validation_data=val_dataset,
+        verbose=2,
+    )
+
+
+# Running the first time creates the model
+run_training(epochs=1)
+
+# Calling the same function again will resume from where we left off
+run_training(epochs=1)
+```
+
+## `tf.data` performance tips
+
+When doing distributed training, the efficiency with which you load data can often become
+critical. Here are a few tips to make sure your `tf.data` pipelines
+run as fast as possible.
+
+**Note about dataset batching**
+
+When creating your dataset, make sure it is batched with the global batch size.
+For instance, if each of your 8 GPUs is capable of running a batch of 64 samples, you
+can use a global batch size of 512.
+
+**Calling `dataset.cache()`**
+
+If you call `.cache()` on a dataset, its data will be cached after running through the
+first iteration over the data. Every subsequent iteration will use the cached data. The
+cache can be in memory (default) or in a local file you specify.
+
+This can improve performance when:
+
+- Your data is not expected to change from iteration to iteration
+- You are reading data from a remote distributed filesystem
+- You are reading data from local disk, but your data would fit in memory and your
+workflow is significantly IO-bound (e.g. reading & decoding image files).
+
+**Calling `dataset.prefetch(buffer_size)`**
+
+You should almost always call `.prefetch(buffer_size)` after creating a dataset. It means
+your data pipeline will run asynchronously from your model,
+with new samples being preprocessed and stored in a buffer while the current batch
+samples are used to train the model. The next batch will be prefetched in GPU memory by
+the time the current batch is over.
+
+That's it!
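+
+Putting these tips together, a typical input pipeline looks like the sketch
+below (illustrative only; the `x_train`/`y_train` arrays and the per-replica
+batch size of 64 are assumptions):
+
+```python
+# Batch with the *global* batch size: per-replica batch x replica count.
+global_batch_size = 64 * strategy.num_replicas_in_sync
+dataset = (
+    tf.data.Dataset.from_tensor_slices((x_train, y_train))
+    .cache()  # cache in memory after the first full iteration
+    .shuffle(buffer_size=1024)
+    .batch(global_batch_size)
+    .prefetch(tf.data.AUTOTUNE)  # overlap preprocessing with training
+)
+```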
+
diff --git a/.tether/vignettes-src/distributed_training_with_torch.Rmd b/.tether/vignettes-src/distributed_training_with_torch.Rmd
new file mode 100644
index 0000000000..92eb7a78ec
--- /dev/null
+++ b/.tether/vignettes-src/distributed_training_with_torch.Rmd
@@ -0,0 +1,268 @@
+---
+title: Multi-GPU distributed training with PyTorch
+author: '[fchollet](https://twitter.com/fchollet)'
+date-created: 2023/06/29
+last-modified: 2023/06/29
+description: Guide to multi-GPU training for Keras models with PyTorch.
+accelerator: GPU
+output: rmarkdown::html_vignette
+knit: ({source(here::here("tools/knit.R")); knit_vignette})
+tether: ~/github/keras-team/keras/guides/distributed_training_with_torch.py
+---
+
+## Introduction
+
+There are generally two ways to distribute computation across multiple devices:
+
+**Data parallelism**, where a single model gets replicated on multiple devices or
+multiple machines. Each of them processes different batches of data, then they merge
+their results. There exist many variants of this setup, that differ in how the different
+model replicas merge results, in whether they stay in sync at every batch or whether they
+are more loosely coupled, etc.
+
+**Model parallelism**, where different parts of a single model run on different devices,
+processing a single batch of data together. This works best with models that have a
+naturally-parallel architecture, such as models that feature multiple branches.
+
+This guide focuses on data parallelism, in particular **synchronous data parallelism**,
+where the different replicas of the model stay in sync after each batch they process.
+Synchronicity keeps the model convergence behavior identical to what you would see for
+single-device training.
+
+Specifically, this guide teaches you how to use PyTorch's `DistributedDataParallel`
+module wrapper to train Keras models, with minimal changes to your code,
+on multiple GPUs (typically 2 to 16) installed on a single machine (single host,
+multi-device training). This is the most common setup for researchers and small-scale
+industry workflows.
+
+## Setup
+
+Let's start by defining the function that creates the model that we will train,
+and the function that creates the dataset we will train on (MNIST in this case).
+
+```python
+import os
+
+os.environ["KERAS_BACKEND"] = "torch"
+
+import torch
+import numpy as np
+import keras
+
+
+def get_model():
+    # Make a simple convnet with batch normalization and dropout.
+ inputs = keras.Input(shape=(28, 28, 1)) + x = keras.layers.Rescaling(1.0 / 255.0)(inputs) + x = keras.layers.Conv2D( + filters=12, kernel_size=3, padding="same", use_bias=False + )(x) + x = keras.layers.BatchNormalization(scale=False, center=True)(x) + x = keras.layers.ReLU()(x) + x = keras.layers.Conv2D( + filters=24, + kernel_size=6, + use_bias=False, + strides=2, + )(x) + x = keras.layers.BatchNormalization(scale=False, center=True)(x) + x = keras.layers.ReLU()(x) + x = keras.layers.Conv2D( + filters=32, + kernel_size=6, + padding="same", + strides=2, + name="large_k", + )(x) + x = keras.layers.BatchNormalization(scale=False, center=True)(x) + x = keras.layers.ReLU()(x) + x = keras.layers.GlobalAveragePooling2D()(x) + x = keras.layers.Dense(256, activation="relu")(x) + x = keras.layers.Dropout(0.5)(x) + outputs = keras.layers.Dense(10)(x) + model = keras.Model(inputs, outputs) + return model + + +def get_dataset(): + # Load the data and split it between train and test sets + (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() + + # Scale images to the [0, 1] range + x_train = x_train.astype("float32") + x_test = x_test.astype("float32") + # Make sure images have shape (28, 28, 1) + x_train = np.expand_dims(x_train, -1) + x_test = np.expand_dims(x_test, -1) + print("x_train shape:", x_train.shape) + + # Create a TensorDataset + dataset = torch.utils.data.TensorDataset( + torch.from_numpy(x_train), torch.from_numpy(y_train) + ) + return dataset +``` + +Next, let's define a simple PyTorch training loop that targets +a GPU (note the calls to `.cuda()`). + +```python +def train_model(model, dataloader, num_epochs, optimizer, loss_fn): + for epoch in range(num_epochs): + running_loss = 0.0 + running_loss_count = 0 + for batch_idx, (inputs, targets) in enumerate(dataloader): + inputs = inputs.cuda(non_blocking=True) + targets = targets.cuda(non_blocking=True) + + # Forward pass + outputs = model(inputs) + loss = loss_fn(outputs, targets) + + # Backward and optimize + optimizer.zero_grad() + loss.backward() + optimizer.step() + + running_loss += loss.item() + running_loss_count += 1 + + # Print loss statistics + print( + f"Epoch {epoch + 1}/{num_epochs}, " + f"Loss: {running_loss / running_loss_count}" + ) +``` + +## Single-host, multi-device synchronous training + +In this setup, you have one machine with several GPUs on it (typically 2 to 16). Each +device will run a copy of your model (called a **replica**). For simplicity, in what +follows, we'll assume we're dealing with 8 GPUs, at no loss of generality. + +**How it works** + +At each step of training: + +- The current batch of data (called **global batch**) is split into 8 different +sub-batches (called **local batches**). For instance, if the global batch has 512 +samples, each of the 8 local batches will have 64 samples. +- Each of the 8 replicas independently processes a local batch: they run a forward pass, +then a backward pass, outputting the gradient of the weights with respect to the loss of +the model on the local batch. +- The weight updates originating from local gradients are efficiently merged across the 8 +replicas. Because this is done at the end of every step, the replicas always stay in +sync. + +In practice, the process of synchronously updating the weights of the model replicas is +handled at the level of each individual weight variable. This is done through a **mirrored +variable** object. 
+ +**How to use it** + +To do single-host, multi-device synchronous training with a Keras model, you would use +the `torch.nn.parallel.DistributedDataParallel` module wrapper. +Here's how it works: + +- We use `torch.multiprocessing.start_processes` to start multiple Python processes, one +per device. Each process will run the `per_device_launch_fn` function. +- The `per_device_launch_fn` function does the following: + - It uses `torch.distributed.init_process_group` and `torch.cuda.set_device` + to configure the device to be used for that process. + - It uses `torch.utils.data.distributed.DistributedSampler` + and `torch.utils.data.DataLoader` to turn our data into a distributed data loader. + - It also uses `torch.nn.parallel.DistributedDataParallel` to turn our model into + a distributed PyTorch module. + - It then calls the `train_model` function. +- The `train_model` function will then run in each process, with the model using +a separate device in each process. + +Here's the flow, where each step is split into its own utility function: + +```python +# Config +num_gpu = torch.cuda.device_count() +num_epochs = 2 +batch_size = 64 +print(f"Running on {num_gpu} GPUs") + + +def setup_device(current_gpu_index, num_gpus): + # Device setup + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "56492" + device = torch.device("cuda:{}".format(current_gpu_index)) + torch.distributed.init_process_group( + backend="nccl", + init_method="env://", + world_size=num_gpus, + rank=current_gpu_index, + ) + torch.cuda.set_device(device) + + +def cleanup(): + torch.distributed.destroy_process_group() + + +def prepare_dataloader(dataset, current_gpu_index, num_gpus, batch_size): + sampler = torch.utils.data.distributed.DistributedSampler( + dataset, + num_replicas=num_gpus, + rank=current_gpu_index, + shuffle=False, + ) + dataloader = torch.utils.data.DataLoader( + dataset, + sampler=sampler, + batch_size=batch_size, + shuffle=False, + ) + return dataloader + + +def per_device_launch_fn(current_gpu_index, num_gpu): + # Setup the process groups + setup_device(current_gpu_index, num_gpu) + + dataset = get_dataset() + model = get_model() + + # prepare the dataloader + dataloader = prepare_dataloader( + dataset, current_gpu_index, num_gpu, batch_size + ) + + # Instantiate the torch optimizer + optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) + + # Instantiate the torch loss function + loss_fn = torch.nn.CrossEntropyLoss() + + # Put model on device + model = model.to(current_gpu_index) + ddp_model = torch.nn.parallel.DistributedDataParallel( + model, device_ids=[current_gpu_index], output_device=current_gpu_index + ) + + train_model(ddp_model, dataloader, num_epochs, optimizer, loss_fn) + + cleanup() +``` + +Time to start multiple processes: + +```python +if __name__ == "__main__": + # We use the "fork" method rather than "spawn" to support notebooks + torch.multiprocessing.start_processes( + per_device_launch_fn, + args=(num_gpu,), + nprocs=num_gpu, + join=True, + start_method="fork", + ) +``` + +That's it! + diff --git a/.tether/vignettes-src/distribution.Rmd b/.tether/vignettes-src/distribution.Rmd new file mode 100644 index 0000000000..d1fb7deeb1 --- /dev/null +++ b/.tether/vignettes-src/distribution.Rmd @@ -0,0 +1,237 @@ +--- +title: Distributed training with Keras 3 +author: '[Qianli Zhu](https://github.com/qlzh727)' +date-created: 2023/11/07 +last-modified: 2023/11/07 +description: Complete guide to the distribution API for multi-backend Keras. 
+accelerator: GPU
+output: rmarkdown::html_vignette
+knit: ({source(here::here("tools/knit.R")); knit_vignette})
+tether: ~/github/keras-team/keras-io/guides/distribution.py
+---
+
+## Introduction
+
+The Keras distribution API is a new interface designed to facilitate
+distributed deep learning across a variety of backends like JAX, TensorFlow and
+PyTorch. This powerful API introduces a suite of tools enabling data and model
+parallelism, allowing for efficient scaling of deep learning models on multiple
+accelerators and hosts. Whether leveraging the power of GPUs or TPUs, the API
+provides a streamlined approach to initializing distributed environments,
+defining device meshes, and orchestrating the layout of tensors across
+computational resources. Through classes like `DataParallel` and
+`ModelParallel`, it abstracts the complexity involved in parallel computation,
+making it easier for developers to accelerate their machine learning
+workflows.
+
+## How it works
+
+The Keras distribution API provides a global programming model that allows
+developers to compose applications that operate on tensors in a global context
+(as if working with a single device) while
+automatically managing distribution across many devices. The API leverages the
+underlying framework (e.g. JAX) to distribute the program and tensors according to the
+sharding directives through a procedure called single program, multiple data
+(SPMD) expansion.
+
+By decoupling the application from sharding directives, the API enables running
+the same application on a single device, multiple devices, or even multiple
+clients, while preserving its global semantics.
+
+## Setup
+
+```python
+import os
+
+# The distribution API is only implemented for the JAX backend for now.
+os.environ["KERAS_BACKEND"] = "jax"
+
+import keras
+from keras import layers
+import jax
+import numpy as np
+from tensorflow import data as tf_data  # For dataset input.
+```
+
+## `DeviceMesh` and `TensorLayout`
+
+The `keras.distribution.DeviceMesh` class in the Keras distribution API represents a cluster of
+computational devices configured for distributed computation. It aligns with
+similar concepts in [`jax.sharding.Mesh`](https://jax.readthedocs.io/en/latest/jax.sharding.html#jax.sharding.Mesh) and
+[`tf.dtensor.Mesh`](https://www.tensorflow.org/api_docs/python/tf/experimental/dtensor/Mesh),
+where it's used to map the physical devices to a logical mesh structure.
+
+The `TensorLayout` class then specifies how tensors are distributed across the
+`DeviceMesh`, detailing the sharding of tensors along specified axes that
+correspond to the names of the axes in the `DeviceMesh`.
+
+You can find more detailed concept explainers in the
+[TensorFlow DTensor guide](https://www.tensorflow.org/guide/dtensor_overview#dtensors_model_of_distributed_tensors).
+
+```python
+# Retrieve the local available gpu devices.
+devices = jax.devices("gpu")  # Assume it has 8 local GPUs.
+
+# Define a 2x4 device mesh with data and model parallel axes
+mesh = keras.distribution.DeviceMesh(
+    shape=(2, 4), axis_names=["data", "model"], devices=devices
+)
+
+# A 2D layout, which describes how a tensor is distributed across the
+# mesh. The layout can be visualized as a 2D grid with "model" as rows and
+# "data" as columns, and it is a [4, 2] grid when mapped to the physical
+# devices on the mesh.
+layout_2d = keras.distribution.TensorLayout(axes=("model", "data"), device_mesh=mesh)
+
+# A 4D layout which could be used for data parallelism of an image input.
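+# Only the leading (batch) axis is sharded along "data"; the `None` axes
+# are left unsharded, i.e. replicated across the mesh.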
+replicated_layout_4d = keras.distribution.TensorLayout(
+    axes=("data", None, None, None), device_mesh=mesh
+)
+```
+
+## Distribution
+
+The `Distribution` class in Keras serves as a foundational abstract class designed
+for developing custom distribution strategies. It encapsulates the core logic
+needed to distribute a model's variables, input data, and intermediate
+computations across a device mesh. As an end user, you won't have to interact
+directly with this class, but rather with its subclasses such as `DataParallel` or
+`ModelParallel`.
+
+## DataParallel
+
+The `DataParallel` class in the Keras distribution API is designed for the
+data parallelism strategy in distributed training, where the model weights are
+replicated across all devices in the `DeviceMesh`, and each device processes a
+portion of the input data.
+
+Here is a sample usage of this class.
+
+```python
+# Create DataParallel with a list of devices.
+# As a shortcut, the devices can be skipped,
+# and Keras will detect all local available devices.
+# E.g. data_parallel = DataParallel()
+data_parallel = keras.distribution.DataParallel(devices=devices)
+
+# Or you can choose to create DataParallel with a 1D `DeviceMesh`.
+mesh_1d = keras.distribution.DeviceMesh(
+    shape=(8,), axis_names=["data"], devices=devices
+)
+data_parallel = keras.distribution.DataParallel(device_mesh=mesh_1d)
+
+inputs = np.random.normal(size=(128, 28, 28, 1))
+labels = np.random.normal(size=(128, 10))
+dataset = tf_data.Dataset.from_tensor_slices((inputs, labels)).batch(16)
+
+# Set the global distribution.
+keras.distribution.set_distribution(data_parallel)
+
+# Note that all the model weights from here on are replicated to
+# all the devices of the `DeviceMesh`. This includes the RNG
+# state, optimizer states, metrics, etc. The dataset fed into `model.fit` or
+# `model.evaluate` will be split evenly on the batch dimension, and sent to
+# all the devices. You don't have to do any manual aggregation of losses,
+# since all the computation happens in a global context.
+inputs = layers.Input(shape=(28, 28, 1))
+y = layers.Flatten()(inputs)
+y = layers.Dense(units=200, use_bias=False, activation="relu")(y)
+y = layers.Dropout(0.4)(y)
+y = layers.Dense(units=10, activation="softmax")(y)
+model = keras.Model(inputs=inputs, outputs=y)
+
+model.compile(loss="mse")
+model.fit(dataset, epochs=3)
+model.evaluate(dataset)
+```
+
+## `ModelParallel` and `LayoutMap`
+
+`ModelParallel` will be mostly useful when model weights are too large to fit
+on a single accelerator. This setting allows you to split your model weights or
+activation tensors across all the devices on the `DeviceMesh`, enabling
+horizontal scaling for large models.
+
+Unlike the `DataParallel` model where all weights are fully replicated,
+the weights layout under `ModelParallel` usually needs some customization for
+best performance. We introduce `LayoutMap` to let you specify the
+`TensorLayout` for any weights and intermediate tensors from a global perspective.
+
+`LayoutMap` is a dict-like object that maps a string to `TensorLayout`
+instances. It behaves differently from a normal Python dict in that the string
+key is treated as a regex when retrieving the value. The class allows you to
+define the naming schema of `TensorLayout` and then retrieve the corresponding
+`TensorLayout` instance. Typically, the key used to query
+is the `variable.path` attribute, which is the identifier of the variable.
+As a shortcut, a tuple or list of axis
+names is also allowed when inserting a value, and it will be converted to
+`TensorLayout`.
+
+The `LayoutMap` can also optionally contain a `DeviceMesh` to populate the
+`TensorLayout.device_mesh` if it is not set. When retrieving a layout with a
+key, and if there isn't an exact match, all existing keys in the layout map will
+be treated as regex and matched against the input key again. If there are
+multiple matches, a `ValueError` is raised. If no matches are found, `None` is
+returned.
+
+```python
+mesh_2d = keras.distribution.DeviceMesh(
+    shape=(2, 4), axis_names=["data", "model"], devices=devices
+)
+layout_map = keras.distribution.LayoutMap(mesh_2d)
+# The rule below means that any weight that matches d1/kernel will be
+# sharded along the model dimension (across 4 devices); the same goes for
+# d1/bias. All other weights will be fully replicated.
+layout_map["d1/kernel"] = (None, "model")
+layout_map["d1/bias"] = ("model",)
+
+# You can also set the layout for the layer output like this:
+layout_map["d2/output"] = ("data", None)
+
+model_parallel = keras.distribution.ModelParallel(
+    mesh_2d, layout_map, batch_dim_name="data"
+)
+
+keras.distribution.set_distribution(model_parallel)
+
+inputs = layers.Input(shape=(28, 28, 1))
+y = layers.Flatten()(inputs)
+y = layers.Dense(units=200, use_bias=False, activation="relu", name="d1")(y)
+y = layers.Dropout(0.4)(y)
+y = layers.Dense(units=10, activation="softmax", name="d2")(y)
+model = keras.Model(inputs=inputs, outputs=y)
+
+# The data will be sharded across the "data" dimension of the mesh, which
+# has 2 devices.
+model.compile(loss="mse")
+model.fit(dataset, epochs=3)
+model.evaluate(dataset)
+```
+
+It is also easy to change the mesh structure to tune the computation between
+more data parallelism or more model parallelism. You can do this by adjusting
+the shape of the mesh. No changes are needed for any other code.
+
+```python
+full_data_parallel_mesh = keras.distribution.DeviceMesh(
+    shape=(8, 1), axis_names=["data", "model"], devices=devices
+)
+more_data_parallel_mesh = keras.distribution.DeviceMesh(
+    shape=(4, 2), axis_names=["data", "model"], devices=devices
+)
+more_model_parallel_mesh = keras.distribution.DeviceMesh(
+    shape=(2, 4), axis_names=["data", "model"], devices=devices
+)
+full_model_parallel_mesh = keras.distribution.DeviceMesh(
+    shape=(1, 8), axis_names=["data", "model"], devices=devices
+)
+```
+
+### Further reading
+
+1. [JAX Distributed arrays and automatic parallelization](https://jax.readthedocs.io/en/latest/notebooks/Distributed_arrays_and_automatic_parallelization.html)
+2. [JAX sharding module](https://jax.readthedocs.io/en/latest/jax.sharding.html)
+3. [TensorFlow Distributed training with DTensors](https://www.tensorflow.org/tutorials/distribute/dtensor_ml_tutorial)
+4. [TensorFlow DTensor concepts](https://www.tensorflow.org/guide/dtensor_overview)
+5. [Using DTensors with tf.keras](https://www.tensorflow.org/tutorials/distribute/dtensor_keras_tutorial)
+
diff --git a/.tether/vignettes-src/examples/autoencoder.Rmd b/.tether/vignettes-src/examples/autoencoder.Rmd
new file mode 100644
index 0000000000..858a176a76
--- /dev/null
+++ b/.tether/vignettes-src/examples/autoencoder.Rmd
@@ -0,0 +1,166 @@
+---
+title: Convolutional autoencoder for image denoising
+author: '[Santiago L. Valdarrama](https://twitter.com/svpino)'
+date-created: 2021/03/01
+last-modified: 2021/03/01
+description: How to train a deep convolutional autoencoder for image denoising.
+accelerator: GPU +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras-io/examples/vision/autoencoder.py +--- + +## Introduction + +This example demonstrates how to implement a deep convolutional autoencoder +for image denoising, mapping noisy digits images from the MNIST dataset to +clean digits images. This implementation is based on an original blog post +titled [Building Autoencoders in Keras](https://blog.keras.io/building-autoencoders-in-keras.html) +by [François Chollet](https://twitter.com/fchollet). + +## Setup + +```python +import numpy as np +import matplotlib.pyplot as plt + +from keras import layers +from keras.datasets import mnist +from keras.models import Model + + +def preprocess(array): + """Normalizes the supplied array and reshapes it.""" + array = array.astype("float32") / 255.0 + array = np.reshape(array, (len(array), 28, 28, 1)) + return array + + +def noise(array): + """Adds random noise to each image in the supplied array.""" + noise_factor = 0.4 + noisy_array = array + noise_factor * np.random.normal( + loc=0.0, scale=1.0, size=array.shape + ) + + return np.clip(noisy_array, 0.0, 1.0) + + +def display(array1, array2): + """Displays ten random images from each array.""" + n = 10 + indices = np.random.randint(len(array1), size=n) + images1 = array1[indices, :] + images2 = array2[indices, :] + + plt.figure(figsize=(20, 4)) + for i, (image1, image2) in enumerate(zip(images1, images2)): + ax = plt.subplot(2, n, i + 1) + plt.imshow(image1.reshape(28, 28)) + plt.gray() + ax.get_xaxis().set_visible(False) + ax.get_yaxis().set_visible(False) + + ax = plt.subplot(2, n, i + 1 + n) + plt.imshow(image2.reshape(28, 28)) + plt.gray() + ax.get_xaxis().set_visible(False) + ax.get_yaxis().set_visible(False) + + plt.show() +``` + +## Prepare the data + +```python +# Since we only need images from the dataset to encode and decode, we +# won't use the labels. +(train_data, _), (test_data, _) = mnist.load_data() + +# Normalize and reshape the data +train_data = preprocess(train_data) +test_data = preprocess(test_data) + +# Create a copy of the data with added noise +noisy_train_data = noise(train_data) +noisy_test_data = noise(test_data) + +# Display the train data and a version of it with added noise +display(train_data, noisy_train_data) +``` + +## Build the autoencoder + +We are going to use the Functional API to build our convolutional autoencoder. + +```python +input = layers.Input(shape=(28, 28, 1)) + +# Encoder +x = layers.Conv2D(32, (3, 3), activation="relu", padding="same")(input) +x = layers.MaxPooling2D((2, 2), padding="same")(x) +x = layers.Conv2D(32, (3, 3), activation="relu", padding="same")(x) +x = layers.MaxPooling2D((2, 2), padding="same")(x) + +# Decoder +x = layers.Conv2DTranspose(32, (3, 3), strides=2, activation="relu", padding="same")(x) +x = layers.Conv2DTranspose(32, (3, 3), strides=2, activation="relu", padding="same")(x) +x = layers.Conv2D(1, (3, 3), activation="sigmoid", padding="same")(x) + +# Autoencoder +autoencoder = Model(input, x) +autoencoder.compile(optimizer="adam", loss="binary_crossentropy") +autoencoder.summary() +``` + +Now we can train our autoencoder using `train_data` as both our input data +and target. Notice we are setting up the validation data using the same +format. 
+ +```python +autoencoder.fit( + x=train_data, + y=train_data, + epochs=50, + batch_size=128, + shuffle=True, + validation_data=(test_data, test_data), +) +``` + +Let's predict on our test dataset and display the original image together with +the prediction from our autoencoder. + +Notice how the predictions are pretty close to the original images, although +not quite the same. + +```python +predictions = autoencoder.predict(test_data) +display(test_data, predictions) +``` + +Now that we know that our autoencoder works, let's retrain it using the noisy +data as our input and the clean data as our target. We want our autoencoder to +learn how to denoise the images. + +```python +autoencoder.fit( + x=noisy_train_data, + y=train_data, + epochs=100, + batch_size=128, + shuffle=True, + validation_data=(noisy_test_data, test_data), +) +``` + +Let's now predict on the noisy data and display the results of our autoencoder. + +Notice how the autoencoder does an amazing job at removing the noise from the +input images. + +```python +predictions = autoencoder.predict(noisy_test_data) +display(noisy_test_data, predictions) +``` + diff --git a/.tether/vignettes-src/examples/mnist_convnet.Rmd b/.tether/vignettes-src/examples/mnist_convnet.Rmd new file mode 100644 index 0000000000..df5aac8fb5 --- /dev/null +++ b/.tether/vignettes-src/examples/mnist_convnet.Rmd @@ -0,0 +1,84 @@ +--- +title: Simple MNIST convnet +author: '[fchollet](https://twitter.com/fchollet)' +date-created: 2015/06/19 +last-modified: 2020/04/21 +description: A simple convnet that achieves ~99% test accuracy on MNIST. +accelerator: GPU +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras-io/examples/vision/mnist_convnet.py +--- + +## Setup + +```python +import numpy as np +import keras +from keras import layers +``` + +## Prepare the data + +```python +# Model / data parameters +num_classes = 10 +input_shape = (28, 28, 1) + +# Load the data and split it between train and test sets +(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() + +# Scale images to the [0, 1] range +x_train = x_train.astype("float32") / 255 +x_test = x_test.astype("float32") / 255 +# Make sure images have shape (28, 28, 1) +x_train = np.expand_dims(x_train, -1) +x_test = np.expand_dims(x_test, -1) +print("x_train shape:", x_train.shape) +print(x_train.shape[0], "train samples") +print(x_test.shape[0], "test samples") + + +# convert class vectors to binary class matrices +y_train = keras.utils.to_categorical(y_train, num_classes) +y_test = keras.utils.to_categorical(y_test, num_classes) +``` + +## Build the model + +```python +model = keras.Sequential( + [ + keras.Input(shape=input_shape), + layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), + layers.MaxPooling2D(pool_size=(2, 2)), + layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), + layers.MaxPooling2D(pool_size=(2, 2)), + layers.Flatten(), + layers.Dropout(0.5), + layers.Dense(num_classes, activation="softmax"), + ] +) + +model.summary() +``` + +## Train the model + +```python +batch_size = 128 +epochs = 15 + +model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]) + +model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1) +``` + +## Evaluate the trained model + +```python +score = model.evaluate(x_test, y_test, verbose=0) +print("Test loss:", score[0]) +print("Test accuracy:", score[1]) +``` + diff --git 
a/.tether/vignettes-src/examples/nlp/text_classification_from_scratch.Rmd b/.tether/vignettes-src/examples/nlp/text_classification_from_scratch.Rmd new file mode 100644 index 0000000000..39b7f9ed8a --- /dev/null +++ b/.tether/vignettes-src/examples/nlp/text_classification_from_scratch.Rmd @@ -0,0 +1,277 @@ +--- +title: Text classification from scratch +authors: Mark Omernick, Francois Chollet +date-created: 2019/11/06 +last-modified: 2020/05/17 +description: Text sentiment classification starting from raw text files. +accelerator: GPU +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras-io/examples/nlp/text_classification_from_scratch.py +--- + +## Introduction + +This example shows how to do text classification starting from raw text (as +a set of text files on disk). We demonstrate the workflow on the IMDB sentiment +classification dataset (unprocessed version). We use the `TextVectorization` layer for + word splitting & indexing. + +## Setup + +```python +import os + +os.environ["KERAS_BACKEND"] = "tensorflow" + +import keras +import tensorflow as tf +import numpy as np +from keras import layers +``` + +## Load the data: IMDB movie review sentiment classification + +Let's download the data and inspect its structure. + +curl -O https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz +tar -xf aclImdb_v1.tar.gz + +The `aclImdb` folder contains a `train` and `test` subfolder: + +ls aclImdb + +ls aclImdb/test + +ls aclImdb/train + +The `aclImdb/train/pos` and `aclImdb/train/neg` folders contain text files, each of + which represents one review (either positive or negative): + +cat aclImdb/train/pos/6248_7.txt + +We are only interested in the `pos` and `neg` subfolders, so let's delete the other subfolder that has text files in it: + +rm -r aclImdb/train/unsup + +You can use the utility `keras.utils.text_dataset_from_directory` to +generate a labeled `tf.data.Dataset` object from a set of text files on disk filed + into class-specific folders. + +Let's use it to generate the training, validation, and test datasets. The validation +and training datasets are generated from two subsets of the `train` directory, with 20% +of samples going to the validation dataset and 80% going to the training dataset. + +Having a validation dataset in addition to the test dataset is useful for tuning +hyperparameters, such as the model architecture, for which the test dataset should not +be used. + +Before putting the model out into the real world however, it should be retrained using all +available training data (without creating a validation dataset), so its performance is maximized. + +When using the `validation_split` & `subset` arguments, make sure to either specify a +random seed, or to pass `shuffle=False`, so that the validation & training splits you +get have no overlap. 
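+For instance, a variant of the call below that relies on `shuffle=False`
+instead of a fixed seed might look like this (a sketch only -- the actual code
+that follows uses `seed=1337`; the `_noshuffle` name is ours):
+
+```python
+raw_train_ds_noshuffle = keras.utils.text_dataset_from_directory(
+    "aclImdb/train",
+    batch_size=32,
+    validation_split=0.2,
+    subset="training",
+    shuffle=False,  # deterministic (alphanumeric) file order => disjoint splits
+)
+```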
+
+```python
+batch_size = 32
+raw_train_ds = keras.utils.text_dataset_from_directory(
+    "aclImdb/train",
+    batch_size=batch_size,
+    validation_split=0.2,
+    subset="training",
+    seed=1337,
+)
+raw_val_ds = keras.utils.text_dataset_from_directory(
+    "aclImdb/train",
+    batch_size=batch_size,
+    validation_split=0.2,
+    subset="validation",
+    seed=1337,
+)
+raw_test_ds = keras.utils.text_dataset_from_directory(
+    "aclImdb/test", batch_size=batch_size
+)
+
+print(f"Number of batches in raw_train_ds: {raw_train_ds.cardinality()}")
+print(f"Number of batches in raw_val_ds: {raw_val_ds.cardinality()}")
+print(f"Number of batches in raw_test_ds: {raw_test_ds.cardinality()}")
+```
+
+Let's preview a few samples:
+
+```python
+# It's important to take a look at your raw data to ensure your normalization
+# and tokenization will work as expected. We can do that by taking a few
+# examples from the training set and looking at them.
+# This is one of the places where eager execution shines:
+# we can just evaluate these tensors using .numpy()
+# instead of needing to evaluate them in a Session/Graph context.
+for text_batch, label_batch in raw_train_ds.take(1):
+    for i in range(5):
+        print(text_batch.numpy()[i])
+        print(label_batch.numpy()[i])
+```
+
+## Prepare the data
+
+In particular, we remove `<br />` tags.
+
+```python
+import string
+import re
+
+
+# Having looked at our data above, we see that the raw text contains HTML break
+# tags of the form '<br />'. These tags will not be removed by the default
+# standardizer (which doesn't strip HTML). Because of this, we will need to
+# create a custom standardization function.
+def custom_standardization(input_data):
+    lowercase = tf.strings.lower(input_data)
+    stripped_html = tf.strings.regex_replace(lowercase, "<br />", " ")
+    return tf.strings.regex_replace(
+        stripped_html, f"[{re.escape(string.punctuation)}]", ""
+    )
+
+
+# Model constants.
+max_features = 20000
+embedding_dim = 128
+sequence_length = 500
+
+# Now that we have our custom standardization, we can instantiate our text
+# vectorization layer. We are using this layer to normalize, split, and map
+# strings to integers, so we set our 'output_mode' to 'int'.
+# Note that we're using the default split function,
+# and the custom standardization defined above.
+# We also set an explicit maximum sequence length, since the CNNs later in our
+# model won't support ragged sequences.
+vectorize_layer = keras.layers.TextVectorization(
+    standardize=custom_standardization,
+    max_tokens=max_features,
+    output_mode="int",
+    output_sequence_length=sequence_length,
+)
+
+# Now that the vectorize_layer has been created, call `adapt` on a text-only
+# dataset to create the vocabulary. You don't have to batch, but for very large
+# datasets this means you're not keeping spare copies of the dataset in memory.
+
+# Let's make a text-only dataset (no labels):
+text_ds = raw_train_ds.map(lambda x, y: x)
+# Let's call `adapt`:
+vectorize_layer.adapt(text_ds)
+```
+
+## Two options to vectorize the data
+
+There are 2 ways we can use our text vectorization layer:
+
+**Option 1: Make it part of the model**, so as to obtain a model that processes raw
+ strings, like this:
+
+```python
+text_input = keras.Input(shape=(1,), dtype=tf.string, name='text')
+x = vectorize_layer(text_input)
+x = layers.Embedding(max_features + 1, embedding_dim)(x)
+...
+```
+
+**Option 2: Apply it to the text dataset** to obtain a dataset of word indices, then
+ feed it into a model that expects integer sequences as inputs.
+
+An important difference between the two is that option 2 enables you to do
+**asynchronous CPU processing and buffering** of your data when training on GPU.
+So if you're training the model on GPU, you probably want to go with this option to get
+ the best performance. This is what we will do below.
+
+If we were to export our model to production, we'd ship a model that accepts raw
+strings as input, like in the code snippet for option 1 above. This can be done after
+ training. We do this in the last section.
+
+```python
+def vectorize_text(text, label):
+    text = tf.expand_dims(text, -1)
+    return vectorize_layer(text), label
+
+
+# Vectorize the data.
+train_ds = raw_train_ds.map(vectorize_text)
+val_ds = raw_val_ds.map(vectorize_text)
+test_ds = raw_test_ds.map(vectorize_text)
+
+# Do async prefetching / buffering of the data for best performance on GPU.
+train_ds = train_ds.cache().prefetch(buffer_size=10)
+val_ds = val_ds.cache().prefetch(buffer_size=10)
+test_ds = test_ds.cache().prefetch(buffer_size=10)
+```
+
+## Build a model
+
+We choose a simple 1D convnet starting with an `Embedding` layer.
+
+```python
+# An integer input for vocab indices.
+inputs = keras.Input(shape=(None,), dtype="int64")
+
+# Next, we add a layer to map those vocab indices into a space of dimensionality
+# 'embedding_dim'.
+x = layers.Embedding(max_features, embedding_dim)(inputs) +x = layers.Dropout(0.5)(x) + +# Conv1D + global max pooling +x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3)(x) +x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3)(x) +x = layers.GlobalMaxPooling1D()(x) + +# We add a vanilla hidden layer: +x = layers.Dense(128, activation="relu")(x) +x = layers.Dropout(0.5)(x) + +# We project onto a single unit output layer, and squash it with a sigmoid: +predictions = layers.Dense(1, activation="sigmoid", name="predictions")(x) + +model = keras.Model(inputs, predictions) + +# Compile the model with binary crossentropy loss and an adam optimizer. +model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"]) +``` + +## Train the model + +```python +epochs = 3 + +# Fit the model using the train and test datasets. +model.fit(train_ds, validation_data=val_ds, epochs=epochs) +``` + +## Evaluate the model on the test set + +```python +model.evaluate(test_ds) +``` + +## Make an end-to-end model + +If you want to obtain a model capable of processing raw strings, you can simply +create a new model (using the weights we just trained): + +```python +# A string input +inputs = keras.Input(shape=(1,), dtype="string") +# Turn strings into vocab indices +indices = vectorize_layer(inputs) +# Turn vocab indices into predictions +outputs = model(indices) + +# Our end to end model +end_to_end_model = keras.Model(inputs, outputs) +end_to_end_model.compile( + loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"] +) + +# Test it with `raw_test_ds`, which yields raw strings +end_to_end_model.evaluate(raw_test_ds) +``` + diff --git a/.tether/vignettes-src/examples/structured_data/imbalanced_classification.Rmd b/.tether/vignettes-src/examples/structured_data/imbalanced_classification.Rmd new file mode 100644 index 0000000000..663e0024c3 --- /dev/null +++ b/.tether/vignettes-src/examples/structured_data/imbalanced_classification.Rmd @@ -0,0 +1,149 @@ +""" +Title: Imbalanced classification: credit card fraud detection +Author: [fchollet](https://twitter.com/fchollet) +Date created: 2019/05/28 +Last modified: 2020/04/17 +Description: Demonstration of how to handle highly imbalanced classification problems. +Accelerator: GPU +""" + +""" +## Introduction + +This example looks at the +[Kaggle Credit Card Fraud Detection](https://www.kaggle.com/mlg-ulb/creditcardfraud/) +dataset to demonstrate how +to train a classification model on data with highly imbalanced classes. 
+""" + +""" +## First, vectorize the CSV data +""" + +import csv +import numpy as np + +# Get the real data from https://www.kaggle.com/mlg-ulb/creditcardfraud/ +fname = "/Users/fchollet/Downloads/creditcard.csv" + +all_features = [] +all_targets = [] +with open(fname) as f: + for i, line in enumerate(f): + if i == 0: + print("HEADER:", line.strip()) + continue # Skip header + fields = line.strip().split(",") + all_features.append([float(v.replace('"', "")) for v in fields[:-1]]) + all_targets.append([int(fields[-1].replace('"', ""))]) + if i == 1: + print("EXAMPLE FEATURES:", all_features[-1]) + +features = np.array(all_features, dtype="float32") +targets = np.array(all_targets, dtype="uint8") +print("features.shape:", features.shape) +print("targets.shape:", targets.shape) + +""" +## Prepare a validation set +""" + +num_val_samples = int(len(features) * 0.2) +train_features = features[:-num_val_samples] +train_targets = targets[:-num_val_samples] +val_features = features[-num_val_samples:] +val_targets = targets[-num_val_samples:] + +print("Number of training samples:", len(train_features)) +print("Number of validation samples:", len(val_features)) + +""" +## Analyze class imbalance in the targets +""" + +counts = np.bincount(train_targets[:, 0]) +print( + "Number of positive samples in training data: {} ({:.2f}% of total)".format( + counts[1], 100 * float(counts[1]) / len(train_targets) + ) +) + +weight_for_0 = 1.0 / counts[0] +weight_for_1 = 1.0 / counts[1] + +""" +## Normalize the data using training set statistics +""" + +mean = np.mean(train_features, axis=0) +train_features -= mean +val_features -= mean +std = np.std(train_features, axis=0) +train_features /= std +val_features /= std + +""" +## Build a binary classification model +""" + +import keras + +model = keras.Sequential( + [ + keras.Input(shape=train_features.shape[1:]), + keras.layers.Dense(256, activation="relu"), + keras.layers.Dense(256, activation="relu"), + keras.layers.Dropout(0.3), + keras.layers.Dense(256, activation="relu"), + keras.layers.Dropout(0.3), + keras.layers.Dense(1, activation="sigmoid"), + ] +) +model.summary() + +""" +## Train the model with `class_weight` argument +""" + +metrics = [ + keras.metrics.FalseNegatives(name="fn"), + keras.metrics.FalsePositives(name="fp"), + keras.metrics.TrueNegatives(name="tn"), + keras.metrics.TruePositives(name="tp"), + keras.metrics.Precision(name="precision"), + keras.metrics.Recall(name="recall"), +] + +model.compile( + optimizer=keras.optimizers.Adam(1e-2), loss="binary_crossentropy", metrics=metrics +) + +callbacks = [keras.callbacks.ModelCheckpoint("fraud_model_at_epoch_{epoch}.keras")] +class_weight = {0: weight_for_0, 1: weight_for_1} + +model.fit( + train_features, + train_targets, + batch_size=2048, + epochs=30, + verbose=2, + callbacks=callbacks, + validation_data=(val_features, val_targets), + class_weight=class_weight, +) + +""" +## Conclusions + +At the end of training, out of 56,961 validation transactions, we are: + +- Correctly identifying 66 of them as fraudulent +- Missing 9 fraudulent transactions +- At the cost of incorrectly flagging 441 legitimate transactions + +In the real world, one would put an even higher weight on class 1, +so as to reflect that False Negatives are more costly than False Positives. + +Next time your credit card gets declined in an online purchase -- this is why. 
+
+"""
diff --git a/.tether/vignettes-src/examples/structured_data/structured_data_classification_with_feature_space.Rmd b/.tether/vignettes-src/examples/structured_data/structured_data_classification_with_feature_space.Rmd
new file mode 100644
index 0000000000..b2c5879ea4
--- /dev/null
+++ b/.tether/vignettes-src/examples/structured_data/structured_data_classification_with_feature_space.Rmd
@@ -0,0 +1,380 @@
+---
+title: Structured data classification with FeatureSpace
+author: '[fchollet](https://twitter.com/fchollet)'
+date-created: 2022/11/09
+last-modified: 2022/11/09
+description: Classify tabular data in a few lines of code.
+accelerator: GPU
+output: rmarkdown::html_vignette
+knit: ({source(here::here("tools/knit.R")); knit_vignette})
+tether: ~/github/keras-team/keras-io/examples/structured_data/structured_data_classification_with_feature_space.py
+---
+
+## Introduction
+
+This example demonstrates how to do structured data classification
+(also known as tabular data classification), starting from a raw
+CSV file. Our data includes numerical features,
+integer categorical features, and string categorical features.
+We will use the utility `keras.utils.FeatureSpace` to index,
+preprocess, and encode our features.
+
+The code is adapted from the example
+[Structured data classification from scratch](https://keras.io/examples/structured_data/structured_data_classification_from_scratch/).
+While the previous example managed its own low-level feature preprocessing and
+encoding with Keras preprocessing layers, in this example we
+delegate everything to `FeatureSpace`, making the workflow
+extremely quick and easy.
+
+### The dataset
+
+[Our dataset](https://archive.ics.uci.edu/ml/datasets/heart+Disease) is provided by the
+Cleveland Clinic Foundation for Heart Disease.
+It's a CSV file with 303 rows. Each row contains information about a patient (a
+**sample**), and each column describes an attribute of the patient (a **feature**). We
+use the features to predict whether a patient has a heart disease
+(**binary classification**).
+ +Here's the description of each feature: + +Column| Description| Feature Type +------------|--------------------|---------------------- +Age | Age in years | Numerical +Sex | (1 = male; 0 = female) | Categorical +CP | Chest pain type (0, 1, 2, 3, 4) | Categorical +Trestbpd | Resting blood pressure (in mm Hg on admission) | Numerical +Chol | Serum cholesterol in mg/dl | Numerical +FBS | fasting blood sugar in 120 mg/dl (1 = true; 0 = false) | Categorical +RestECG | Resting electrocardiogram results (0, 1, 2) | Categorical +Thalach | Maximum heart rate achieved | Numerical +Exang | Exercise induced angina (1 = yes; 0 = no) | Categorical +Oldpeak | ST depression induced by exercise relative to rest | Numerical +Slope | Slope of the peak exercise ST segment | Numerical +CA | Number of major vessels (0-3) colored by fluoroscopy | Both numerical & categorical +Thal | 3 = normal; 6 = fixed defect; 7 = reversible defect | Categorical +Target | Diagnosis of heart disease (1 = true; 0 = false) | Target + +## Setup + +```python +import os + +os.environ["KERAS_BACKEND"] = "tensorflow" + +import tensorflow as tf +import pandas as pd +import keras +from keras.utils import FeatureSpace +``` + +## Preparing the data + +Let's download the data and load it into a Pandas dataframe: + +```python +file_url = "http://storage.googleapis.com/download.tensorflow.org/data/heart.csv" +dataframe = pd.read_csv(file_url) +``` + +The dataset includes 303 samples with 14 columns per sample +(13 features, plus the target label): + +```python +print(dataframe.shape) +``` + +Here's a preview of a few samples: + +```python +dataframe.head() +``` + +The last column, "target", indicates whether the patient +has a heart disease (1) or not (0). + +Let's split the data into a training and validation set: + +```python +val_dataframe = dataframe.sample(frac=0.2, random_state=1337) +train_dataframe = dataframe.drop(val_dataframe.index) + +print( + "Using %d samples for training and %d for validation" + % (len(train_dataframe), len(val_dataframe)) +) +``` + +Let's generate `tf.data.Dataset` objects for each dataframe: + +```python +def dataframe_to_dataset(dataframe): + dataframe = dataframe.copy() + labels = dataframe.pop("target") + ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels)) + ds = ds.shuffle(buffer_size=len(dataframe)) + return ds + + +train_ds = dataframe_to_dataset(train_dataframe) +val_ds = dataframe_to_dataset(val_dataframe) +``` + +Each `Dataset` yields a tuple `(input, target)` where `input` is a dictionary of features +and `target` is the value `0` or `1`: + +```python +for x, y in train_ds.take(1): + print("Input:", x) + print("Target:", y) +``` + +Let's batch the datasets: + +```python +train_ds = train_ds.batch(32) +val_ds = val_ds.batch(32) +``` + +## Configuring a `FeatureSpace` + +To configure how each feature should be preprocessed, +we instantiate a `keras.utils.FeatureSpace`, and we +pass to it a dictionary that maps the name of our features +to a string that describes the feature type. + +We have a few "integer categorical" features such as `"FBS"`, +one "string categorical" feature (`"thal"`), +and a few numerical features, which we'd like to normalize +-- except `"age"`, which we'd like to discretize into +a number of bins. + +We also use the `crosses` argument +to capture *feature interactions* for some categorical +features, that is to say, create additional features +that represent value co-occurrences for these categorical features. 
+You can compute feature crosses like this for arbitrary sets of
+categorical features -- not just tuples of two features.
+Because the resulting co-occurrences are hashed
+into a fixed-size vector, you don't need to worry about whether
+the co-occurrence space is too large.
+
+```python
+feature_space = FeatureSpace(
+    features={
+        # Categorical features encoded as integers
+        "sex": "integer_categorical",
+        "cp": "integer_categorical",
+        "fbs": "integer_categorical",
+        "restecg": "integer_categorical",
+        "exang": "integer_categorical",
+        "ca": "integer_categorical",
+        # Categorical feature encoded as string
+        "thal": "string_categorical",
+        # Numerical features to discretize
+        "age": "float_discretized",
+        # Numerical features to normalize
+        "trestbps": "float_normalized",
+        "chol": "float_normalized",
+        "thalach": "float_normalized",
+        "oldpeak": "float_normalized",
+        "slope": "float_normalized",
+    },
+    # We create additional features by hashing
+    # value co-occurrences for the
+    # following groups of categorical features.
+    crosses=[("sex", "age"), ("thal", "ca")],
+    # The hashing space for these co-occurrences
+    # will be 32-dimensional.
+    crossing_dim=32,
+    # Our utility will one-hot encode all categorical
+    # features and concat all features into a single
+    # vector (one vector per sample).
+    output_mode="concat",
+)
+```
+
+## Further customizing a `FeatureSpace`
+
+Specifying the feature type via a string name is quick and easy,
+but sometimes you may want to further configure the preprocessing
+of each feature. For instance, in our case, our categorical
+features don't have a large set of possible values -- it's only
+a handful of values per feature (e.g. `1` and `0` for the feature `"FBS"`),
+and all possible values are represented in the training set.
+As a result, we don't need to reserve an index to represent "out of vocabulary" values
+for these features -- which would have been the default behavior.
+Below, we just specify `num_oov_indices=0` in each of these features
+to tell the feature preprocessor to skip "out of vocabulary" indexing.
+
+Other customizations you have access to include specifying the number of
+bins for discretizing features of type `"float_discretized"`,
+or the dimensionality of the hashing space for feature crossing.
+
+```python
+feature_space = FeatureSpace(
+    features={
+        # Categorical features encoded as integers
+        "sex": FeatureSpace.integer_categorical(num_oov_indices=0),
+        "cp": FeatureSpace.integer_categorical(num_oov_indices=0),
+        "fbs": FeatureSpace.integer_categorical(num_oov_indices=0),
+        "restecg": FeatureSpace.integer_categorical(num_oov_indices=0),
+        "exang": FeatureSpace.integer_categorical(num_oov_indices=0),
+        "ca": FeatureSpace.integer_categorical(num_oov_indices=0),
+        # Categorical feature encoded as string
+        "thal": FeatureSpace.string_categorical(num_oov_indices=0),
+        # Numerical features to discretize
+        "age": FeatureSpace.float_discretized(num_bins=30),
+        # Numerical features to normalize
+        "trestbps": FeatureSpace.float_normalized(),
+        "chol": FeatureSpace.float_normalized(),
+        "thalach": FeatureSpace.float_normalized(),
+        "oldpeak": FeatureSpace.float_normalized(),
+        "slope": FeatureSpace.float_normalized(),
+    },
+    # Specify feature cross with a custom crossing dim.
+    crosses=[
+        FeatureSpace.cross(feature_names=("sex", "age"), crossing_dim=64),
+        FeatureSpace.cross(
+            feature_names=("thal", "ca"),
+            crossing_dim=16,
+        ),
+    ],
+    output_mode="concat",
+)
+```
+
+## Adapt the `FeatureSpace` to the training data
+
+Before we start using the `FeatureSpace` to build a model, we have
+to adapt it to the training data. During `adapt()`, the `FeatureSpace` will:
+
+- Index the set of possible values for categorical features.
+- Compute the mean and variance for numerical features to normalize.
+- Compute the value boundaries for the different bins for numerical features to discretize.
+
+Note that `adapt()` should be called on a `tf.data.Dataset` which yields dicts
+of feature values -- no labels.
+
+```python
+train_ds_with_no_labels = train_ds.map(lambda x, _: x)
+feature_space.adapt(train_ds_with_no_labels)
+```
+
+At this point, the `FeatureSpace` can be called on a dict of raw feature values, and will return a
+single concatenated vector for each sample, combining encoded features and feature crosses.
+
+```python
+for x, _ in train_ds.take(1):
+    preprocessed_x = feature_space(x)
+    print("preprocessed_x.shape:", preprocessed_x.shape)
+    print("preprocessed_x.dtype:", preprocessed_x.dtype)
+```
+
+## Two ways to manage preprocessing: as part of the `tf.data` pipeline, or in the model itself
+
+There are two ways in which you can leverage your `FeatureSpace`:
+
+### Asynchronous preprocessing in `tf.data`
+
+You can make it part of your data pipeline, before the model. This enables asynchronous parallel
+preprocessing of the data on CPU before it hits the model. Do this if you're training on GPU or TPU,
+or if you want to speed up preprocessing. This is almost always the right thing to do during training.
+
+### Synchronous preprocessing in the model
+
+You can make it part of your model. This means that the model will expect dicts of raw feature
+values, and preprocessing will be done synchronously (in a blocking manner) before the
+rest of the forward pass. Do this if you want to have an end-to-end model that can process
+raw feature values -- but keep in mind that your model will only be able to run on CPU,
+since most types of feature preprocessing (e.g. string preprocessing) are not GPU or TPU compatible.
+
+Do not do this on GPU / TPU or in performance-sensitive settings. In general, you want to do in-model
+preprocessing when you do inference on CPU.
+
+In our case, we will apply the `FeatureSpace` in the tf.data pipeline during training, but we will
+do inference with an end-to-end model that includes the `FeatureSpace`.
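+For reference, a minimal sketch of what the in-model (synchronous) option
+would look like -- we don't use it for training here, and the `sync_*` names
+are ours, purely illustrative; it relies on the same `get_inputs()` and
+`get_encoded_features()` accessors used to build the inference model below:
+
+```python
+# Sketch: apply the FeatureSpace inside the model, so it consumes dicts of
+# raw feature values directly and preprocessing runs in the forward pass.
+sync_inputs = feature_space.get_inputs()
+sync_features = feature_space.get_encoded_features()
+sync_outputs = keras.layers.Dense(1, activation="sigmoid")(sync_features)
+sync_model = keras.Model(inputs=sync_inputs, outputs=sync_outputs)
+```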
+
+Let's create a training and validation dataset of preprocessed batches:
+
+```python
+preprocessed_train_ds = train_ds.map(
+    lambda x, y: (feature_space(x), y), num_parallel_calls=tf.data.AUTOTUNE
+)
+preprocessed_train_ds = preprocessed_train_ds.prefetch(tf.data.AUTOTUNE)
+
+preprocessed_val_ds = val_ds.map(
+    lambda x, y: (feature_space(x), y), num_parallel_calls=tf.data.AUTOTUNE
+)
+preprocessed_val_ds = preprocessed_val_ds.prefetch(tf.data.AUTOTUNE)
+```
+
+## Build a model
+
+Time to build a model -- or rather two models:
+
+- A training model that expects preprocessed features (one sample = one vector)
+- An inference model that expects raw features (one sample = dict of raw feature values)
+
+```python
+dict_inputs = feature_space.get_inputs()
+encoded_features = feature_space.get_encoded_features()
+
+x = keras.layers.Dense(32, activation="relu")(encoded_features)
+x = keras.layers.Dropout(0.5)(x)
+predictions = keras.layers.Dense(1, activation="sigmoid")(x)
+
+training_model = keras.Model(inputs=encoded_features, outputs=predictions)
+training_model.compile(
+    optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"]
+)
+
+inference_model = keras.Model(inputs=dict_inputs, outputs=predictions)
+```
+
+## Train the model
+
+Let's train our model for 20 epochs. Note that feature preprocessing is happening
+as part of the tf.data pipeline, not as part of the model.
+
+```python
+training_model.fit(
+    preprocessed_train_ds,
+    epochs=20,
+    validation_data=preprocessed_val_ds,
+    verbose=2,
+)
+```
+
+We quickly get to 80% validation accuracy.
+
+## Inference on new data with the end-to-end model
+
+Now, we can use our inference model (which includes the `FeatureSpace`)
+to make predictions based on dicts of raw feature values, as follows:
+
+```python
+sample = {
+    "age": 60,
+    "sex": 1,
+    "cp": 1,
+    "trestbps": 145,
+    "chol": 233,
+    "fbs": 1,
+    "restecg": 2,
+    "thalach": 150,
+    "exang": 0,
+    "oldpeak": 2.3,
+    "slope": 3,
+    "ca": 0,
+    "thal": "fixed",
+}

+input_dict = {name: tf.convert_to_tensor([value]) for name, value in sample.items()}
+predictions = inference_model.predict(input_dict)
+
+print(
+    f"This particular patient had a {100 * predictions[0][0]:.2f}% probability "
+    "of having a heart disease, as evaluated by our model."
+)
+```
+
diff --git a/.tether/vignettes-src/examples/structured_data_classification_with_feature_space.Rmd b/.tether/vignettes-src/examples/structured_data_classification_with_feature_space.Rmd
new file mode 100644
index 0000000000..b2c5879ea4
--- /dev/null
+++ b/.tether/vignettes-src/examples/structured_data_classification_with_feature_space.Rmd
@@ -0,0 +1,380 @@
+---
+title: Structured data classification with FeatureSpace
+author: '[fchollet](https://twitter.com/fchollet)'
+date-created: 2022/11/09
+last-modified: 2022/11/09
+description: Classify tabular data in a few lines of code.
+accelerator: GPU
+output: rmarkdown::html_vignette
+knit: ({source(here::here("tools/knit.R")); knit_vignette})
+tether: ~/github/keras-team/keras-io/examples/structured_data/structured_data_classification_with_feature_space.py
+---
+
+## Introduction
+
+This example demonstrates how to do structured data classification
+(also known as tabular data classification), starting from a raw
+CSV file. Our data includes numerical features,
+integer categorical features, and string categorical features.
+We will use the utility `keras.utils.FeatureSpace` to index,
+preprocess, and encode our features.
+ +The code is adapted from the example +[Structured data classification from scratch](https://keras.io/examples/structured_data/structured_data_classification_from_scratch/). +While the previous example managed its own low-level feature preprocessing and +encoding with Keras preprocessing layers, in this example we +delegate everything to `FeatureSpace`, making the workflow +extremely quick and easy. + +### The dataset + +[Our dataset](https://archive.ics.uci.edu/ml/datasets/heart+Disease) is provided by the +Cleveland Clinic Foundation for Heart Disease. +It's a CSV file with 303 rows. Each row contains information about a patient (a +**sample**), and each column describes an attribute of the patient (a **feature**). We +use the features to predict whether a patient has a heart disease +(**binary classification**). + +Here's the description of each feature: + +Column| Description| Feature Type +------------|--------------------|---------------------- +Age | Age in years | Numerical +Sex | (1 = male; 0 = female) | Categorical +CP | Chest pain type (0, 1, 2, 3, 4) | Categorical +Trestbpd | Resting blood pressure (in mm Hg on admission) | Numerical +Chol | Serum cholesterol in mg/dl | Numerical +FBS | fasting blood sugar in 120 mg/dl (1 = true; 0 = false) | Categorical +RestECG | Resting electrocardiogram results (0, 1, 2) | Categorical +Thalach | Maximum heart rate achieved | Numerical +Exang | Exercise induced angina (1 = yes; 0 = no) | Categorical +Oldpeak | ST depression induced by exercise relative to rest | Numerical +Slope | Slope of the peak exercise ST segment | Numerical +CA | Number of major vessels (0-3) colored by fluoroscopy | Both numerical & categorical +Thal | 3 = normal; 6 = fixed defect; 7 = reversible defect | Categorical +Target | Diagnosis of heart disease (1 = true; 0 = false) | Target + +## Setup + +```python +import os + +os.environ["KERAS_BACKEND"] = "tensorflow" + +import tensorflow as tf +import pandas as pd +import keras +from keras.utils import FeatureSpace +``` + +## Preparing the data + +Let's download the data and load it into a Pandas dataframe: + +```python +file_url = "http://storage.googleapis.com/download.tensorflow.org/data/heart.csv" +dataframe = pd.read_csv(file_url) +``` + +The dataset includes 303 samples with 14 columns per sample +(13 features, plus the target label): + +```python +print(dataframe.shape) +``` + +Here's a preview of a few samples: + +```python +dataframe.head() +``` + +The last column, "target", indicates whether the patient +has a heart disease (1) or not (0). 
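+As a quick, illustrative check of the class balance (not part of the original
+example):
+
+```python
+# Count rows labeled 1 (disease) vs. 0 (no disease).
+print(dataframe["target"].value_counts())
+```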
+
+Let's split the data into a training and validation set:
+
+```python
+val_dataframe = dataframe.sample(frac=0.2, random_state=1337)
+train_dataframe = dataframe.drop(val_dataframe.index)
+
+print(
+    "Using %d samples for training and %d for validation"
+    % (len(train_dataframe), len(val_dataframe))
+)
+```
+
+Let's generate `tf.data.Dataset` objects for each dataframe:
+
+```python
+def dataframe_to_dataset(dataframe):
+    dataframe = dataframe.copy()
+    labels = dataframe.pop("target")
+    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
+    ds = ds.shuffle(buffer_size=len(dataframe))
+    return ds
+
+
+train_ds = dataframe_to_dataset(train_dataframe)
+val_ds = dataframe_to_dataset(val_dataframe)
+```
+
+Each `Dataset` yields a tuple `(input, target)` where `input` is a dictionary of features
+and `target` is the value `0` or `1`:
+
+```python
+for x, y in train_ds.take(1):
+    print("Input:", x)
+    print("Target:", y)
+```
+
+Let's batch the datasets:
+
+```python
+train_ds = train_ds.batch(32)
+val_ds = val_ds.batch(32)
+```
+
+## Configuring a `FeatureSpace`
+
+To configure how each feature should be preprocessed,
+we instantiate a `keras.utils.FeatureSpace`, and we
+pass to it a dictionary that maps the name of our features
+to a string that describes the feature type.
+
+We have a few "integer categorical" features such as `"FBS"`,
+one "string categorical" feature (`"thal"`),
+and a few numerical features, which we'd like to normalize
+-- except `"age"`, which we'd like to discretize into
+a number of bins.
+
+We also use the `crosses` argument
+to capture *feature interactions* for some categorical
+features, that is to say, create additional features
+that represent value co-occurrences for these categorical features.
+You can compute feature crosses like this for arbitrary sets of
+categorical features -- not just tuples of two features.
+Because the resulting co-occurrences are hashed
+into a fixed-size vector, you don't need to worry about whether
+the co-occurrence space is too large.
+
+```python
+feature_space = FeatureSpace(
+    features={
+        # Categorical features encoded as integers
+        "sex": "integer_categorical",
+        "cp": "integer_categorical",
+        "fbs": "integer_categorical",
+        "restecg": "integer_categorical",
+        "exang": "integer_categorical",
+        "ca": "integer_categorical",
+        # Categorical feature encoded as string
+        "thal": "string_categorical",
+        # Numerical features to discretize
+        "age": "float_discretized",
+        # Numerical features to normalize
+        "trestbps": "float_normalized",
+        "chol": "float_normalized",
+        "thalach": "float_normalized",
+        "oldpeak": "float_normalized",
+        "slope": "float_normalized",
+    },
+    # We create additional features by hashing
+    # value co-occurrences for the
+    # following groups of categorical features.
+    crosses=[("sex", "age"), ("thal", "ca")],
+    # The hashing space for these co-occurrences
+    # will be 32-dimensional.
+    crossing_dim=32,
+    # Our utility will one-hot encode all categorical
+    # features and concat all features into a single
+    # vector (one vector per sample).
+    output_mode="concat",
+)
+```
+
+## Further customizing a `FeatureSpace`
+
+Specifying the feature type via a string name is quick and easy,
+but sometimes you may want to further configure the preprocessing
+of each feature. For instance, in our case, our categorical
+features don't have a large set of possible values -- it's only
+a handful of values per feature (e.g. `1` and `0` for the feature `"FBS"`),
+and all possible values are represented in the training set.
+As a result, we don't need to reserve an index to represent "out of vocabulary" values
+for these features -- which would have been the default behavior.
+Below, we just specify `num_oov_indices=0` in each of these features
+to tell the feature preprocessor to skip "out of vocabulary" indexing.
+
+Other customizations you have access to include specifying the number of
+bins for discretizing features of type `"float_discretized"`,
+or the dimensionality of the hashing space for feature crossing.
+
+```python
+feature_space = FeatureSpace(
+    features={
+        # Categorical features encoded as integers
+        "sex": FeatureSpace.integer_categorical(num_oov_indices=0),
+        "cp": FeatureSpace.integer_categorical(num_oov_indices=0),
+        "fbs": FeatureSpace.integer_categorical(num_oov_indices=0),
+        "restecg": FeatureSpace.integer_categorical(num_oov_indices=0),
+        "exang": FeatureSpace.integer_categorical(num_oov_indices=0),
+        "ca": FeatureSpace.integer_categorical(num_oov_indices=0),
+        # Categorical feature encoded as string
+        "thal": FeatureSpace.string_categorical(num_oov_indices=0),
+        # Numerical features to discretize
+        "age": FeatureSpace.float_discretized(num_bins=30),
+        # Numerical features to normalize
+        "trestbps": FeatureSpace.float_normalized(),
+        "chol": FeatureSpace.float_normalized(),
+        "thalach": FeatureSpace.float_normalized(),
+        "oldpeak": FeatureSpace.float_normalized(),
+        "slope": FeatureSpace.float_normalized(),
+    },
+    # Specify feature cross with a custom crossing dim.
+    crosses=[
+        FeatureSpace.cross(feature_names=("sex", "age"), crossing_dim=64),
+        FeatureSpace.cross(
+            feature_names=("thal", "ca"),
+            crossing_dim=16,
+        ),
+    ],
+    output_mode="concat",
+)
+```
+
+## Adapt the `FeatureSpace` to the training data
+
+Before we start using the `FeatureSpace` to build a model, we have
+to adapt it to the training data. During `adapt()`, the `FeatureSpace` will:
+
+- Index the set of possible values for categorical features.
+- Compute the mean and variance for numerical features to normalize.
+- Compute the value boundaries for the different bins for numerical features to discretize.
+
+Note that `adapt()` should be called on a `tf.data.Dataset` which yields dicts
+of feature values -- no labels.
+
+```python
+train_ds_with_no_labels = train_ds.map(lambda x, _: x)
+feature_space.adapt(train_ds_with_no_labels)
+```
+
+At this point, the `FeatureSpace` can be called on a dict of raw feature values, and will return a
+single concatenated vector for each sample, combining encoded features and feature crosses.
+
+```python
+for x, _ in train_ds.take(1):
+    preprocessed_x = feature_space(x)
+    print("preprocessed_x.shape:", preprocessed_x.shape)
+    print("preprocessed_x.dtype:", preprocessed_x.dtype)
+```
+
+## Two ways to manage preprocessing: as part of the `tf.data` pipeline, or in the model itself
+
+There are two ways in which you can leverage your `FeatureSpace`:
+
+### Asynchronous preprocessing in `tf.data`
+
+You can make it part of your data pipeline, before the model. This enables asynchronous parallel
+preprocessing of the data on CPU before it hits the model. Do this if you're training on GPU or TPU,
+or if you want to speed up preprocessing. This is almost always the right thing to do during training.
+
+### Synchronous preprocessing in the model
+
+You can make it part of your model. This means that the model will expect dicts of raw feature
+values, and preprocessing will be done synchronously (in a blocking manner) before the
+rest of the forward pass. Do this if you want to have an end-to-end model that can process
+raw feature values -- but keep in mind that your model will only be able to run on CPU,
+since most types of feature preprocessing (e.g. string preprocessing) are not GPU or TPU compatible.
+
+Do not do this on GPU / TPU or in performance-sensitive settings. In general, you want to do in-model
+preprocessing when you do inference on CPU.
+
+In our case, we will apply the `FeatureSpace` in the tf.data pipeline during training, but we will
+do inference with an end-to-end model that includes the `FeatureSpace`.
+
+Let's create a training and validation dataset of preprocessed batches:
+
+```python
+preprocessed_train_ds = train_ds.map(
+    lambda x, y: (feature_space(x), y), num_parallel_calls=tf.data.AUTOTUNE
+)
+preprocessed_train_ds = preprocessed_train_ds.prefetch(tf.data.AUTOTUNE)
+
+preprocessed_val_ds = val_ds.map(
+    lambda x, y: (feature_space(x), y), num_parallel_calls=tf.data.AUTOTUNE
+)
+preprocessed_val_ds = preprocessed_val_ds.prefetch(tf.data.AUTOTUNE)
+```
+
+## Build a model
+
+Time to build a model -- or rather two models:
+
+- A training model that expects preprocessed features (one sample = one vector)
+- An inference model that expects raw features (one sample = dict of raw feature values)
+
+```python
+dict_inputs = feature_space.get_inputs()
+encoded_features = feature_space.get_encoded_features()
+
+x = keras.layers.Dense(32, activation="relu")(encoded_features)
+x = keras.layers.Dropout(0.5)(x)
+predictions = keras.layers.Dense(1, activation="sigmoid")(x)
+
+training_model = keras.Model(inputs=encoded_features, outputs=predictions)
+training_model.compile(
+    optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"]
+)
+
+inference_model = keras.Model(inputs=dict_inputs, outputs=predictions)
+```
+
+## Train the model
+
+Let's train our model for 20 epochs. Note that feature preprocessing is happening
+as part of the tf.data pipeline, not as part of the model.
+
+```python
+training_model.fit(
+    preprocessed_train_ds,
+    epochs=20,
+    validation_data=preprocessed_val_ds,
+    verbose=2,
+)
+```
+
+We quickly get to 80% validation accuracy.
+
+## Inference on new data with the end-to-end model
+
+Now, we can use our inference model (which includes the `FeatureSpace`)
+to make predictions based on dicts of raw feature values, as follows:
+
+```python
+sample = {
+    "age": 60,
+    "sex": 1,
+    "cp": 1,
+    "trestbps": 145,
+    "chol": 233,
+    "fbs": 1,
+    "restecg": 2,
+    "thalach": 150,
+    "exang": 0,
+    "oldpeak": 2.3,
+    "slope": 3,
+    "ca": 0,
+    "thal": "fixed",
+}
+
+input_dict = {name: tf.convert_to_tensor([value]) for name, value in sample.items()}
+predictions = inference_model.predict(input_dict)
+
+print(
+    f"This particular patient had a {100 * predictions[0][0]:.2f}% probability "
+    "of having a heart disease, as evaluated by our model."
+) +``` + diff --git a/.tether/vignettes-src/examples/text_classification_from_scratch.Rmd b/.tether/vignettes-src/examples/text_classification_from_scratch.Rmd new file mode 100644 index 0000000000..39b7f9ed8a --- /dev/null +++ b/.tether/vignettes-src/examples/text_classification_from_scratch.Rmd @@ -0,0 +1,277 @@ +--- +title: Text classification from scratch +authors: Mark Omernick, Francois Chollet +date-created: 2019/11/06 +last-modified: 2020/05/17 +description: Text sentiment classification starting from raw text files. +accelerator: GPU +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras-io/examples/nlp/text_classification_from_scratch.py +--- + +## Introduction + +This example shows how to do text classification starting from raw text (as +a set of text files on disk). We demonstrate the workflow on the IMDB sentiment +classification dataset (unprocessed version). We use the `TextVectorization` layer for + word splitting & indexing. + +## Setup + +```python +import os + +os.environ["KERAS_BACKEND"] = "tensorflow" + +import keras +import tensorflow as tf +import numpy as np +from keras import layers +``` + +## Load the data: IMDB movie review sentiment classification + +Let's download the data and inspect its structure. + +curl -O https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz +tar -xf aclImdb_v1.tar.gz + +The `aclImdb` folder contains a `train` and `test` subfolder: + +ls aclImdb + +ls aclImdb/test + +ls aclImdb/train + +The `aclImdb/train/pos` and `aclImdb/train/neg` folders contain text files, each of + which represents one review (either positive or negative): + +cat aclImdb/train/pos/6248_7.txt + +We are only interested in the `pos` and `neg` subfolders, so let's delete the other subfolder that has text files in it: + +rm -r aclImdb/train/unsup + +You can use the utility `keras.utils.text_dataset_from_directory` to +generate a labeled `tf.data.Dataset` object from a set of text files on disk filed + into class-specific folders. + +Let's use it to generate the training, validation, and test datasets. The validation +and training datasets are generated from two subsets of the `train` directory, with 20% +of samples going to the validation dataset and 80% going to the training dataset. + +Having a validation dataset in addition to the test dataset is useful for tuning +hyperparameters, such as the model architecture, for which the test dataset should not +be used. + +Before putting the model out into the real world however, it should be retrained using all +available training data (without creating a validation dataset), so its performance is maximized. + +When using the `validation_split` & `subset` arguments, make sure to either specify a +random seed, or to pass `shuffle=False`, so that the validation & training splits you +get have no overlap. 
+
+```python
+batch_size = 32
+raw_train_ds = keras.utils.text_dataset_from_directory(
+    "aclImdb/train",
+    batch_size=batch_size,
+    validation_split=0.2,
+    subset="training",
+    seed=1337,
+)
+raw_val_ds = keras.utils.text_dataset_from_directory(
+    "aclImdb/train",
+    batch_size=batch_size,
+    validation_split=0.2,
+    subset="validation",
+    seed=1337,
+)
+raw_test_ds = keras.utils.text_dataset_from_directory(
+    "aclImdb/test", batch_size=batch_size
+)
+
+print(f"Number of batches in raw_train_ds: {raw_train_ds.cardinality()}")
+print(f"Number of batches in raw_val_ds: {raw_val_ds.cardinality()}")
+print(f"Number of batches in raw_test_ds: {raw_test_ds.cardinality()}")
+```
+
+Let's preview a few samples:
+
+```python
+# It's important to take a look at your raw data to ensure your normalization
+# and tokenization will work as expected. We can do that by taking a few
+# examples from the training set and looking at them.
+# This is one of the places where eager execution shines:
+# we can just evaluate these tensors using .numpy()
+# instead of needing to evaluate them in a Session/Graph context.
+for text_batch, label_batch in raw_train_ds.take(1):
+    for i in range(5):
+        print(text_batch.numpy()[i])
+        print(label_batch.numpy()[i])
+```
+
+## Prepare the data
+
+In particular, we remove `<br />` tags.
+
+```python
+import string
+import re
+
+
+# Having looked at our data above, we see that the raw text contains HTML break
+# tags of the form '<br />'. These tags will not be removed by the default
+# standardizer (which doesn't strip HTML). Because of this, we will need to
+# create a custom standardization function.
+def custom_standardization(input_data):
+    lowercase = tf.strings.lower(input_data)
+    stripped_html = tf.strings.regex_replace(lowercase, "<br />", " ")
+    return tf.strings.regex_replace(
+        stripped_html, f"[{re.escape(string.punctuation)}]", ""
+    )
+
+
+# Model constants.
+max_features = 20000
+embedding_dim = 128
+sequence_length = 500
+
+# Now that we have our custom standardization, we can instantiate our text
+# vectorization layer. We are using this layer to normalize, split, and map
+# strings to integers, so we set our 'output_mode' to 'int'.
+# Note that we're using the default split function,
+# and the custom standardization defined above.
+# We also set an explicit maximum sequence length, since the CNNs later in our
+# model won't support ragged sequences.
+vectorize_layer = keras.layers.TextVectorization(
+    standardize=custom_standardization,
+    max_tokens=max_features,
+    output_mode="int",
+    output_sequence_length=sequence_length,
+)
+
+# Now that the vectorize_layer has been created, call `adapt` on a text-only
+# dataset to create the vocabulary. You don't have to batch, but for very large
+# datasets this means you're not keeping spare copies of the dataset in memory.
+
+# Let's make a text-only dataset (no labels):
+text_ds = raw_train_ds.map(lambda x, y: x)
+# Let's call `adapt`:
+vectorize_layer.adapt(text_ds)
+```
+
+## Two options to vectorize the data
+
+There are 2 ways we can use our text vectorization layer:
+
+**Option 1: Make it part of the model**, so as to obtain a model that processes raw
+ strings, like this:
+
+```python
+text_input = keras.Input(shape=(1,), dtype=tf.string, name='text')
+x = vectorize_layer(text_input)
+x = layers.Embedding(max_features + 1, embedding_dim)(x)
+...
+```
+
+**Option 2: Apply it to the text dataset** to obtain a dataset of word indices, then
+ feed it into a model that expects integer sequences as inputs.
+
+An important difference between the two is that option 2 enables you to do
+**asynchronous CPU processing and buffering** of your data when training on GPU.
+So if you're training the model on GPU, you probably want to go with this option to get
+ the best performance. This is what we will do below.
+
+If we were to export our model to production, we'd ship a model that accepts raw
+strings as input, like in the code snippet for option 1 above. This can be done after
+ training. We do this in the last section.
+
+```python
+def vectorize_text(text, label):
+    text = tf.expand_dims(text, -1)
+    return vectorize_layer(text), label
+
+
+# Vectorize the data.
+train_ds = raw_train_ds.map(vectorize_text)
+val_ds = raw_val_ds.map(vectorize_text)
+test_ds = raw_test_ds.map(vectorize_text)
+
+# Do async prefetching / buffering of the data for best performance on GPU.
+train_ds = train_ds.cache().prefetch(buffer_size=10)
+val_ds = val_ds.cache().prefetch(buffer_size=10)
+test_ds = test_ds.cache().prefetch(buffer_size=10)
+```
+
+## Build a model
+
+We choose a simple 1D convnet starting with an `Embedding` layer.
+
+```python
+# An integer input for vocab indices.
+inputs = keras.Input(shape=(None,), dtype="int64")
+
+# Next, we add a layer to map those vocab indices into a space of dimensionality
+# 'embedding_dim'.
+x = layers.Embedding(max_features, embedding_dim)(inputs) +x = layers.Dropout(0.5)(x) + +# Conv1D + global max pooling +x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3)(x) +x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3)(x) +x = layers.GlobalMaxPooling1D()(x) + +# We add a vanilla hidden layer: +x = layers.Dense(128, activation="relu")(x) +x = layers.Dropout(0.5)(x) + +# We project onto a single unit output layer, and squash it with a sigmoid: +predictions = layers.Dense(1, activation="sigmoid", name="predictions")(x) + +model = keras.Model(inputs, predictions) + +# Compile the model with binary crossentropy loss and an adam optimizer. +model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"]) +``` + +## Train the model + +```python +epochs = 3 + +# Fit the model using the train and test datasets. +model.fit(train_ds, validation_data=val_ds, epochs=epochs) +``` + +## Evaluate the model on the test set + +```python +model.evaluate(test_ds) +``` + +## Make an end-to-end model + +If you want to obtain a model capable of processing raw strings, you can simply +create a new model (using the weights we just trained): + +```python +# A string input +inputs = keras.Input(shape=(1,), dtype="string") +# Turn strings into vocab indices +indices = vectorize_layer(inputs) +# Turn vocab indices into predictions +outputs = model(indices) + +# Our end to end model +end_to_end_model = keras.Model(inputs, outputs) +end_to_end_model.compile( + loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"] +) + +# Test it with `raw_test_ds`, which yields raw strings +end_to_end_model.evaluate(raw_test_ds) +``` + diff --git a/.tether/vignettes-src/examples/timeseries/timeseries_anomaly_detection.Rmd b/.tether/vignettes-src/examples/timeseries/timeseries_anomaly_detection.Rmd new file mode 100644 index 0000000000..452e4d761a --- /dev/null +++ b/.tether/vignettes-src/examples/timeseries/timeseries_anomaly_detection.Rmd @@ -0,0 +1,306 @@ +""" +Title: Timeseries anomaly detection using an Autoencoder +Author: [pavithrasv](https://github.com/pavithrasv) +Date created: 2020/05/31 +Last modified: 2020/05/31 +Description: Detect anomalies in a timeseries using an Autoencoder. +Accelerator: GPU +""" + +""" +## Introduction + +This script demonstrates how you can use a reconstruction convolutional +autoencoder model to detect anomalies in timeseries data. +""" + +""" +## Setup +""" + +import numpy as np +import pandas as pd +import keras +from keras import layers +from matplotlib import pyplot as plt + +""" +## Load the data + +We will use the [Numenta Anomaly Benchmark(NAB)]( +https://www.kaggle.com/boltzmannbrain/nab) dataset. It provides artificial +timeseries data containing labeled anomalous periods of behavior. Data are +ordered, timestamped, single-valued metrics. + +We will use the `art_daily_small_noise.csv` file for training and the +`art_daily_jumpsup.csv` file for testing. The simplicity of this dataset +allows us to demonstrate anomaly detection effectively. 
+""" + +master_url_root = "https://raw.githubusercontent.com/numenta/NAB/master/data/" + +df_small_noise_url_suffix = "artificialNoAnomaly/art_daily_small_noise.csv" +df_small_noise_url = master_url_root + df_small_noise_url_suffix +df_small_noise = pd.read_csv( + df_small_noise_url, parse_dates=True, index_col="timestamp" +) + +df_daily_jumpsup_url_suffix = "artificialWithAnomaly/art_daily_jumpsup.csv" +df_daily_jumpsup_url = master_url_root + df_daily_jumpsup_url_suffix +df_daily_jumpsup = pd.read_csv( + df_daily_jumpsup_url, parse_dates=True, index_col="timestamp" +) + +""" +## Quick look at the data +""" + +print(df_small_noise.head()) + +print(df_daily_jumpsup.head()) + +""" +## Visualize the data +### Timeseries data without anomalies + +We will use the following data for training. +""" +fig, ax = plt.subplots() +df_small_noise.plot(legend=False, ax=ax) +plt.show() + +""" +### Timeseries data with anomalies + +We will use the following data for testing and see if the sudden jump up in the +data is detected as an anomaly. +""" +fig, ax = plt.subplots() +df_daily_jumpsup.plot(legend=False, ax=ax) +plt.show() + +""" +## Prepare training data + +Get data values from the training timeseries data file and normalize the +`value` data. We have a `value` for every 5 mins for 14 days. + +- 24 * 60 / 5 = **288 timesteps per day** +- 288 * 14 = **4032 data points** in total +""" + + +# Normalize and save the mean and std we get, +# for normalizing test data. +training_mean = df_small_noise.mean() +training_std = df_small_noise.std() +df_training_value = (df_small_noise - training_mean) / training_std +print("Number of training samples:", len(df_training_value)) + +""" +### Create sequences +Create sequences combining `TIME_STEPS` contiguous data values from the +training data. +""" + +TIME_STEPS = 288 + + +# Generated training sequences for use in the model. +def create_sequences(values, time_steps=TIME_STEPS): + output = [] + for i in range(len(values) - time_steps + 1): + output.append(values[i : (i + time_steps)]) + return np.stack(output) + + +x_train = create_sequences(df_training_value.values) +print("Training input shape: ", x_train.shape) + +""" +## Build a model + +We will build a convolutional reconstruction autoencoder model. The model will +take input of shape `(batch_size, sequence_length, num_features)` and return +output of the same shape. In this case, `sequence_length` is 288 and +`num_features` is 1. +""" + +model = keras.Sequential( + [ + layers.Input(shape=(x_train.shape[1], x_train.shape[2])), + layers.Conv1D( + filters=32, + kernel_size=7, + padding="same", + strides=2, + activation="relu", + ), + layers.Dropout(rate=0.2), + layers.Conv1D( + filters=16, + kernel_size=7, + padding="same", + strides=2, + activation="relu", + ), + layers.Conv1DTranspose( + filters=16, + kernel_size=7, + padding="same", + strides=2, + activation="relu", + ), + layers.Dropout(rate=0.2), + layers.Conv1DTranspose( + filters=32, + kernel_size=7, + padding="same", + strides=2, + activation="relu", + ), + layers.Conv1DTranspose(filters=1, kernel_size=7, padding="same"), + ] +) +model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss="mse") +model.summary() + +""" +## Train the model + +Please note that we are using `x_train` as both the input and the target +since this is a reconstruction model. 
+""" + +history = model.fit( + x_train, + x_train, + epochs=50, + batch_size=128, + validation_split=0.1, + callbacks=[ + keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, mode="min") + ], +) + +""" +Let's plot training and validation loss to see how the training went. +""" + +plt.plot(history.history["loss"], label="Training Loss") +plt.plot(history.history["val_loss"], label="Validation Loss") +plt.legend() +plt.show() + +""" +## Detecting anomalies + +We will detect anomalies by determining how well our model can reconstruct +the input data. + + +1. Find MAE loss on training samples. +2. Find max MAE loss value. This is the worst our model has performed trying +to reconstruct a sample. We will make this the `threshold` for anomaly +detection. +3. If the reconstruction loss for a sample is greater than this `threshold` +value then we can infer that the model is seeing a pattern that it isn't +familiar with. We will label this sample as an `anomaly`. + + +""" + +# Get train MAE loss. +x_train_pred = model.predict(x_train) +train_mae_loss = np.mean(np.abs(x_train_pred - x_train), axis=1) + +plt.hist(train_mae_loss, bins=50) +plt.xlabel("Train MAE loss") +plt.ylabel("No of samples") +plt.show() + +# Get reconstruction loss threshold. +threshold = np.max(train_mae_loss) +print("Reconstruction error threshold: ", threshold) + +""" +### Compare recontruction + +Just for fun, let's see how our model has recontructed the first sample. +This is the 288 timesteps from day 1 of our training dataset. +""" + +# Checking how the first sequence is learnt +plt.plot(x_train[0]) +plt.plot(x_train_pred[0]) +plt.show() + +""" +### Prepare test data +""" + + +df_test_value = (df_daily_jumpsup - training_mean) / training_std +fig, ax = plt.subplots() +df_test_value.plot(legend=False, ax=ax) +plt.show() + +# Create sequences from test values. +x_test = create_sequences(df_test_value.values) +print("Test input shape: ", x_test.shape) + +# Get test MAE loss. +x_test_pred = model.predict(x_test) +test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1) +test_mae_loss = test_mae_loss.reshape((-1)) + +plt.hist(test_mae_loss, bins=50) +plt.xlabel("test MAE loss") +plt.ylabel("No of samples") +plt.show() + +# Detect all the samples which are anomalies. +anomalies = test_mae_loss > threshold +print("Number of anomaly samples: ", np.sum(anomalies)) +print("Indices of anomaly samples: ", np.where(anomalies)) + +""" +## Plot anomalies + +We now know the samples of the data which are anomalies. With this, we will +find the corresponding `timestamps` from the original test data. We will be +using the following method to do that: + +Let's say time_steps = 3 and we have 10 training values. Our `x_train` will +look like this: + +- 0, 1, 2 +- 1, 2, 3 +- 2, 3, 4 +- 3, 4, 5 +- 4, 5, 6 +- 5, 6, 7 +- 6, 7, 8 +- 7, 8, 9 + +All except the initial and the final time_steps-1 data values, will appear in +`time_steps` number of samples. So, if we know that the samples +[(3, 4, 5), (4, 5, 6), (5, 6, 7)] are anomalies, we can say that the data point +5 is an anomaly. +""" + +# data i is an anomaly if samples [(i - timesteps + 1) to (i)] are anomalies +anomalous_data_indices = [] +for data_idx in range(TIME_STEPS - 1, len(df_test_value) - TIME_STEPS + 1): + if np.all(anomalies[data_idx - TIME_STEPS + 1 : data_idx]): + anomalous_data_indices.append(data_idx) + +""" +Let's overlay the anomalies on the original test data plot. 
+""" + +df_subset = df_daily_jumpsup.iloc[anomalous_data_indices] +fig, ax = plt.subplots() +df_daily_jumpsup.plot(legend=False, ax=ax) +df_subset.plot(legend=False, ax=ax, color="r") +plt.show() diff --git a/.tether/vignettes-src/examples/timeseries/timeseries_classification_from_scratch.Rmd b/.tether/vignettes-src/examples/timeseries/timeseries_classification_from_scratch.Rmd new file mode 100644 index 0000000000..04dbb7e3d7 --- /dev/null +++ b/.tether/vignettes-src/examples/timeseries/timeseries_classification_from_scratch.Rmd @@ -0,0 +1,222 @@ +--- +title: Timeseries classification from scratch +author: '[hfawaz](https://github.com/hfawaz/)' +date-created: 2020/07/21 +last-modified: 2023/11/10 +description: Training a timeseries classifier from scratch on the FordA dataset from + the UCR/UEA archive. +accelerator: GPU +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras-io/examples/timeseries/timeseries_classification_from_scratch.py +--- + +## Introduction + +This example shows how to do timeseries classification from scratch, starting from raw +CSV timeseries files on disk. We demonstrate the workflow on the FordA dataset from the +[UCR/UEA archive](https://www.cs.ucr.edu/%7Eeamonn/time_series_data_2018/). + +## Setup + +```python +import keras +import numpy as np +import matplotlib.pyplot as plt +``` + +## Load the data: the FordA dataset + +### Dataset description + +The dataset we are using here is called FordA. +The data comes from the UCR archive. +The dataset contains 3601 training instances and another 1320 testing instances. +Each timeseries corresponds to a measurement of engine noise captured by a motor sensor. +For this task, the goal is to automatically detect the presence of a specific issue with +the engine. The problem is a balanced binary classification task. The full description of +this dataset can be found [here](http://www.j-wichard.de/publications/FordPaper.pdf). + +### Read the TSV data + +We will use the `FordA_TRAIN` file for training and the +`FordA_TEST` file for testing. The simplicity of this dataset +allows us to demonstrate effectively how to use ConvNets for timeseries classification. +In this file, the first column corresponds to the label. + +```python +def readucr(filename): + data = np.loadtxt(filename, delimiter="\t") + y = data[:, 0] + x = data[:, 1:] + return x, y.astype(int) + + +root_url = "https://raw.githubusercontent.com/hfawaz/cd-diagram/master/FordA/" + +x_train, y_train = readucr(root_url + "FordA_TRAIN.tsv") +x_test, y_test = readucr(root_url + "FordA_TEST.tsv") +``` + +## Visualize the data + +Here we visualize one timeseries example for each class in the dataset. + +```python +classes = np.unique(np.concatenate((y_train, y_test), axis=0)) + +plt.figure() +for c in classes: + c_x_train = x_train[y_train == c] + plt.plot(c_x_train[0], label="class " + str(c)) +plt.legend(loc="best") +plt.show() +plt.close() +``` + +## Standardize the data + +Our timeseries are already in a single length (500). However, their values are +usually in various ranges. This is not ideal for a neural network; +in general we should seek to make the input values normalized. +For this specific dataset, the data is already z-normalized: each timeseries sample +has a mean equal to zero and a standard deviation equal to one. This type of +normalization is very common for timeseries classification problems, see +[Bagnall et al. 
(2016)](https://link.springer.com/article/10.1007/s10618-016-0483-9). + +Note that the timeseries data used here are univariate, meaning we only have one channel +per timeseries example. +We will therefore transform the timeseries into a multivariate one with one channel +using a simple reshaping via numpy. +This will allow us to construct a model that is easily applicable to multivariate time +series. + +```python +x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1)) +x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1)) +``` + +Finally, in order to use `sparse_categorical_crossentropy`, we will have to count +the number of classes beforehand. + +```python +num_classes = len(np.unique(y_train)) +``` + +Now we shuffle the training set because we will be using the `validation_split` option +later when training. + +```python +idx = np.random.permutation(len(x_train)) +x_train = x_train[idx] +y_train = y_train[idx] +``` + +Standardize the labels to positive integers. +The expected labels will then be 0 and 1. + +```python +y_train[y_train == -1] = 0 +y_test[y_test == -1] = 0 +``` + +## Build a model + +We build a Fully Convolutional Neural Network originally proposed in +[this paper](https://arxiv.org/abs/1611.06455). +The implementation is based on the TF 2 version provided +[here](https://github.com/hfawaz/dl-4-tsc/). +The following hyperparameters (kernel_size, filters, the usage of BatchNorm) were found +via random search using [KerasTuner](https://github.com/keras-team/keras-tuner). + +```python +def make_model(input_shape): + input_layer = keras.layers.Input(input_shape) + + conv1 = keras.layers.Conv1D(filters=64, kernel_size=3, padding="same")(input_layer) + conv1 = keras.layers.BatchNormalization()(conv1) + conv1 = keras.layers.ReLU()(conv1) + + conv2 = keras.layers.Conv1D(filters=64, kernel_size=3, padding="same")(conv1) + conv2 = keras.layers.BatchNormalization()(conv2) + conv2 = keras.layers.ReLU()(conv2) + + conv3 = keras.layers.Conv1D(filters=64, kernel_size=3, padding="same")(conv2) + conv3 = keras.layers.BatchNormalization()(conv3) + conv3 = keras.layers.ReLU()(conv3) + + gap = keras.layers.GlobalAveragePooling1D()(conv3) + + output_layer = keras.layers.Dense(num_classes, activation="softmax")(gap) + + return keras.models.Model(inputs=input_layer, outputs=output_layer) + + +model = make_model(input_shape=x_train.shape[1:]) +keras.utils.plot_model(model, show_shapes=True) +``` + +## Train the model + +```python +epochs = 500 +batch_size = 32 + +callbacks = [ + keras.callbacks.ModelCheckpoint( + "best_model.keras", save_best_only=True, monitor="val_loss" + ), + keras.callbacks.ReduceLROnPlateau( + monitor="val_loss", factor=0.5, patience=20, min_lr=0.0001 + ), + keras.callbacks.EarlyStopping(monitor="val_loss", patience=50, verbose=1), +] +model.compile( + optimizer="adam", + loss="sparse_categorical_crossentropy", + metrics=["sparse_categorical_accuracy"], +) +history = model.fit( + x_train, + y_train, + batch_size=batch_size, + epochs=epochs, + callbacks=callbacks, + validation_split=0.2, + verbose=1, +) +``` + +## Evaluate model on test data + +```python +model = keras.models.load_model("best_model.keras") + +test_loss, test_acc = model.evaluate(x_test, y_test) + +print("Test accuracy", test_acc) +print("Test loss", test_loss) +``` + +## Plot the model's training and validation loss + +```python +metric = "sparse_categorical_accuracy" +plt.figure() +plt.plot(history.history[metric]) +plt.plot(history.history["val_" + metric]) +plt.title("model " + 
metric)
+plt.ylabel(metric, fontsize="large")
+plt.xlabel("epoch", fontsize="large")
+plt.legend(["train", "val"], loc="best")
+plt.show()
+plt.close()
+```
+
+The training accuracy reaches almost 0.95 after 100 epochs. However, the
+validation accuracy shows that the network still needs further training:
+after about 200 epochs, both the validation and the training accuracy reach
+almost 0.97. Beyond the 200th epoch, if we keep training, the validation
+accuracy starts decreasing while the training accuracy keeps increasing:
+the model starts overfitting.
+
diff --git a/.tether/vignettes-src/examples/vision/autoencoder.Rmd b/.tether/vignettes-src/examples/vision/autoencoder.Rmd
new file mode 100644
index 0000000000..858a176a76
--- /dev/null
+++ b/.tether/vignettes-src/examples/vision/autoencoder.Rmd
@@ -0,0 +1,166 @@
+---
+title: Convolutional autoencoder for image denoising
+author: '[Santiago L. Valdarrama](https://twitter.com/svpino)'
+date-created: 2021/03/01
+last-modified: 2021/03/01
+description: How to train a deep convolutional autoencoder for image denoising.
+accelerator: GPU
+output: rmarkdown::html_vignette
+knit: ({source(here::here("tools/knit.R")); knit_vignette})
+tether: ~/github/keras-team/keras-io/examples/vision/autoencoder.py
+---
+
+## Introduction
+
+This example demonstrates how to implement a deep convolutional autoencoder
+for image denoising, mapping noisy digit images from the MNIST dataset to
+clean digit images. This implementation is based on an original blog post
+titled [Building Autoencoders in Keras](https://blog.keras.io/building-autoencoders-in-keras.html)
+by [François Chollet](https://twitter.com/fchollet).
+
+## Setup
+
+```python
+import numpy as np
+import matplotlib.pyplot as plt
+
+from keras import layers
+from keras.datasets import mnist
+from keras.models import Model
+
+
+def preprocess(array):
+    """Normalizes the supplied array and reshapes it."""
+    array = array.astype("float32") / 255.0
+    array = np.reshape(array, (len(array), 28, 28, 1))
+    return array
+
+
+def noise(array):
+    """Adds random noise to each image in the supplied array."""
+    noise_factor = 0.4
+    noisy_array = array + noise_factor * np.random.normal(
+        loc=0.0, scale=1.0, size=array.shape
+    )
+
+    return np.clip(noisy_array, 0.0, 1.0)
+
+
+def display(array1, array2):
+    """Displays ten random images from each array."""
+    n = 10
+    indices = np.random.randint(len(array1), size=n)
+    images1 = array1[indices, :]
+    images2 = array2[indices, :]
+
+    plt.figure(figsize=(20, 4))
+    for i, (image1, image2) in enumerate(zip(images1, images2)):
+        ax = plt.subplot(2, n, i + 1)
+        plt.imshow(image1.reshape(28, 28))
+        plt.gray()
+        ax.get_xaxis().set_visible(False)
+        ax.get_yaxis().set_visible(False)
+
+        ax = plt.subplot(2, n, i + 1 + n)
+        plt.imshow(image2.reshape(28, 28))
+        plt.gray()
+        ax.get_xaxis().set_visible(False)
+        ax.get_yaxis().set_visible(False)
+
+    plt.show()
+```
+
+## Prepare the data
+
+```python
+# Since we only need images from the dataset to encode and decode, we
+# won't use the labels.
+(train_data, _), (test_data, _) = mnist.load_data() + +# Normalize and reshape the data +train_data = preprocess(train_data) +test_data = preprocess(test_data) + +# Create a copy of the data with added noise +noisy_train_data = noise(train_data) +noisy_test_data = noise(test_data) + +# Display the train data and a version of it with added noise +display(train_data, noisy_train_data) +``` + +## Build the autoencoder + +We are going to use the Functional API to build our convolutional autoencoder. + +```python +input = layers.Input(shape=(28, 28, 1)) + +# Encoder +x = layers.Conv2D(32, (3, 3), activation="relu", padding="same")(input) +x = layers.MaxPooling2D((2, 2), padding="same")(x) +x = layers.Conv2D(32, (3, 3), activation="relu", padding="same")(x) +x = layers.MaxPooling2D((2, 2), padding="same")(x) + +# Decoder +x = layers.Conv2DTranspose(32, (3, 3), strides=2, activation="relu", padding="same")(x) +x = layers.Conv2DTranspose(32, (3, 3), strides=2, activation="relu", padding="same")(x) +x = layers.Conv2D(1, (3, 3), activation="sigmoid", padding="same")(x) + +# Autoencoder +autoencoder = Model(input, x) +autoencoder.compile(optimizer="adam", loss="binary_crossentropy") +autoencoder.summary() +``` + +Now we can train our autoencoder using `train_data` as both our input data +and target. Notice we are setting up the validation data using the same +format. + +```python +autoencoder.fit( + x=train_data, + y=train_data, + epochs=50, + batch_size=128, + shuffle=True, + validation_data=(test_data, test_data), +) +``` + +Let's predict on our test dataset and display the original image together with +the prediction from our autoencoder. + +Notice how the predictions are pretty close to the original images, although +not quite the same. + +```python +predictions = autoencoder.predict(test_data) +display(test_data, predictions) +``` + +Now that we know that our autoencoder works, let's retrain it using the noisy +data as our input and the clean data as our target. We want our autoencoder to +learn how to denoise the images. + +```python +autoencoder.fit( + x=noisy_train_data, + y=train_data, + epochs=100, + batch_size=128, + shuffle=True, + validation_data=(noisy_test_data, test_data), +) +``` + +Let's now predict on the noisy data and display the results of our autoencoder. + +Notice how the autoencoder does an amazing job at removing the noise from the +input images. + +```python +predictions = autoencoder.predict(noisy_test_data) +display(noisy_test_data, predictions) +``` + diff --git a/.tether/vignettes-src/examples/vision/mnist_convnet.Rmd b/.tether/vignettes-src/examples/vision/mnist_convnet.Rmd new file mode 100644 index 0000000000..df5aac8fb5 --- /dev/null +++ b/.tether/vignettes-src/examples/vision/mnist_convnet.Rmd @@ -0,0 +1,84 @@ +--- +title: Simple MNIST convnet +author: '[fchollet](https://twitter.com/fchollet)' +date-created: 2015/06/19 +last-modified: 2020/04/21 +description: A simple convnet that achieves ~99% test accuracy on MNIST. 
+accelerator: GPU +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras-io/examples/vision/mnist_convnet.py +--- + +## Setup + +```python +import numpy as np +import keras +from keras import layers +``` + +## Prepare the data + +```python +# Model / data parameters +num_classes = 10 +input_shape = (28, 28, 1) + +# Load the data and split it between train and test sets +(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() + +# Scale images to the [0, 1] range +x_train = x_train.astype("float32") / 255 +x_test = x_test.astype("float32") / 255 +# Make sure images have shape (28, 28, 1) +x_train = np.expand_dims(x_train, -1) +x_test = np.expand_dims(x_test, -1) +print("x_train shape:", x_train.shape) +print(x_train.shape[0], "train samples") +print(x_test.shape[0], "test samples") + + +# convert class vectors to binary class matrices +y_train = keras.utils.to_categorical(y_train, num_classes) +y_test = keras.utils.to_categorical(y_test, num_classes) +``` + +## Build the model + +```python +model = keras.Sequential( + [ + keras.Input(shape=input_shape), + layers.Conv2D(32, kernel_size=(3, 3), activation="relu"), + layers.MaxPooling2D(pool_size=(2, 2)), + layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), + layers.MaxPooling2D(pool_size=(2, 2)), + layers.Flatten(), + layers.Dropout(0.5), + layers.Dense(num_classes, activation="softmax"), + ] +) + +model.summary() +``` + +## Train the model + +```python +batch_size = 128 +epochs = 15 + +model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]) + +model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1) +``` + +## Evaluate the trained model + +```python +score = model.evaluate(x_test, y_test, verbose=0) +print("Test loss:", score[0]) +print("Test accuracy:", score[1]) +``` + diff --git a/.tether/vignettes-src/functional_api.Rmd b/.tether/vignettes-src/functional_api.Rmd new file mode 100644 index 0000000000..a96c4c814b --- /dev/null +++ b/.tether/vignettes-src/functional_api.Rmd @@ -0,0 +1,859 @@ +--- +title: The Functional API +author: '[fchollet](https://twitter.com/fchollet)' +date-created: 2019/03/01 +last-modified: 2023/06/25 +description: Complete guide to the functional API. +accelerator: GPU +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras-io/guides/functional_api.py +--- + +## Setup + +```python +import numpy as np +import keras +from keras import layers +from keras import ops +``` + +## Introduction + +The Keras *functional API* is a way to create models that are more flexible +than the `keras.Sequential` API. The functional API can handle models +with non-linear topology, shared layers, and even multiple inputs or outputs. + +The main idea is that a deep learning model is usually +a directed acyclic graph (DAG) of layers. +So the functional API is a way to build *graphs of layers*. + +Consider the following model: + +
+``` +(input: 784-dimensional vectors) + ↧ +[Dense (64 units, relu activation)] + ↧ +[Dense (64 units, relu activation)] + ↧ +[Dense (10 units, softmax activation)] + ↧ +(output: logits of a probability distribution over 10 classes) +``` +
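+
+For comparison, here is the same three-layer stack written as a `Sequential`
+model (a minimal sketch for orientation only; the rest of this guide builds
+the functional version):
+
+```python
+seq_model = keras.Sequential(
+    [
+        keras.Input(shape=(784,)),
+        layers.Dense(64, activation="relu"),
+        layers.Dense(64, activation="relu"),
+        layers.Dense(10),
+    ]
+)
+```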
+ +This is a basic graph with three layers. +To build this model using the functional API, start by creating an input node: + +```python +inputs = keras.Input(shape=(784,)) +``` + +The shape of the data is set as a 784-dimensional vector. +The batch size is always omitted since only the shape of each sample is specified. + +If, for example, you have an image input with a shape of `(32, 32, 3)`, +you would use: + +```python +# Just for demonstration purposes. +img_inputs = keras.Input(shape=(32, 32, 3)) +``` + +The `inputs` that is returned contains information about the shape and `dtype` +of the input data that you feed to your model. +Here's the shape: + +```python +inputs.shape +``` + +Here's the dtype: + +```python +inputs.dtype +``` + +You create a new node in the graph of layers by calling a layer on this `inputs` +object: + +```python +dense = layers.Dense(64, activation="relu") +x = dense(inputs) +``` + +The "layer call" action is like drawing an arrow from "inputs" to this layer +you created. +You're "passing" the inputs to the `dense` layer, and you get `x` as the output. + +Let's add a few more layers to the graph of layers: + +```python +x = layers.Dense(64, activation="relu")(x) +outputs = layers.Dense(10)(x) +``` + +At this point, you can create a `Model` by specifying its inputs and outputs +in the graph of layers: + +```python +model = keras.Model(inputs=inputs, outputs=outputs, name="mnist_model") +``` + +Let's check out what the model summary looks like: + +```python +model.summary() +``` + +You can also plot the model as a graph: + +```python +keras.utils.plot_model(model, "my_first_model.png") +``` + +And, optionally, display the input and output shapes of each layer +in the plotted graph: + +```python +keras.utils.plot_model(model, "my_first_model_with_shape_info.png", show_shapes=True) +``` + +This figure and the code are almost identical. In the code version, +the connection arrows are replaced by the call operation. + +A "graph of layers" is an intuitive mental image for a deep learning model, +and the functional API is a way to create models that closely mirrors this. + +## Training, evaluation, and inference + +Training, evaluation, and inference work exactly in the same way for models +built using the functional API as for `Sequential` models. + +The `Model` class offers a built-in training loop (the `fit()` method) +and a built-in evaluation loop (the `evaluate()` method). Note +that you can easily customize these loops to implement your own training routines. 
+See also the guides on customizing what happens in `fit()`: + +- [Writing a custom train step with TensorFlow](/guides/custom_train_step_in_tensorflow/) +- [Writing a custom train step with JAX](/guides/custom_train_step_in_jax/) +- [Writing a custom train step with PyTorch](/guides/custom_train_step_in_torch/) + +Here, load the MNIST image data, reshape it into vectors, +fit the model on the data (while monitoring performance on a validation split), +then evaluate the model on the test data: + +```python +(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() + +x_train = x_train.reshape(60000, 784).astype("float32") / 255 +x_test = x_test.reshape(10000, 784).astype("float32") / 255 + +model.compile( + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + optimizer=keras.optimizers.RMSprop(), + metrics=["accuracy"], +) + +history = model.fit(x_train, y_train, batch_size=64, epochs=2, validation_split=0.2) + +test_scores = model.evaluate(x_test, y_test, verbose=2) +print("Test loss:", test_scores[0]) +print("Test accuracy:", test_scores[1]) +``` + +For further reading, see the +[training and evaluation](/guides/training_with_built_in_methods/) guide. + +## Save and serialize + +Saving the model and serialization work the same way for models built using +the functional API as they do for `Sequential` models. The standard way +to save a functional model is to call `model.save()` +to save the entire model as a single file. You can later recreate the same model +from this file, even if the code that built the model is no longer available. + +This saved file includes the: +- model architecture +- model weight values (that were learned during training) +- model training config, if any (as passed to `compile()`) +- optimizer and its state, if any (to restart training where you left off) + +```python +model.save("my_model.keras") +del model +# Recreate the exact same model purely from the file: +model = keras.models.load_model("my_model.keras") +``` + +For details, read the model [serialization & saving](/guides/serialization_and_saving/) guide. + +## Use the same graph of layers to define multiple models + +In the functional API, models are created by specifying their inputs +and outputs in a graph of layers. That means that a single +graph of layers can be used to generate multiple models. + +In the example below, you use the same stack of layers to instantiate two models: +an `encoder` model that turns image inputs into 16-dimensional vectors, +and an end-to-end `autoencoder` model for training. 
+ +```python +encoder_input = keras.Input(shape=(28, 28, 1), name="img") +x = layers.Conv2D(16, 3, activation="relu")(encoder_input) +x = layers.Conv2D(32, 3, activation="relu")(x) +x = layers.MaxPooling2D(3)(x) +x = layers.Conv2D(32, 3, activation="relu")(x) +x = layers.Conv2D(16, 3, activation="relu")(x) +encoder_output = layers.GlobalMaxPooling2D()(x) + +encoder = keras.Model(encoder_input, encoder_output, name="encoder") +encoder.summary() + +x = layers.Reshape((4, 4, 1))(encoder_output) +x = layers.Conv2DTranspose(16, 3, activation="relu")(x) +x = layers.Conv2DTranspose(32, 3, activation="relu")(x) +x = layers.UpSampling2D(3)(x) +x = layers.Conv2DTranspose(16, 3, activation="relu")(x) +decoder_output = layers.Conv2DTranspose(1, 3, activation="relu")(x) + +autoencoder = keras.Model(encoder_input, decoder_output, name="autoencoder") +autoencoder.summary() +``` + +Here, the decoding architecture is strictly symmetrical +to the encoding architecture, so the output shape is the same as +the input shape `(28, 28, 1)`. + +The reverse of a `Conv2D` layer is a `Conv2DTranspose` layer, +and the reverse of a `MaxPooling2D` layer is an `UpSampling2D` layer. + +## All models are callable, just like layers + +You can treat any model as if it were a layer by invoking it on an `Input` or +on the output of another layer. By calling a model you aren't just reusing +the architecture of the model, you're also reusing its weights. + +To see this in action, here's a different take on the autoencoder example that +creates an encoder model, a decoder model, and chains them in two calls +to obtain the autoencoder model: + +```python +encoder_input = keras.Input(shape=(28, 28, 1), name="original_img") +x = layers.Conv2D(16, 3, activation="relu")(encoder_input) +x = layers.Conv2D(32, 3, activation="relu")(x) +x = layers.MaxPooling2D(3)(x) +x = layers.Conv2D(32, 3, activation="relu")(x) +x = layers.Conv2D(16, 3, activation="relu")(x) +encoder_output = layers.GlobalMaxPooling2D()(x) + +encoder = keras.Model(encoder_input, encoder_output, name="encoder") +encoder.summary() + +decoder_input = keras.Input(shape=(16,), name="encoded_img") +x = layers.Reshape((4, 4, 1))(decoder_input) +x = layers.Conv2DTranspose(16, 3, activation="relu")(x) +x = layers.Conv2DTranspose(32, 3, activation="relu")(x) +x = layers.UpSampling2D(3)(x) +x = layers.Conv2DTranspose(16, 3, activation="relu")(x) +decoder_output = layers.Conv2DTranspose(1, 3, activation="relu")(x) + +decoder = keras.Model(decoder_input, decoder_output, name="decoder") +decoder.summary() + +autoencoder_input = keras.Input(shape=(28, 28, 1), name="img") +encoded_img = encoder(autoencoder_input) +decoded_img = decoder(encoded_img) +autoencoder = keras.Model(autoencoder_input, decoded_img, name="autoencoder") +autoencoder.summary() +``` + +As you can see, the model can be nested: a model can contain sub-models +(since a model is just like a layer). +A common use case for model nesting is *ensembling*. 
+For example, here's how to ensemble a set of models into a single model +that averages their predictions: + +```python +def get_model(): + inputs = keras.Input(shape=(128,)) + outputs = layers.Dense(1)(inputs) + return keras.Model(inputs, outputs) + + +model1 = get_model() +model2 = get_model() +model3 = get_model() + +inputs = keras.Input(shape=(128,)) +y1 = model1(inputs) +y2 = model2(inputs) +y3 = model3(inputs) +outputs = layers.average([y1, y2, y3]) +ensemble_model = keras.Model(inputs=inputs, outputs=outputs) +``` + +## Manipulate complex graph topologies + +### Models with multiple inputs and outputs + +The functional API makes it easy to manipulate multiple inputs and outputs. +This cannot be handled with the `Sequential` API. + +For example, if you're building a system for ranking customer issue tickets by +priority and routing them to the correct department, +then the model will have three inputs: + +- the title of the ticket (text input), +- the text body of the ticket (text input), and +- any tags added by the user (categorical input) + +This model will have two outputs: + +- the priority score between 0 and 1 (scalar sigmoid output), and +- the department that should handle the ticket (softmax output +over the set of departments). + +You can build this model in a few lines with the functional API: + +```python +num_tags = 12 # Number of unique issue tags +num_words = 10000 # Size of vocabulary obtained when preprocessing text data +num_departments = 4 # Number of departments for predictions + +title_input = keras.Input( + shape=(None,), name="title" +) # Variable-length sequence of ints +body_input = keras.Input(shape=(None,), name="body") # Variable-length sequence of ints +tags_input = keras.Input( + shape=(num_tags,), name="tags" +) # Binary vectors of size `num_tags` + +# Embed each word in the title into a 64-dimensional vector +title_features = layers.Embedding(num_words, 64)(title_input) +# Embed each word in the text into a 64-dimensional vector +body_features = layers.Embedding(num_words, 64)(body_input) + +# Reduce sequence of embedded words in the title into a single 128-dimensional vector +title_features = layers.LSTM(128)(title_features) +# Reduce sequence of embedded words in the body into a single 32-dimensional vector +body_features = layers.LSTM(32)(body_features) + +# Merge all available features into a single large vector via concatenation +x = layers.concatenate([title_features, body_features, tags_input]) + +# Stick a logistic regression for priority prediction on top of the features +priority_pred = layers.Dense(1, name="priority")(x) +# Stick a department classifier on top of the features +department_pred = layers.Dense(num_departments, name="department")(x) + +# Instantiate an end-to-end model predicting both priority and department +model = keras.Model( + inputs=[title_input, body_input, tags_input], + outputs={"priority": priority_pred, "department": department_pred}, +) +``` + +Now plot the model: + +```python +keras.utils.plot_model(model, "multi_input_and_output_model.png", show_shapes=True) +``` + +When compiling this model, you can assign different losses to each output. +You can even assign different weights to each loss -- to modulate +their contribution to the total training loss. 
+ +```python +model.compile( + optimizer=keras.optimizers.RMSprop(1e-3), + loss=[ + keras.losses.BinaryCrossentropy(from_logits=True), + keras.losses.CategoricalCrossentropy(from_logits=True), + ], + loss_weights=[1.0, 0.2], +) +``` + +Since the output layers have different names, you could also specify +the losses and loss weights with the corresponding layer names: + +```python +model.compile( + optimizer=keras.optimizers.RMSprop(1e-3), + loss={ + "priority": keras.losses.BinaryCrossentropy(from_logits=True), + "department": keras.losses.CategoricalCrossentropy(from_logits=True), + }, + loss_weights={"priority": 1.0, "department": 0.2}, +) +``` + +Train the model by passing lists of NumPy arrays of inputs and targets: + +```python +# Dummy input data +title_data = np.random.randint(num_words, size=(1280, 10)) +body_data = np.random.randint(num_words, size=(1280, 100)) +tags_data = np.random.randint(2, size=(1280, num_tags)).astype("float32") + +# Dummy target data +priority_targets = np.random.random(size=(1280, 1)) +dept_targets = np.random.randint(2, size=(1280, num_departments)) + +model.fit( + {"title": title_data, "body": body_data, "tags": tags_data}, + {"priority": priority_targets, "department": dept_targets}, + epochs=2, + batch_size=32, +) +``` + +When calling fit with a `Dataset` object, it should yield either a +tuple of lists like `([title_data, body_data, tags_data], [priority_targets, dept_targets])` +or a tuple of dictionaries like +`({'title': title_data, 'body': body_data, 'tags': tags_data}, {'priority': priority_targets, 'department': dept_targets})`. + +For more detailed explanation, refer to the +[training and evaluation](/guides/training_with_built_in_methods/) guide. + +### A toy ResNet model + +In addition to models with multiple inputs and outputs, +the functional API makes it easy to manipulate non-linear connectivity +topologies -- these are models with layers that are not connected sequentially, +which the `Sequential` API cannot handle. + +A common use case for this is residual connections. 
+Let's build a toy ResNet model for CIFAR10 to demonstrate this:
+
+```python
+inputs = keras.Input(shape=(32, 32, 3), name="img")
+x = layers.Conv2D(32, 3, activation="relu")(inputs)
+x = layers.Conv2D(64, 3, activation="relu")(x)
+block_1_output = layers.MaxPooling2D(3)(x)
+
+x = layers.Conv2D(64, 3, activation="relu", padding="same")(block_1_output)
+x = layers.Conv2D(64, 3, activation="relu", padding="same")(x)
+block_2_output = layers.add([x, block_1_output])
+
+x = layers.Conv2D(64, 3, activation="relu", padding="same")(block_2_output)
+x = layers.Conv2D(64, 3, activation="relu", padding="same")(x)
+block_3_output = layers.add([x, block_2_output])
+
+x = layers.Conv2D(64, 3, activation="relu")(block_3_output)
+x = layers.GlobalAveragePooling2D()(x)
+x = layers.Dense(256, activation="relu")(x)
+x = layers.Dropout(0.5)(x)
+outputs = layers.Dense(10)(x)
+
+model = keras.Model(inputs, outputs, name="toy_resnet")
+model.summary()
+```
+
+Plot the model:
+
+```python
+keras.utils.plot_model(model, "mini_resnet.png", show_shapes=True)
+```
+
+Now train the model:
+
+```python
+(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
+
+x_train = x_train.astype("float32") / 255.0
+x_test = x_test.astype("float32") / 255.0
+y_train = keras.utils.to_categorical(y_train, 10)
+y_test = keras.utils.to_categorical(y_test, 10)
+
+model.compile(
+    optimizer=keras.optimizers.RMSprop(1e-3),
+    loss=keras.losses.CategoricalCrossentropy(from_logits=True),
+    metrics=["acc"],
+)
+# We restrict the data to the first 1000 samples so as to limit execution time
+# on Colab. Try to train on the entire dataset until convergence!
+model.fit(
+    x_train[:1000],
+    y_train[:1000],
+    batch_size=64,
+    epochs=1,
+    validation_split=0.2,
+)
+```
+
+## Shared layers
+
+Another good use for the functional API is models that use *shared layers*.
+Shared layers are layer instances that are reused multiple times in the same model --
+they learn features that correspond to multiple paths in the graph-of-layers.
+
+Shared layers are often used to encode inputs from similar spaces
+(say, two different pieces of text that feature similar vocabulary).
+They enable sharing of information across these different inputs,
+and they make it possible to train such a model on less data.
+If a given word is seen in one of the inputs,
+that will benefit the processing of all inputs that pass through the shared layer.
+
+To share a layer in the functional API, call the same layer instance multiple times.
+For instance, here's an `Embedding` layer shared across two different text inputs:
+
+```python
+# Embedding for 1000 unique words mapped to 128-dimensional vectors
+shared_embedding = layers.Embedding(1000, 128)
+
+# Variable-length sequence of integers
+text_input_a = keras.Input(shape=(None,), dtype="int32")
+
+# Variable-length sequence of integers
+text_input_b = keras.Input(shape=(None,), dtype="int32")
+
+# Reuse the same layer to encode both inputs
+encoded_input_a = shared_embedding(text_input_a)
+encoded_input_b = shared_embedding(text_input_b)
+```
+
+## Extract and reuse nodes in the graph of layers
+
+Because the graph of layers you are manipulating is a static data structure,
+it can be accessed and inspected. And this is how you are able to plot
+functional models as images.
+
+This also means that you can access the activations of intermediate layers
+("nodes" in the graph) and reuse them elsewhere --
+which is very useful for something like feature extraction.
+
+Let's look at an example.
This is a VGG19 model with weights pretrained on ImageNet: + +```python +vgg19 = keras.applications.VGG19() +``` + +And these are the intermediate activations of the model, +obtained by querying the graph data structure: + +```python +features_list = [layer.output for layer in vgg19.layers] +``` + +Use these features to create a new feature-extraction model that returns +the values of the intermediate layer activations: + +```python +feat_extraction_model = keras.Model(inputs=vgg19.input, outputs=features_list) + +img = np.random.random((1, 224, 224, 3)).astype("float32") +extracted_features = feat_extraction_model(img) +``` + +This comes in handy for tasks like +[neural style transfer](https://keras.io/examples/generative/neural_style_transfer/), +among other things. + +## Extend the API using custom layers + +`keras` includes a wide range of built-in layers, for example: + +- Convolutional layers: `Conv1D`, `Conv2D`, `Conv3D`, `Conv2DTranspose` +- Pooling layers: `MaxPooling1D`, `MaxPooling2D`, `MaxPooling3D`, `AveragePooling1D` +- RNN layers: `GRU`, `LSTM`, `ConvLSTM2D` +- `BatchNormalization`, `Dropout`, `Embedding`, etc. + +But if you don't find what you need, it's easy to extend the API by creating +your own layers. All layers subclass the `Layer` class and implement: + +- `call` method, that specifies the computation done by the layer. +- `build` method, that creates the weights of the layer (this is just a style +convention since you can create weights in `__init__`, as well). + +To learn more about creating layers from scratch, read +[custom layers and models](/guides/making_new_layers_and_models_via_subclassing) guide. + +The following is a basic implementation of `keras.layers.Dense`: + +```python +class CustomDense(layers.Layer): + def __init__(self, units=32): + super().__init__() + self.units = units + + def build(self, input_shape): + self.w = self.add_weight( + shape=(input_shape[-1], self.units), + initializer="random_normal", + trainable=True, + ) + self.b = self.add_weight( + shape=(self.units,), initializer="random_normal", trainable=True + ) + + def call(self, inputs): + return ops.matmul(inputs, self.w) + self.b + + +inputs = keras.Input((4,)) +outputs = CustomDense(10)(inputs) + +model = keras.Model(inputs, outputs) +``` + +For serialization support in your custom layer, define a `get_config()` +method that returns the constructor arguments of the layer instance: + +```python +class CustomDense(layers.Layer): + def __init__(self, units=32): + super().__init__() + self.units = units + + def build(self, input_shape): + self.w = self.add_weight( + shape=(input_shape[-1], self.units), + initializer="random_normal", + trainable=True, + ) + self.b = self.add_weight( + shape=(self.units,), initializer="random_normal", trainable=True + ) + + def call(self, inputs): + return ops.matmul(inputs, self.w) + self.b + + def get_config(self): + return {"units": self.units} + + +inputs = keras.Input((4,)) +outputs = CustomDense(10)(inputs) + +model = keras.Model(inputs, outputs) +config = model.get_config() + +new_model = keras.Model.from_config(config, custom_objects={"CustomDense": CustomDense}) +``` + +Optionally, implement the class method `from_config(cls, config)` which is used +when recreating a layer instance given its config dictionary. 
+The default implementation of `from_config` is: + +```python +def from_config(cls, config): + return cls(**config) +``` + +## When to use the functional API + +Should you use the Keras functional API to create a new model, +or just subclass the `Model` class directly? In general, the functional API +is higher-level, easier and safer, and has a number of +features that subclassed models do not support. + +However, model subclassing provides greater flexibility when building models +that are not easily expressible as directed acyclic graphs of layers. +For example, you could not implement a Tree-RNN with the functional API +and would have to subclass `Model` directly. + +For an in-depth look at the differences between the functional API and +model subclassing, read +[What are Symbolic and Imperative APIs in TensorFlow 2.0?](https://blog.tensorflow.org/2019/01/what-are-symbolic-and-imperative-apis.html). + +### Functional API strengths: + +The following properties are also true for Sequential models +(which are also data structures), but are not true for subclassed models +(which are Python bytecode, not data structures). + +#### Less verbose + +There is no `super().__init__(...)`, no `def call(self, ...):`, etc. + +Compare: + +```python +inputs = keras.Input(shape=(32,)) +x = layers.Dense(64, activation='relu')(inputs) +outputs = layers.Dense(10)(x) +mlp = keras.Model(inputs, outputs) +``` + +With the subclassed version: + +```python +class MLP(keras.Model): + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.dense_1 = layers.Dense(64, activation='relu') + self.dense_2 = layers.Dense(10) + + def call(self, inputs): + x = self.dense_1(inputs) + return self.dense_2(x) + +# Instantiate the model. +mlp = MLP() +# Necessary to create the model's state. +# The model doesn't have a state until it's called at least once. +_ = mlp(ops.zeros((1, 32))) +``` + +#### Model validation while defining its connectivity graph + +In the functional API, the input specification (shape and dtype) is created +in advance (using `Input`). Every time you call a layer, +the layer checks that the specification passed to it matches its assumptions, +and it will raise a helpful error message if not. + +This guarantees that any model you can build with the functional API will run. +All debugging -- other than convergence-related debugging -- +happens statically during the model construction and not at execution time. +This is similar to type checking in a compiler. + +#### A functional model is plottable and inspectable + +You can plot the model as a graph, and you can easily access intermediate nodes +in this graph. For example, to extract and reuse the activations of intermediate +layers (as seen in a previous example): + +```python +features_list = [layer.output for layer in vgg19.layers] +feat_extraction_model = keras.Model(inputs=vgg19.input, outputs=features_list) +``` + +#### A functional model can be serialized or cloned + +Because a functional model is a data structure rather than a piece of code, +it is safely serializable and can be saved as a single file +that allows you to recreate the exact same model +without having access to any of the original code. +See the [serialization & saving guide](/guides/serialization_and_saving/). + +To serialize a subclassed model, it is necessary for the implementer +to specify a `get_config()` +and `from_config()` method at the model level. 
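+
+A minimal sketch of what this can look like (the `MySubclassedModel` class is
+illustrative only, not part of this guide):
+
+```python
+class MySubclassedModel(keras.Model):
+    def __init__(self, units=32, **kwargs):
+        super().__init__(**kwargs)
+        self.units = units
+        self.dense = layers.Dense(units)
+
+    def call(self, inputs):
+        return self.dense(inputs)
+
+    def get_config(self):
+        # Return the constructor arguments needed to re-create the model.
+        config = super().get_config()
+        config.update({"units": self.units})
+        return config
+
+    @classmethod
+    def from_config(cls, config):
+        return cls(**config)
+```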
+
+
+### Functional API weakness:
+
+#### It does not support dynamic architectures
+
+The functional API treats models as DAGs of layers.
+This is true for most deep learning architectures, but not all -- for example,
+recursive networks or Tree RNNs do not follow this assumption and cannot
+be implemented in the functional API.
+
+## Mix-and-match API styles
+
+Choosing between the functional API and Model subclassing isn't a
+binary decision that restricts you into one category of models.
+All models in the `keras` API can interact with each other, whether they're
+`Sequential` models, functional models, or subclassed models that are written
+from scratch.
+
+You can always use a functional model or `Sequential` model
+as part of a subclassed model or layer:
+
+```python
+units = 32
+timesteps = 10
+input_dim = 5
+
+# Define a Functional model
+inputs = keras.Input((None, units))
+x = layers.GlobalAveragePooling1D()(inputs)
+outputs = layers.Dense(1)(x)
+model = keras.Model(inputs, outputs)
+
+
+class CustomRNN(layers.Layer):
+    def __init__(self):
+        super().__init__()
+        self.units = units
+        self.projection_1 = layers.Dense(units=units, activation="tanh")
+        self.projection_2 = layers.Dense(units=units, activation="tanh")
+        # Our previously-defined Functional model
+        self.classifier = model
+
+    def call(self, inputs):
+        outputs = []
+        state = ops.zeros(shape=(inputs.shape[0], self.units))
+        for t in range(inputs.shape[1]):
+            x = inputs[:, t, :]
+            h = self.projection_1(x)
+            y = h + self.projection_2(state)
+            state = y
+            outputs.append(y)
+        features = ops.stack(outputs, axis=1)
+        print(features.shape)
+        return self.classifier(features)
+
+
+rnn_model = CustomRNN()
+_ = rnn_model(ops.zeros((1, timesteps, input_dim)))
+```
+
+You can use any subclassed layer or model in the functional API
+as long as it implements a `call` method that follows one of the following patterns:
+
+- `call(self, inputs, **kwargs)` --
+Where `inputs` is a tensor or a nested structure of tensors (e.g. a list of tensors),
+and where `**kwargs` are non-tensor arguments (non-inputs).
+- `call(self, inputs, training=None, **kwargs)` --
+Where `training` is a boolean indicating whether the layer should behave
+in training mode or in inference mode (see the sketch after this list).
+- `call(self, inputs, mask=None, **kwargs)` --
+Where `mask` is a boolean mask tensor (useful for RNNs, for instance).
+- `call(self, inputs, training=None, mask=None, **kwargs)` --
+Of course, you can have both masking and training-specific behavior at the same time.
+
+Additionally, if you implement the `get_config` method on your custom Layer or model,
+the functional models you create will still be serializable and cloneable.
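+
+For example, here is a minimal sketch of the `training`-aware pattern from the
+list above (the `NoiseAugment` layer is illustrative only, not part of this
+guide):
+
+```python
+class NoiseAugment(layers.Layer):
+    def __init__(self, stddev=0.1, **kwargs):
+        super().__init__(**kwargs)
+        self.stddev = stddev
+
+    def call(self, inputs, training=None):
+        # Perturb the inputs in training mode only; pass through at inference.
+        if training:
+            return inputs + keras.random.normal(
+                ops.shape(inputs), stddev=self.stddev
+            )
+        return inputs
+```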
+
+Here's a quick example of a custom RNN, written from scratch,
+being used in a functional model:
+
+```python
+units = 32
+timesteps = 10
+input_dim = 5
+batch_size = 16
+
+
+class CustomRNN(layers.Layer):
+    def __init__(self):
+        super().__init__()
+        self.units = units
+        self.projection_1 = layers.Dense(units=units, activation="tanh")
+        self.projection_2 = layers.Dense(units=units, activation="tanh")
+        self.classifier = layers.Dense(1)
+
+    def call(self, inputs):
+        outputs = []
+        state = ops.zeros(shape=(inputs.shape[0], self.units))
+        for t in range(inputs.shape[1]):
+            x = inputs[:, t, :]
+            h = self.projection_1(x)
+            y = h + self.projection_2(state)
+            state = y
+            outputs.append(y)
+        features = ops.stack(outputs, axis=1)
+        return self.classifier(features)
+
+
+# Note that you specify a static batch size for the inputs with the `batch_shape`
+# arg, because the inner computation of `CustomRNN` requires a static batch size
+# (when you create the `state` zeros tensor).
+inputs = keras.Input(batch_shape=(batch_size, timesteps, input_dim))
+x = layers.Conv1D(32, 3)(inputs)
+outputs = CustomRNN()(x)
+
+model = keras.Model(inputs, outputs)
+
+rnn_model = CustomRNN()
+_ = rnn_model(ops.zeros((1, 10, 5)))
+```
+
diff --git a/.tether/vignettes-src/getting_started_with_keras_core.Rmd b/.tether/vignettes-src/getting_started_with_keras_core.Rmd
new file mode 100644
index 0000000000..17331ad389
--- /dev/null
+++ b/.tether/vignettes-src/getting_started_with_keras_core.Rmd
@@ -0,0 +1,368 @@
+---
+title: Getting started with Keras Core
+author: '[fchollet](https://twitter.com/fchollet)'
+date-created: 2023/07/10
+last-modified: 2023/07/10
+description: First contact with the new multi-backend Keras.
+accelerator: GPU
+output: rmarkdown::html_vignette
+knit: ({source(here::here("tools/knit.R")); knit_vignette})
+---
+
+## Introduction
+
+Keras Core is a full implementation of the Keras API that
+works with TensorFlow, JAX, and PyTorch interchangeably.
+This notebook will walk you through key Keras Core workflows.
+
+First, let's install Keras Core:
+
+```
+pip install -q keras-core
+```
+
+## Setup
+
+We're going to be using the JAX backend here -- but you can
+edit the string below to `"tensorflow"` or `"torch"` and hit
+"Restart runtime", and the whole notebook will run just the same!
+This entire guide is backend-agnostic.
+
+```python
+import numpy as np
+import os
+
+os.environ["KERAS_BACKEND"] = "jax"
+
+# Note that keras_core should only be imported after the backend
+# has been configured. The backend cannot be changed once the
+# package is imported.
+import keras_core as keras
+```
+
+## A first example: An MNIST convnet
+
+Let's start with the Hello World of ML: training a convnet
+to classify MNIST digits.
+
+Here's the data:
+
+```python
+# Load the data and split it between train and test sets
+(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
+
+# Scale images to the [0, 1] range
+x_train = x_train.astype("float32") / 255
+x_test = x_test.astype("float32") / 255
+# Make sure images have shape (28, 28, 1)
+x_train = np.expand_dims(x_train, -1)
+x_test = np.expand_dims(x_test, -1)
+print("x_train shape:", x_train.shape)
+print("y_train shape:", y_train.shape)
+print(x_train.shape[0], "train samples")
+print(x_test.shape[0], "test samples")
+```
+
+Here's our model.
+
+Different model-building options that Keras offers include:
+
+- [The Sequential API](https://keras.io/keras_core/guides/sequential_model/) (what we use below)
+- [The Functional API](https://keras.io/keras_core/guides/functional_api/) (most typical)
+- [Writing your own models via subclassing](https://keras.io/keras_core/guides/making_new_layers_and_models_via_subclassing/) (for advanced use cases)
+
+```python
+# Model parameters
+num_classes = 10
+input_shape = (28, 28, 1)
+
+model = keras.Sequential(
+    [
+        keras.layers.Input(shape=input_shape),
+        keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
+        keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
+        keras.layers.MaxPooling2D(pool_size=(2, 2)),
+        keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"),
+        keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"),
+        keras.layers.GlobalAveragePooling2D(),
+        keras.layers.Dropout(0.5),
+        keras.layers.Dense(num_classes, activation="softmax"),
+    ]
+)
+```
+
+Here's our model summary:
+
+```python
+model.summary()
+```
+
+We use the `compile()` method to specify the optimizer, loss function,
+and the metrics to monitor. Note that with the JAX and TensorFlow backends,
+XLA compilation is turned on by default.
+
+```python
+model.compile(
+    loss=keras.losses.SparseCategoricalCrossentropy(),
+    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
+    metrics=[
+        keras.metrics.SparseCategoricalAccuracy(name="acc"),
+    ],
+)
+```
+
+Let's train and evaluate the model. We'll set aside a validation split of 15%
+of the data during training to monitor generalization on unseen data.
+
+```python
+batch_size = 128
+epochs = 20
+
+callbacks = [
+    keras.callbacks.ModelCheckpoint(filepath="model_at_epoch_{epoch}.keras"),
+    keras.callbacks.EarlyStopping(monitor="val_loss", patience=2),
+]
+
+model.fit(
+    x_train,
+    y_train,
+    batch_size=batch_size,
+    epochs=epochs,
+    validation_split=0.15,
+    callbacks=callbacks,
+)
+score = model.evaluate(x_test, y_test, verbose=0)
+```
+
+During training, we were saving a model at the end of each epoch. You
+can also save the model in its latest state like this:
+
+```python
+model.save("final_model.keras")
+```
+
+And reload it like this:
+
+```python
+model = keras.saving.load_model("final_model.keras")
+```
+
+Next, you can query predictions of class probabilities with `predict()`:
+
+```python
+predictions = model.predict(x_test)
+```
+
+That's it for the basics!
+
+## Writing cross-framework custom components
+
+Keras Core enables you to write custom Layers, Models, Metrics, Losses, and Optimizers
+that work across TensorFlow, JAX, and PyTorch with the same codebase. Let's take a look
+at custom layers first.
+
+If you're already familiar with writing custom layers in `tf.keras` -- well, nothing
+has changed. Except one thing: instead of using functions from the `tf` namespace, you should use functions
+from `keras.ops.*`.
+
+The `keras.ops` namespace contains:
+
+- An implementation of the NumPy API, e.g. `keras.ops.stack` or `keras.ops.matmul`.
+- A set of neural network specific ops that are absent from NumPy, such as `keras.ops.conv`
+or `keras.ops.binary_crossentropy`.
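+
+For instance, the same `keras.ops` calls run unchanged on every backend. A
+minimal sketch, reusing the `import keras_core as keras` from the setup above
+(the shapes in the comments are what these ops return):
+
+```python
+x = keras.ops.ones((2, 3))
+y = keras.ops.stack([x, x], axis=0)  # NumPy-style op, shape (2, 2, 3)
+z = keras.ops.matmul(x, keras.ops.transpose(x))  # shape (2, 2)
+print(y.shape, z.shape)
+```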
+
+Let's make a custom `Dense` layer that works with all backends:
+
+```python
+class MyDense(keras.layers.Layer):
+    def __init__(self, units, activation=None, name=None):
+        super().__init__(name=name)
+        self.units = units
+        self.activation = keras.activations.get(activation)
+
+    def build(self, input_shape):
+        input_dim = input_shape[-1]
+        self.w = self.add_weight(
+            shape=(input_dim, self.units),
+            initializer=keras.initializers.GlorotNormal(),
+            name="kernel",
+            trainable=True,
+        )
+
+        self.b = self.add_weight(
+            shape=(self.units,),
+            initializer=keras.initializers.Zeros(),
+            name="bias",
+            trainable=True,
+        )
+
+    def call(self, inputs):
+        # Use Keras ops to create backend-agnostic layers/metrics/etc.
+        x = keras.ops.matmul(inputs, self.w) + self.b
+        return self.activation(x)
+```
+
+Next, let's make a custom `Dropout` layer that relies on the `keras.random`
+namespace:
+
+```python
+class MyDropout(keras.layers.Layer):
+    def __init__(self, rate, name=None):
+        super().__init__(name=name)
+        self.rate = rate
+        # Use seed_generator for managing RNG state.
+        # It is a state element and its seed variable is
+        # tracked as part of `layer.variables`.
+        self.seed_generator = keras.random.SeedGenerator(1337)
+
+    def call(self, inputs):
+        # Use `keras_core.random` for random ops.
+        return keras.random.dropout(inputs, self.rate, seed=self.seed_generator)
+```
+
+Next, let's write a custom subclassed model that uses our two custom layers:
+
+```python
+class MyModel(keras.Model):
+    def __init__(self, num_classes):
+        super().__init__()
+        self.conv_base = keras.Sequential(
+            [
+                keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
+                keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
+                keras.layers.MaxPooling2D(pool_size=(2, 2)),
+                keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"),
+                keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"),
+                keras.layers.GlobalAveragePooling2D(),
+            ]
+        )
+        self.dp = MyDropout(0.5)
+        self.dense = MyDense(num_classes, activation="softmax")
+
+    def call(self, x):
+        x = self.conv_base(x)
+        x = self.dp(x)
+        return self.dense(x)
+```
+
+Let's compile it and fit it:
+
+```python
+model = MyModel(num_classes=10)
+model.compile(
+    loss=keras.losses.SparseCategoricalCrossentropy(),
+    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
+    metrics=[
+        keras.metrics.SparseCategoricalAccuracy(name="acc"),
+    ],
+)
+
+model.fit(
+    x_train,
+    y_train,
+    batch_size=batch_size,
+    epochs=1,  # For speed
+    validation_split=0.15,
+)
+```
+
+## Training models on arbitrary data sources
+
+All Keras models can be trained and evaluated on a wide variety of data sources,
+independently of the backend you're using. This includes:
+
+- NumPy arrays
+- Pandas dataframes
+- TensorFlow `tf.data.Dataset` objects
+- PyTorch `DataLoader` objects
+- Keras `PyDataset` objects (see the sketch after this list)
+
+They all work whether you're using TensorFlow, JAX, or PyTorch as your Keras backend.
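+
+As one example of the last option, here is a minimal `PyDataset` sketch
+(`MyPyDataset` is a hypothetical name, and the `fit()` call in the final
+comment assumes the arrays from earlier in this guide):
+
+```python
+import math
+
+
+class MyPyDataset(keras.utils.PyDataset):
+    def __init__(self, x, y, batch_size, **kwargs):
+        super().__init__(**kwargs)
+        self.x, self.y = x, y
+        self.batch_size = batch_size
+
+    def __len__(self):
+        # Number of batches per epoch.
+        return math.ceil(len(self.x) / self.batch_size)
+
+    def __getitem__(self, idx):
+        # Return one batch as an (inputs, targets) tuple.
+        lo = idx * self.batch_size
+        hi = lo + self.batch_size
+        return self.x[lo:hi], self.y[lo:hi]
+
+
+# e.g.: model.fit(MyPyDataset(x_train, y_train, batch_size=128), epochs=1)
+```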
+
+Let's try it out with PyTorch `DataLoaders`:
+
+```python
+import torch
+
+# Create a TensorDataset
+train_torch_dataset = torch.utils.data.TensorDataset(
+    torch.from_numpy(x_train), torch.from_numpy(y_train)
+)
+val_torch_dataset = torch.utils.data.TensorDataset(
+    torch.from_numpy(x_test), torch.from_numpy(y_test)
+)
+
+# Create a DataLoader
+train_dataloader = torch.utils.data.DataLoader(
+    train_torch_dataset, batch_size=batch_size, shuffle=True
+)
+val_dataloader = torch.utils.data.DataLoader(
+    val_torch_dataset, batch_size=batch_size, shuffle=False
+)
+
+model = MyModel(num_classes=10)
+model.compile(
+    loss=keras.losses.SparseCategoricalCrossentropy(),
+    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
+    metrics=[
+        keras.metrics.SparseCategoricalAccuracy(name="acc"),
+    ],
+)
+model.fit(train_dataloader, epochs=1, validation_data=val_dataloader)
+```
+
+Now let's try this out with `tf.data`:
+
+```python
+import tensorflow as tf
+
+train_dataset = (
+    tf.data.Dataset.from_tensor_slices((x_train, y_train))
+    .batch(batch_size)
+    .prefetch(tf.data.AUTOTUNE)
+)
+test_dataset = (
+    tf.data.Dataset.from_tensor_slices((x_test, y_test))
+    .batch(batch_size)
+    .prefetch(tf.data.AUTOTUNE)
+)
+
+model = MyModel(num_classes=10)
+model.compile(
+    loss=keras.losses.SparseCategoricalCrossentropy(),
+    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
+    metrics=[
+        keras.metrics.SparseCategoricalAccuracy(name="acc"),
+    ],
+)
+model.fit(train_dataset, epochs=1, validation_data=test_dataset)
+```
+
+## Further reading
+
+This concludes our short overview of the new multi-backend capabilities
+of Keras Core. Next, you can learn about:
+
+### How to customize what happens in `fit()`
+
+Want to implement a non-standard training algorithm yourself
+(e.g. a GAN training routine) but still want to benefit from
+the power and usability of `fit()`? It's really easy to customize
+`fit()` to support arbitrary use cases.
+
+- [Customizing what happens in `fit()` with TensorFlow](http://keras.io/keras_core/guides/custom_train_step_in_tensorflow/)
+- [Customizing what happens in `fit()` with JAX](http://keras.io/keras_core/guides/custom_train_step_in_jax/)
+- [Customizing what happens in `fit()` with PyTorch](http://keras.io/keras_core/guides/custom_train_step_in_pytorch/)
+
+### How to write custom training loops
+
+- [Writing a training loop from scratch in TensorFlow](http://keras.io/keras_core/guides/writing_a_custom_training_loop_in_tensorflow/)
+- [Writing a training loop from scratch in JAX](http://keras.io/keras_core/guides/writing_a_custom_training_loop_in_jax/)
+- [Writing a training loop from scratch in PyTorch](http://keras.io/keras_core/guides/writing_a_custom_training_loop_in_torch/)
+
+
+### How to distribute training
+
+- [Guide to distributed training with TensorFlow](http://keras.io/keras_core/guides/distributed_training_with_tensorflow/)
+- [JAX distributed training example](https://github.com/keras-team/keras-core/blob/main/examples/demo_jax_distributed.py)
+- [PyTorch distributed training example](https://github.com/keras-team/keras-core/blob/main/examples/demo_torch_multi_gpu.py)
+
+Enjoy the library!
🚀
diff --git a/.tether/vignettes-src/guides/distribution.Rmd b/.tether/vignettes-src/guides/distribution.Rmd
new file mode 100644
index 0000000000..81db087d1d
--- /dev/null
+++ b/.tether/vignettes-src/guides/distribution.Rmd
@@ -0,0 +1,242 @@
+"""
+Title: Distributed training with Keras 3
+Author: [Qianli Zhu](https://github.com/qlzh727)
+Date created: 2023/11/07
+Last modified: 2023/11/07
+Description: Complete guide to the distribution API for multi-backend Keras.
+Accelerator: GPU
+"""

+"""
+## Introduction
+
+The Keras distribution API is a new interface designed to facilitate
+distributed deep learning across a variety of backends like JAX, TensorFlow and
+PyTorch. This powerful API introduces a suite of tools enabling data and model
+parallelism, allowing for efficient scaling of deep learning models on multiple
+accelerators and hosts. Whether leveraging the power of GPUs or TPUs, the API
+provides a streamlined approach to initializing distributed environments,
+defining device meshes, and orchestrating the layout of tensors across
+computational resources. Through classes like `DataParallel` and
+`ModelParallel`, it abstracts the complexity involved in parallel computation,
+making it easier for developers to accelerate their machine learning
+workflows.
+
+"""

+"""
+## How it works
+
+The Keras distribution API provides a global programming model that allows
+developers to compose applications that operate on tensors in a global context
+(as if working with a single device) while
+automatically managing distribution across many devices. The API leverages the
+underlying framework (e.g. JAX) to distribute the program and tensors according to the
+sharding directives through a procedure called single program, multiple data
+(SPMD) expansion.
+
+By decoupling the application from sharding directives, the API enables running
+the same application on a single device, multiple devices, or even multiple
+clients, while preserving its global semantics.
+"""

+"""
+## Setup
+"""

+import os

+# The distribution API is only implemented for the JAX backend for now.
+os.environ["KERAS_BACKEND"] = "jax"

+import keras
+from keras import layers
+import jax
+import numpy as np
+from tensorflow import data as tf_data  # For dataset input.

+"""
+## `DeviceMesh` and `TensorLayout`
+
+The `keras.distribution.DeviceMesh` class in the Keras distribution API represents a cluster of
+computational devices configured for distributed computation. It aligns with
+similar concepts in [`jax.sharding.Mesh`](https://jax.readthedocs.io/en/latest/jax.sharding.html#jax.sharding.Mesh) and
+[`tf.dtensor.Mesh`](https://www.tensorflow.org/api_docs/python/tf/experimental/dtensor/Mesh),
+where it's used to map the physical devices to a logical mesh structure.
+
+The `TensorLayout` class then specifies how tensors are distributed across the
+`DeviceMesh`, detailing the sharding of tensors along specified axes that
+correspond to the names of the axes in the `DeviceMesh`.
+
+You can find more detailed concept explainers in the
+[TensorFlow DTensor guide](https://www.tensorflow.org/guide/dtensor_overview#dtensors_model_of_distributed_tensors).
+"""

+# Retrieve the locally available GPU devices.
+devices = jax.devices("gpu")  # Assume it has 8 local GPUs.

+# Define a 2x4 device mesh with data and model parallel axes
+mesh = keras.distribution.DeviceMesh(
+    shape=(2, 4), axis_names=["data", "model"], devices=devices
+)

+# A 2D layout, which describes how a tensor is distributed across the
+# mesh.
+# Retrieve the locally available GPU devices.
+devices = jax.devices("gpu")  # Assume it has 8 local GPUs.
+
+# Define a 2x4 device mesh with data and model parallel axes
+mesh = keras.distribution.DeviceMesh(
+    shape=(2, 4), axis_names=["data", "model"], devices=devices
+)
+
+# A 2D layout, which describes how a tensor is distributed across the
+# mesh. The layout can be visualized as a 2D grid with "model" as rows and
+# "data" as columns, and it is a [4, 2] grid when mapped to the physical
+# devices on the mesh.
+layout_2d = keras.distribution.TensorLayout(axes=("model", "data"), device_mesh=mesh)
+
+# A 4D layout which could be used for data parallelism of an image input.
+replicated_layout_4d = keras.distribution.TensorLayout(
+    axes=("data", None, None, None), device_mesh=mesh
+)
+
+"""
+## Distribution
+
+The `Distribution` class in Keras serves as a foundational abstract class designed
+for developing custom distribution strategies. It encapsulates the core logic
+needed to distribute a model's variables, input data, and intermediate
+computations across a device mesh. As an end user, you won't have to interact
+directly with this class, but with its subclasses, such as `DataParallel` or
+`ModelParallel`.
+"""
+
+"""
+## DataParallel
+
+The `DataParallel` class in the Keras distribution API is designed for the
+data parallelism strategy in distributed training, where the model weights are
+replicated across all devices in the `DeviceMesh`, and each device processes a
+portion of the input data.
+
+Here is a sample usage of this class.
+"""
+
+# Create DataParallel with list of devices.
+# As a shortcut, the devices can be skipped,
+# and Keras will detect all locally available devices.
+# E.g. data_parallel = DataParallel()
+data_parallel = keras.distribution.DataParallel(devices=devices)
+
+# Or you can choose to create DataParallel with a 1D `DeviceMesh`.
+mesh_1d = keras.distribution.DeviceMesh(
+    shape=(8,), axis_names=["data"], devices=devices
+)
+data_parallel = keras.distribution.DataParallel(device_mesh=mesh_1d)
+
+inputs = np.random.normal(size=(128, 28, 28, 1))
+labels = np.random.normal(size=(128, 10))
+dataset = tf_data.Dataset.from_tensor_slices((inputs, labels)).batch(16)
+
+# Set the global distribution.
+keras.distribution.set_distribution(data_parallel)
+
+# Note that all the model weights from here on are replicated to
+# all the devices of the `DeviceMesh`. This includes the RNG
+# state, optimizer states, metrics, etc. The dataset fed into `model.fit` or
+# `model.evaluate` will be split evenly on the batch dimension, and sent to
+# all the devices. You don't have to do any manual aggregation of losses,
+# since all the computation happens in a global context.
+inputs = layers.Input(shape=(28, 28, 1))
+y = layers.Flatten()(inputs)
+y = layers.Dense(units=200, use_bias=False, activation="relu")(y)
+y = layers.Dropout(0.4)(y)
+y = layers.Dense(units=10, activation="softmax")(y)
+model = keras.Model(inputs=inputs, outputs=y)
+
+model.compile(loss="mse")
+model.fit(dataset, epochs=3)
+model.evaluate(dataset)
+"""
+## `ModelParallel` and `LayoutMap`
+
+`ModelParallel` will be mostly useful when model weights are too large to fit
+on a single accelerator. This setting allows you to split your model weights or
+activation tensors across all the devices on the `DeviceMesh`, enabling
+horizontal scaling for large models.
+
+Unlike the `DataParallel` model where all weights are fully replicated,
+the weights layout under `ModelParallel` usually needs some customization for
+best performance. We introduce `LayoutMap` to let you specify the
+`TensorLayout` for any weights and intermediate tensors from a global perspective.
+
+`LayoutMap` is a dict-like object that maps a string to `TensorLayout`
+instances. It behaves differently from a normal Python dict in that the string
+key is treated as a regex when retrieving the value. The class allows you to
+define the naming schema of `TensorLayout` and then retrieve the corresponding
+`TensorLayout` instance. Typically, the key used to query
+is the `variable.path` attribute, which is the identifier of the variable.
+As a shortcut, a tuple or list of axis
+names is also allowed when inserting a value, and it will be converted to
+`TensorLayout`.
+
+The `LayoutMap` can also optionally contain a `DeviceMesh` to populate the
+`TensorLayout.device_mesh` if it is not set. When retrieving a layout with a
+key, and if there isn't an exact match, all existing keys in the layout map will
+be treated as regex and matched against the input key again. If there are
+multiple matches, a `ValueError` is raised. If no matches are found, `None` is
+returned.
+"""
+
+mesh_2d = keras.distribution.DeviceMesh(
+    shape=(2, 4), axis_names=["data", "model"], devices=devices
+)
+layout_map = keras.distribution.LayoutMap(mesh_2d)
+# The rule below means that any weights matching "d1/kernel" will be sharded
+# along the model dimension (4 devices); same for "d1/bias".
+# All other weights will be fully replicated.
+layout_map["d1/kernel"] = (None, "model")
+layout_map["d1/bias"] = ("model",)
+
+# You can also set the layout for the layer output like this:
+layout_map["d2/output"] = ("data", None)
+
+model_parallel = keras.distribution.ModelParallel(
+    mesh_2d, layout_map, batch_dim_name="data"
+)
+
+keras.distribution.set_distribution(model_parallel)
+
+inputs = layers.Input(shape=(28, 28, 1))
+y = layers.Flatten()(inputs)
+y = layers.Dense(units=200, use_bias=False, activation="relu", name="d1")(y)
+y = layers.Dropout(0.4)(y)
+y = layers.Dense(units=10, activation="softmax", name="d2")(y)
+model = keras.Model(inputs=inputs, outputs=y)
+
+# The data will be sharded across the "data" dimension of the mesh, which
+# has 2 devices.
+model.compile(loss="mse")
+model.fit(dataset, epochs=3)
+model.evaluate(dataset)
+
+"""
+It is also easy to change the mesh structure to tune the computation between
+more data parallelism or more model parallelism. You can do this by adjusting
+the shape of the mesh, as sketched after the mesh definitions below. No changes
+are needed for any other code.
+"""
+
+full_data_parallel_mesh = keras.distribution.DeviceMesh(
+    shape=(8, 1), axis_names=["data", "model"], devices=devices
+)
+more_data_parallel_mesh = keras.distribution.DeviceMesh(
+    shape=(4, 2), axis_names=["data", "model"], devices=devices
+)
+more_model_parallel_mesh = keras.distribution.DeviceMesh(
+    shape=(2, 4), axis_names=["data", "model"], devices=devices
+)
+full_model_parallel_mesh = keras.distribution.DeviceMesh(
+    shape=(1, 8), axis_names=["data", "model"], devices=devices
+)
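+"""
+For example, here is a minimal sketch (following the same `ModelParallel`
+usage as above; the `full_data_parallel_layout_map` name is ours) of switching
+to the fully data-parallel mesh. Since the "model" axis now has size 1, the
+"sharded" weights are effectively replicated. Re-create the model afterwards,
+as above, for the new layout to take effect:
+"""
+
+# Build a fresh LayoutMap for the new mesh; everything else stays the same.
+full_data_parallel_layout_map = keras.distribution.LayoutMap(
+    full_data_parallel_mesh
+)
+full_data_parallel_layout_map["d1/kernel"] = (None, "model")
+full_data_parallel_layout_map["d1/bias"] = ("model",)
+
+keras.distribution.set_distribution(
+    keras.distribution.ModelParallel(
+        full_data_parallel_mesh,
+        full_data_parallel_layout_map,
+        batch_dim_name="data",
+    )
+)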
+"""
+### Further reading
+
+1. [JAX Distributed arrays and automatic parallelization](https://jax.readthedocs.io/en/latest/notebooks/Distributed_arrays_and_automatic_parallelization.html)
+2. [JAX sharding module](https://jax.readthedocs.io/en/latest/jax.sharding.html)
+3. [TensorFlow Distributed training with DTensors](https://www.tensorflow.org/tutorials/distribute/dtensor_ml_tutorial)
+4. [TensorFlow DTensor concepts](https://www.tensorflow.org/guide/dtensor_overview)
+5. [Using DTensors with tf.keras](https://www.tensorflow.org/tutorials/distribute/dtensor_keras_tutorial)
+"""
diff --git a/.tether/vignettes-src/intro_to_keras_for_engineers.Rmd b/.tether/vignettes-src/intro_to_keras_for_engineers.Rmd
new file mode 100644
index 0000000000..52ca07d95f
--- /dev/null
+++ b/.tether/vignettes-src/intro_to_keras_for_engineers.Rmd
@@ -0,0 +1,364 @@
+---
+title: Introduction to Keras for engineers
+author: '[fchollet](https://twitter.com/fchollet)'
+date-created: 2023/07/10
+last-modified: 2023/07/10
+description: First contact with Keras 3.
+accelerator: GPU
+output: rmarkdown::html_vignette
+knit: ({source(here::here("tools/knit.R")); knit_vignette})
+tether: ~/github/keras-team/keras-io/guides/intro_to_keras_for_engineers.py
+---
+
+## Introduction
+
+Keras 3 is a deep learning framework that
+works with TensorFlow, JAX, and PyTorch interchangeably.
+This notebook will walk you through key Keras 3 workflows.
+
+Let's start by installing Keras 3:
+
+pip install keras --upgrade --quiet
+
+## Setup
+
+We're going to be using the JAX backend here -- but you can
+edit the string below to `"tensorflow"` or `"torch"` and hit
+"Restart runtime", and the whole notebook will run just the same!
+This entire guide is backend-agnostic.
+
+```python
+import numpy as np
+import os
+
+os.environ["KERAS_BACKEND"] = "jax"
+
+# Note that Keras should only be imported after the backend
+# has been configured. The backend cannot be changed once the
+# package is imported.
+import keras
+```
+
+## A first example: A MNIST convnet
+
+Let's start with the Hello World of ML: training a convnet
+to classify MNIST digits.
+
+Here's the data:
+
+```python
+# Load the data and split it between train and test sets
+(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
+
+# Scale images to the [0, 1] range
+x_train = x_train.astype("float32") / 255
+x_test = x_test.astype("float32") / 255
+# Make sure images have shape (28, 28, 1)
+x_train = np.expand_dims(x_train, -1)
+x_test = np.expand_dims(x_test, -1)
+print("x_train shape:", x_train.shape)
+print("y_train shape:", y_train.shape)
+print(x_train.shape[0], "train samples")
+print(x_test.shape[0], "test samples")
+```
+
+Here's our model.
+
+Different model-building options that Keras offers include:
+
+- [The Sequential API](https://keras.io/guides/sequential_model/) (what we use below)
+- [The Functional API](https://keras.io/guides/functional_api/) (most typical)
+- [Writing your own models yourself via subclassing](https://keras.io/guides/making_new_layers_and_models_via_subclassing/) (for advanced use cases)
+
+```python
+# Model parameters
+num_classes = 10
+input_shape = (28, 28, 1)
+
+model = keras.Sequential(
+    [
+        keras.layers.Input(shape=input_shape),
+        keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
+        keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
+        keras.layers.MaxPooling2D(pool_size=(2, 2)),
+        keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"),
+        keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"),
+        keras.layers.GlobalAveragePooling2D(),
+        keras.layers.Dropout(0.5),
+        keras.layers.Dense(num_classes, activation="softmax"),
+    ]
+)
+```
+
+Here's our model summary:
+
+```python
+model.summary()
+```
+
+We use the `compile()` method to specify the optimizer, loss function,
+and the metrics to monitor. Note that with the JAX and TensorFlow backends,
+XLA compilation is turned on by default.
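+As an aside, if you ever need to opt out of XLA compilation -- say, while
+debugging a custom layer -- `compile()` also accepts a `jit_compile` argument;
+a minimal sketch:
+
+```python
+# Fall back to non-compiled (eager-style) execution for this model.
+model.compile(loss="sparse_categorical_crossentropy", jit_compile=False)
+```
+
+With that noted, here is the compile call we'll actually use: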
+ +```python +model.compile( + loss=keras.losses.SparseCategoricalCrossentropy(), + optimizer=keras.optimizers.Adam(learning_rate=1e-3), + metrics=[ + keras.metrics.SparseCategoricalAccuracy(name="acc"), + ], +) +``` + +Let's train and evaluate the model. We'll set aside a validation split of 15% +of the data during training to monitor generalization on unseen data. + +```python +batch_size = 128 +epochs = 20 + +callbacks = [ + keras.callbacks.ModelCheckpoint(filepath="model_at_epoch_{epoch}.keras"), + keras.callbacks.EarlyStopping(monitor="val_loss", patience=2), +] + +model.fit( + x_train, + y_train, + batch_size=batch_size, + epochs=epochs, + validation_split=0.15, + callbacks=callbacks, +) +score = model.evaluate(x_test, y_test, verbose=0) +``` + +During training, we were saving a model at the end of each epoch. You +can also save the model in its latest state like this: + +```python +model.save("final_model.keras") +``` + +And reload it like this: + +```python +model = keras.saving.load_model("final_model.keras") +``` + +Next, you can query predictions of class probabilities with `predict()`: + +```python +predictions = model.predict(x_test) +``` + +That's it for the basics! + +## Writing cross-framework custom components + +Keras enables you to write custom Layers, Models, Metrics, Losses, and Optimizers +that work across TensorFlow, JAX, and PyTorch with the same codebase. Let's take a look +at custom layers first. + +The `keras.ops` namespace contains: + +- An implementation of the NumPy API, e.g. `keras.ops.stack` or `keras.ops.matmul`. +- A set of neural network specific ops that are absent from NumPy, such as `keras.ops.conv` +or `keras.ops.binary_crossentropy`. + +Let's make a custom `Dense` layer that works with all backends: + +```python +class MyDense(keras.layers.Layer): + def __init__(self, units, activation=None, name=None): + super().__init__(name=name) + self.units = units + self.activation = keras.activations.get(activation) + + def build(self, input_shape): + input_dim = input_shape[-1] + self.w = self.add_weight( + shape=(input_dim, self.units), + initializer=keras.initializers.GlorotNormal(), + name="kernel", + trainable=True, + ) + + self.b = self.add_weight( + shape=(self.units,), + initializer=keras.initializers.Zeros(), + name="bias", + trainable=True, + ) + + def call(self, inputs): + # Use Keras ops to create backend-agnostic layers/metrics/etc. + x = keras.ops.matmul(inputs, self.w) + self.b + return self.activation(x) +``` + +Next, let's make a custom `Dropout` layer that relies on the `keras.random` +namespace: + +```python +class MyDropout(keras.layers.Layer): + def __init__(self, rate, name=None): + super().__init__(name=name) + self.rate = rate + # Use seed_generator for managing RNG state. + # It is a state element and its seed variable is + # tracked as part of `layer.variables`. + self.seed_generator = keras.random.SeedGenerator(1337) + + def call(self, inputs): + # Use `keras.random` for random ops. 
+ return keras.random.dropout(inputs, self.rate, seed=self.seed_generator) +``` + +Next, let's write a custom subclassed model that uses our two custom layers: + +```python +class MyModel(keras.Model): + def __init__(self, num_classes): + super().__init__() + self.conv_base = keras.Sequential( + [ + keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), + keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"), + keras.layers.MaxPooling2D(pool_size=(2, 2)), + keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"), + keras.layers.Conv2D(128, kernel_size=(3, 3), activation="relu"), + keras.layers.GlobalAveragePooling2D(), + ] + ) + self.dp = MyDropout(0.5) + self.dense = MyDense(num_classes, activation="softmax") + + def call(self, x): + x = self.conv_base(x) + x = self.dp(x) + return self.dense(x) +``` + +Let's compile it and fit it: + +```python +model = MyModel(num_classes=10) +model.compile( + loss=keras.losses.SparseCategoricalCrossentropy(), + optimizer=keras.optimizers.Adam(learning_rate=1e-3), + metrics=[ + keras.metrics.SparseCategoricalAccuracy(name="acc"), + ], +) + +model.fit( + x_train, + y_train, + batch_size=batch_size, + epochs=1, # For speed + validation_split=0.15, +) +``` + +## Training models on arbitrary data sources + +All Keras models can be trained and evaluated on a wide variety of data sources, +independently of the backend you're using. This includes: + +- NumPy arrays +- Pandas dataframes +- TensorFlow `tf.data.Dataset` objects +- PyTorch `DataLoader` objects +- Keras `PyDataset` objects + +They all work whether you're using TensorFlow, JAX, or PyTorch as your Keras backend. + +Let's try it out with PyTorch `DataLoaders`: + +```python +import torch + +# Create a TensorDataset +train_torch_dataset = torch.utils.data.TensorDataset( + torch.from_numpy(x_train), torch.from_numpy(y_train) +) +val_torch_dataset = torch.utils.data.TensorDataset( + torch.from_numpy(x_test), torch.from_numpy(y_test) +) + +# Create a DataLoader +train_dataloader = torch.utils.data.DataLoader( + train_torch_dataset, batch_size=batch_size, shuffle=True +) +val_dataloader = torch.utils.data.DataLoader( + val_torch_dataset, batch_size=batch_size, shuffle=False +) + +model = MyModel(num_classes=10) +model.compile( + loss=keras.losses.SparseCategoricalCrossentropy(), + optimizer=keras.optimizers.Adam(learning_rate=1e-3), + metrics=[ + keras.metrics.SparseCategoricalAccuracy(name="acc"), + ], +) +model.fit(train_dataloader, epochs=1, validation_data=val_dataloader) +``` + +Now let's try this out with `tf.data`: + +```python +import tensorflow as tf + +train_dataset = ( + tf.data.Dataset.from_tensor_slices((x_train, y_train)) + .batch(batch_size) + .prefetch(tf.data.AUTOTUNE) +) +test_dataset = ( + tf.data.Dataset.from_tensor_slices((x_test, y_test)) + .batch(batch_size) + .prefetch(tf.data.AUTOTUNE) +) + +model = MyModel(num_classes=10) +model.compile( + loss=keras.losses.SparseCategoricalCrossentropy(), + optimizer=keras.optimizers.Adam(learning_rate=1e-3), + metrics=[ + keras.metrics.SparseCategoricalAccuracy(name="acc"), + ], +) +model.fit(train_dataset, epochs=1, validation_data=test_dataset) +``` + +## Further reading + +This concludes our short overview of the new multi-backend capabilities +of Keras 3. Next, you can learn about: + +### How to customize what happens in `fit()` + +Want to implement a non-standard training algorithm yourself but still want to benefit from +the power and usability of `fit()`? 
It's easy to customize
+`fit()` to support arbitrary use cases:
+
+- [Customizing what happens in `fit()` with TensorFlow](http://keras.io/guides/custom_train_step_in_tensorflow/)
+- [Customizing what happens in `fit()` with JAX](http://keras.io/guides/custom_train_step_in_jax/)
+- [Customizing what happens in `fit()` with PyTorch](http://keras.io/guides/custom_train_step_in_torch/)
+
+### How to write custom training loops
+
+- [Writing a training loop from scratch in TensorFlow](http://keras.io/guides/writing_a_custom_training_loop_in_tensorflow/)
+- [Writing a training loop from scratch in JAX](http://keras.io/guides/writing_a_custom_training_loop_in_jax/)
+- [Writing a training loop from scratch in PyTorch](http://keras.io/guides/writing_a_custom_training_loop_in_torch/)
+
+### How to distribute training
+
+- [Guide to distributed training with TensorFlow](http://keras.io/guides/distributed_training_with_tensorflow/)
+- [JAX distributed training example](https://github.com/keras-team/keras/blob/master/examples/demo_jax_distributed.py)
+- [PyTorch distributed training example](https://github.com/keras-team/keras/blob/master/examples/demo_torch_multi_gpu.py)
+
+Enjoy the library! 🚀
+
diff --git a/.tether/vignettes-src/intro_to_keras_for_researchers.Rmd b/.tether/vignettes-src/intro_to_keras_for_researchers.Rmd
new file mode 100644
index 0000000000..1ede3872cf
--- /dev/null
+++ b/.tether/vignettes-src/intro_to_keras_for_researchers.Rmd
@@ -0,0 +1,1124 @@
+---
+title: Introduction to Keras for Researchers
+author: '[fchollet](https://twitter.com/fchollet)'
+date-created: 2020/04/01
+last-modified: 2020/10/02
+description: Everything you need to know to use Keras & TensorFlow for deep learning
+  research.
+accelerator: None
+output: rmarkdown::html_vignette
+knit: ({source(here::here("tools/knit.R")); knit_vignette})
+---
+
+## Setup
+
+```python
+import tensorflow as tf
+import keras
+```
+
+## Introduction
+
+Are you a machine learning researcher? Do you publish at NeurIPS and push the
+state-of-the-art in CV and NLP? This guide will serve as your first introduction to core
+Keras & TensorFlow API concepts.
+
+In this guide, you will learn about:
+
+- Tensors, variables, and gradients in TensorFlow
+- Creating layers by subclassing the `Layer` class
+- Writing low-level training loops
+- Tracking losses created by layers via the `add_loss()` method
+- Tracking metrics in a low-level training loop
+- Speeding up execution with a compiled `tf.function`
+- Executing layers in training or inference mode
+- The Keras Functional API
+
+You will also see the Keras API in action in two end-to-end research examples:
+a Variational Autoencoder, and a Hypernetwork.
+
+## Tensors
+
+TensorFlow is an infrastructure layer for differentiable programming.
+At its heart, it's a framework for manipulating N-dimensional arrays (tensors),
+much like NumPy.
+
+However, there are three key differences between NumPy and TensorFlow:
+
+- TensorFlow can leverage hardware accelerators such as GPUs and TPUs.
+- TensorFlow can automatically compute the gradient of arbitrary differentiable tensor expressions.
+- TensorFlow computation can be distributed to large numbers of devices on a single machine, and large
+numbers of machines (potentially with multiple devices each).
+
+Let's take a look at the object that is at the core of TensorFlow: the Tensor.
+ +Here's a constant tensor: + +```python +x = tf.constant([[5, 2], [1, 3]]) +print(x) +``` + +You can get its value as a NumPy array by calling `.numpy()`: + +```python +x.numpy() +``` + +Much like a NumPy array, it features the attributes `dtype` and `shape`: + +```python +print("dtype:", x.dtype) +print("shape:", x.shape) +``` + +A common way to create constant tensors is via `tf.ones` and `tf.zeros` (just like `np.ones` and `np.zeros`): + +```python +print(tf.ones(shape=(2, 1))) +print(tf.zeros(shape=(2, 1))) +``` + +You can also create random constant tensors: + +```python +x = tf.random.normal(shape=(2, 2), mean=0.0, stddev=1.0) + +x = tf.random.uniform(shape=(2, 2), minval=0, maxval=10, dtype="int32") +``` + +## Variables + +Variables are special tensors used to store mutable state (such as the weights of a neural network). +You create a `Variable` using some initial value: + +```python +initial_value = tf.random.normal(shape=(2, 2)) +a = tf.Variable(initial_value) +print(a) +``` + +You update the value of a `Variable` by using the methods `.assign(value)`, `.assign_add(increment)`, or `.assign_sub(decrement)`: + +```python +new_value = tf.random.normal(shape=(2, 2)) +a.assign(new_value) +for i in range(2): + for j in range(2): + assert a[i, j] == new_value[i, j] + +added_value = tf.random.normal(shape=(2, 2)) +a.assign_add(added_value) +for i in range(2): + for j in range(2): + assert a[i, j] == new_value[i, j] + added_value[i, j] +``` + +## Doing math in TensorFlow + +If you've used NumPy, doing math in TensorFlow will look very familiar. +The main difference is that your TensorFlow code can run on GPU and TPU. + +```python +a = tf.random.normal(shape=(2, 2)) +b = tf.random.normal(shape=(2, 2)) + +c = a + b +d = tf.square(c) +e = tf.exp(d) +``` + +## Gradients + +Here's another big difference with NumPy: you can automatically retrieve the gradient of any differentiable expression. + +Just open a `GradientTape`, start "watching" a tensor via `tape.watch()`, +and compose a differentiable expression using this tensor as input: + +```python +a = tf.random.normal(shape=(2, 2)) +b = tf.random.normal(shape=(2, 2)) + +with tf.GradientTape() as tape: + tape.watch(a) # Start recording the history of operations applied to `a` + c = tf.sqrt(tf.square(a) + tf.square(b)) # Do some math using `a` + # What's the gradient of `c` with respect to `a`? + dc_da = tape.gradient(c, a) + print(dc_da) +``` + +By default, variables are watched automatically, so you don't need to manually `watch` them: + +```python +a = tf.Variable(a) + +with tf.GradientTape() as tape: + c = tf.sqrt(tf.square(a) + tf.square(b)) + dc_da = tape.gradient(c, a) + print(dc_da) +``` + +Note that you can compute higher-order derivatives by nesting tapes: + +```python +with tf.GradientTape() as outer_tape: + with tf.GradientTape() as tape: + c = tf.sqrt(tf.square(a) + tf.square(b)) + dc_da = tape.gradient(c, a) + d2c_da2 = outer_tape.gradient(dc_da, a) + print(d2c_da2) +``` + +## Keras layers + +While TensorFlow is an **infrastructure layer for differentiable programming**, +dealing with tensors, variables, and gradients, +Keras is a **user interface for deep learning**, dealing with +layers, models, optimizers, loss functions, metrics, and more. + +Keras serves as the high-level API for TensorFlow: +Keras is what makes TensorFlow simple and productive. + +The `Layer` class is the fundamental abstraction in Keras. +A `Layer` encapsulates a state (weights) and some computation +(defined in the call method). 
+ +A simple layer looks like this. +The `self.add_weight()` method gives you a shortcut for creating weights: + +```python +class Linear(keras.layers.Layer): + """y = w.x + b""" + + def __init__(self, units=32, input_dim=32): + super().__init__() + self.w = self.add_weight( + shape=(input_dim, units), initializer="random_normal", trainable=True + ) + self.b = self.add_weight(shape=(units,), initializer="zeros", trainable=True) + + def call(self, inputs): + return tf.matmul(inputs, self.w) + self.b +``` + +You would use a `Layer` instance much like a Python function: + +```python +# Instantiate our layer. +linear_layer = Linear(units=4, input_dim=2) + +# The layer can be treated as a function. +# Here we call it on some data. +y = linear_layer(tf.ones((2, 2))) +assert y.shape == (2, 4) +``` + +The weight variables (created in `__init__`) are automatically +tracked under the `weights` property: + +```python +assert linear_layer.weights == [linear_layer.w, linear_layer.b] +``` + +You have many built-in layers available, from `Dense` to `Conv2D` to `LSTM` to +fancier ones like `Conv3DTranspose` or `ConvLSTM2D`. Be smart about reusing +built-in functionality. + +## Layer weight creation in `build(input_shape)` + +It's often a good idea to defer weight creation to the `build()` method, so +that you don't need to specify the input dim/shape at layer construction time: + +```python +class Linear(keras.layers.Layer): + """y = w.x + b""" + + def __init__(self, units=32): + super().__init__() + self.units = units + + def build(self, input_shape): + self.w = self.add_weight( + shape=(input_shape[-1], self.units), + initializer="random_normal", + trainable=True, + ) + self.b = self.add_weight( + shape=(self.units,), initializer="random_normal", trainable=True + ) + + def call(self, inputs): + return tf.matmul(inputs, self.w) + self.b + + +# Instantiate our layer. +linear_layer = Linear(4) + +# This will also call `build(input_shape)` and create the weights. +y = linear_layer(tf.ones((2, 2))) +``` + +## Layer gradients + +You can automatically retrieve the gradients of the weights of a layer by +calling it inside a `GradientTape`. Using these gradients, you can update the +weights of the layer, either manually, or using an optimizer object. Of course, +you can modify the gradients before using them, if you need to. + +```python +# Prepare a dataset. +(x_train, y_train), _ = keras.datasets.mnist.load_data() +dataset = tf.data.Dataset.from_tensor_slices( + (x_train.reshape(60000, 784).astype("float32") / 255, y_train) +) +dataset = dataset.shuffle(buffer_size=1024).batch(64) + +# Instantiate our linear layer (defined above) with 10 units. +linear_layer = Linear(10) + +# Instantiate a logistic loss function that expects integer targets. +loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True) + +# Instantiate an optimizer. +optimizer = keras.optimizers.SGD(learning_rate=1e-3) + +# Iterate over the batches of the dataset. +for step, (x, y) in enumerate(dataset): + # Open a GradientTape. + with tf.GradientTape() as tape: + # Forward pass. + logits = linear_layer(x) + + # Loss value for this batch. + loss = loss_fn(y, logits) + + # Get gradients of the loss wrt the weights. + gradients = tape.gradient(loss, linear_layer.trainable_weights) + + # Update the weights of our linear layer. + optimizer.apply_gradients(zip(gradients, linear_layer.trainable_weights)) + + # Logging. 
+ if step % 100 == 0: + print("Step:", step, "Loss:", float(loss)) +``` + +## Trainable and non-trainable weights + +Weights created by layers can be either trainable or non-trainable. They're +exposed in `trainable_weights` and `non_trainable_weights` respectively. +Here's a layer with a non-trainable weight: + +```python +class ComputeSum(keras.layers.Layer): + """Returns the sum of the inputs.""" + + def __init__(self, input_dim): + super().__init__() + # Create a non-trainable weight. + self.total = self.add_weight( + initializer="zeros", shape=(input_dim,), trainable=False + ) + + def call(self, inputs): + self.total.assign_add(tf.reduce_sum(inputs, axis=0)) + return self.total + + +my_sum = ComputeSum(2) +x = tf.ones((2, 2)) + +y = my_sum(x) +print(y.numpy()) # [2. 2.] + +y = my_sum(x) +print(y.numpy()) # [4. 4.] + +assert my_sum.weights == [my_sum.total] +assert my_sum.non_trainable_weights == [my_sum.total] +assert my_sum.trainable_weights == [] +``` + +## Layers that own layers + +Layers can be recursively nested to create bigger computation blocks. +Each layer will track the weights of its sublayers +(both trainable and non-trainable). + +```python +# Let's reuse the Linear class +# with a `build` method that we defined above. + + +class MLP(keras.layers.Layer): + """Simple stack of Linear layers.""" + + def __init__(self): + super().__init__() + self.linear_1 = Linear(32) + self.linear_2 = Linear(32) + self.linear_3 = Linear(10) + + def call(self, inputs): + x = self.linear_1(inputs) + x = tf.nn.relu(x) + x = self.linear_2(x) + x = tf.nn.relu(x) + return self.linear_3(x) + + +mlp = MLP() + +# The first call to the `mlp` object will create the weights. +y = mlp(tf.ones(shape=(3, 64))) + +# Weights are recursively tracked. +assert len(mlp.weights) == 6 +``` + +Note that our manually-created MLP above is equivalent to the following +built-in option: + +```python +mlp = keras.Sequential( + [ + keras.layers.Dense(32, activation=tf.nn.relu), + keras.layers.Dense(32, activation=tf.nn.relu), + keras.layers.Dense(10), + ] +) +``` + +## Tracking losses created by layers + +Layers can create losses during the forward pass via the `add_loss()` method. +This is especially useful for regularization losses. +The losses created by sublayers are recursively tracked by the parent layers. + +Here's a layer that creates an activity regularization loss: + +```python +class ActivityRegularization(keras.layers.Layer): + """Layer that creates an activity sparsity regularization loss.""" + + def __init__(self, rate=0.1): + super().__init__() + self.rate = rate + + def call(self, inputs): + # We use `add_loss` to create a regularization loss + # that depends on the inputs. + self.add_loss(self.rate * tf.reduce_mean(inputs)) + return inputs +``` + +Any model incorporating this layer will track this regularization loss: + +```python +# Let's use the loss layer in a MLP block. + + +class SparseMLP(keras.layers.Layer): + """Stack of Linear layers with a sparsity regularization loss.""" + + def __init__(self): + super().__init__() + self.linear_1 = Linear(32) + self.regularization = ActivityRegularization(0.1) + self.linear_3 = Linear(10) + + def call(self, inputs): + x = self.linear_1(inputs) + x = tf.nn.relu(x) + x = self.regularization(x) + return self.linear_3(x) + + +mlp = SparseMLP() +y = mlp(tf.ones((10, 10))) + +print(mlp.losses) # List containing one float32 scalar +``` + +These losses are cleared by the top-level layer at the start of each forward +pass -- they don't accumulate. 
`layer.losses` always contains only the losses
+created during the last forward pass. You would typically use these losses by
+summing them before computing your gradients when writing a training loop.
+
+```python
+# Losses correspond to the *last* forward pass.
+mlp = SparseMLP()
+mlp(tf.ones((10, 10)))
+assert len(mlp.losses) == 1
+mlp(tf.ones((10, 10)))
+assert len(mlp.losses) == 1  # No accumulation.
+
+# Let's demonstrate how to use these losses in a training loop.
+
+# Prepare a dataset.
+(x_train, y_train), _ = keras.datasets.mnist.load_data()
+dataset = tf.data.Dataset.from_tensor_slices(
+    (x_train.reshape(60000, 784).astype("float32") / 255, y_train)
+)
+dataset = dataset.shuffle(buffer_size=1024).batch(64)
+
+# A new MLP.
+mlp = SparseMLP()
+
+# Loss and optimizer.
+loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
+optimizer = keras.optimizers.SGD(learning_rate=1e-3)
+
+for step, (x, y) in enumerate(dataset):
+    with tf.GradientTape() as tape:
+        # Forward pass.
+        logits = mlp(x)
+
+        # External loss value for this batch.
+        loss = loss_fn(y, logits)
+
+        # Add the losses created during the forward pass.
+        loss += sum(mlp.losses)
+
+        # Get gradients of the loss wrt the weights.
+        gradients = tape.gradient(loss, mlp.trainable_weights)
+
+    # Update the weights of our linear layer.
+    optimizer.apply_gradients(zip(gradients, mlp.trainable_weights))
+
+    # Logging.
+    if step % 100 == 0:
+        print("Step:", step, "Loss:", float(loss))
+```
+
+## Keeping track of training metrics
+
+Keras offers a broad range of built-in metrics, like `keras.metrics.AUC`
+or `keras.metrics.PrecisionAtRecall`. It's also easy to create your
+own metrics in a few lines of code.
+
+To use a metric in a custom training loop, you would:
+
+- Instantiate the metric object, e.g. `metric = keras.metrics.AUC()`
+- Call its `metric.update_state(targets, predictions)` method for each batch of data
+- Query its result via `metric.result()`
+- Reset the metric's state at the end of an epoch or at the start of an evaluation via
+`metric.reset_state()`
+
+Here's a simple example:
+
+```python
+# Instantiate a metric object
+accuracy = keras.metrics.SparseCategoricalAccuracy()
+
+# Prepare our layer, loss, and optimizer.
+model = keras.Sequential(
+    [
+        keras.layers.Dense(32, activation="relu"),
+        keras.layers.Dense(32, activation="relu"),
+        keras.layers.Dense(10),
+    ]
+)
+loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
+optimizer = keras.optimizers.Adam(learning_rate=1e-3)
+
+for epoch in range(2):
+    # Iterate over the batches of a dataset.
+    for step, (x, y) in enumerate(dataset):
+        with tf.GradientTape() as tape:
+            logits = model(x)
+            # Compute the loss value for this batch.
+            loss_value = loss_fn(y, logits)
+
+        # Update the state of the `accuracy` metric.
+        accuracy.update_state(y, logits)
+
+        # Update the weights of the model to minimize the loss value.
+        gradients = tape.gradient(loss_value, model.trainable_weights)
+        optimizer.apply_gradients(zip(gradients, model.trainable_weights))
+
+        # Logging the current accuracy value so far.
+        if step % 200 == 0:
+            print("Epoch:", epoch, "Step:", step)
+            print("Total running accuracy so far: %.3f" % accuracy.result())
+
+    # Reset the metric's state at the end of an epoch
+    accuracy.reset_state()
+```
+
+You can also define your own metrics by subclassing `keras.metrics.Metric`.
+You need to override the three functions called above:
+
+- Override `update_state()` to update the statistic values.
+- Override `result()` to return the metric value.
+- Override `reset_state()` to reset the metric to its initial state.
+
+Here is an example where we implement the F1-score metric
+(with support for sample weighting).
+
+```python
+class F1Score(keras.metrics.Metric):
+    def __init__(self, name="f1_score", dtype="float32", threshold=0.5, **kwargs):
+        super().__init__(name=name, dtype=dtype, **kwargs)
+        self.threshold = threshold
+        self.true_positives = self.add_weight(
+            name="tp", dtype=dtype, initializer="zeros"
+        )
+        self.false_positives = self.add_weight(
+            name="fp", dtype=dtype, initializer="zeros"
+        )
+        self.false_negatives = self.add_weight(
+            name="fn", dtype=dtype, initializer="zeros"
+        )
+
+    def update_state(self, y_true, y_pred, sample_weight=None):
+        y_pred = tf.math.greater_equal(y_pred, self.threshold)
+        y_true = tf.cast(y_true, tf.bool)
+        y_pred = tf.cast(y_pred, tf.bool)
+
+        true_positives = tf.cast(y_true & y_pred, self.dtype)
+        false_positives = tf.cast(~y_true & y_pred, self.dtype)
+        false_negatives = tf.cast(y_true & ~y_pred, self.dtype)
+
+        if sample_weight is not None:
+            sample_weight = tf.cast(sample_weight, self.dtype)
+            true_positives *= sample_weight
+            false_positives *= sample_weight
+            false_negatives *= sample_weight
+
+        self.true_positives.assign_add(tf.reduce_sum(true_positives))
+        self.false_positives.assign_add(tf.reduce_sum(false_positives))
+        self.false_negatives.assign_add(tf.reduce_sum(false_negatives))
+
+    def result(self):
+        precision = self.true_positives / (self.true_positives + self.false_positives)
+        recall = self.true_positives / (self.true_positives + self.false_negatives)
+        return precision * recall * 2.0 / (precision + recall)
+
+    def reset_state(self):
+        self.true_positives.assign(0)
+        self.false_positives.assign(0)
+        self.false_negatives.assign(0)
+```
+
+Let's test-drive it:
+
+```python
+m = F1Score()
+m.update_state([0, 1, 0, 0], [0.3, 0.5, 0.8, 0.9])
+print("Intermediate result:", float(m.result()))
+
+m.update_state([1, 1, 1, 1], [0.1, 0.7, 0.6, 0.0])
+print("Final result:", float(m.result()))
+```
+
+## Compiled functions
+
+Running eagerly is great for debugging, but you will get better performance by
+compiling your computation into static graphs. Static graphs are a researcher's
+best friends. You can compile any function by wrapping it in a `tf.function`
+decorator.
+
+```python
+# Prepare our layer, loss, and optimizer.
+model = keras.Sequential(
+    [
+        keras.layers.Dense(32, activation="relu"),
+        keras.layers.Dense(32, activation="relu"),
+        keras.layers.Dense(10),
+    ]
+)
+loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
+optimizer = keras.optimizers.Adam(learning_rate=1e-3)
+
+# Create a training step function.
+
+
+@tf.function  # Make it fast.
+def train_on_batch(x, y):
+    with tf.GradientTape() as tape:
+        logits = model(x)
+        loss = loss_fn(y, logits)
+        gradients = tape.gradient(loss, model.trainable_weights)
+        optimizer.apply_gradients(zip(gradients, model.trainable_weights))
+    return loss
+```
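+As an aside: if a compiled function misbehaves, you can debug it eagerly,
+either by calling the underlying Python function directly via
+`train_on_batch.python_function(x, y)` (the attribute `tf.function` exposes
+for the wrapped function) or by temporarily disabling compilation globally:
+
+```python
+# Temporarily disable tf.function compilation to step through the code.
+tf.config.run_functions_eagerly(True)
+# ... debug a few batches eagerly ...
+tf.config.run_functions_eagerly(False)
+```
+
+Now let's prepare a dataset and drive the compiled training step: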
+```python
+# Prepare a dataset.
+(x_train, y_train), _ = keras.datasets.mnist.load_data()
+dataset = tf.data.Dataset.from_tensor_slices(
+    (x_train.reshape(60000, 784).astype("float32") / 255, y_train)
+)
+dataset = dataset.shuffle(buffer_size=1024).batch(64)
+
+for step, (x, y) in enumerate(dataset):
+    loss = train_on_batch(x, y)
+    if step % 100 == 0:
+        print("Step:", step, "Loss:", float(loss))
+```
+
+## Training mode & inference mode
+
+Some layers, in particular the `BatchNormalization` layer and the `Dropout`
+layer, have different behaviors during training and inference. For such layers,
+it is standard practice to expose a `training` (boolean) argument in the `call`
+method.
+
+By exposing this argument in `call`, you enable the built-in training and
+evaluation loops (e.g. `fit()`) to correctly use the layer in training and
+inference modes.
+
+```python
+class Dropout(keras.layers.Layer):
+    def __init__(self, rate):
+        super().__init__()
+        self.rate = rate
+
+    def call(self, inputs, training=None):
+        if training:
+            return tf.nn.dropout(inputs, rate=self.rate)
+        return inputs
+
+
+class MLPWithDropout(keras.layers.Layer):
+    def __init__(self):
+        super().__init__()
+        self.linear_1 = Linear(32)
+        self.dropout = Dropout(0.5)
+        self.linear_3 = Linear(10)
+
+    def call(self, inputs, training=None):
+        x = self.linear_1(inputs)
+        x = tf.nn.relu(x)
+        x = self.dropout(x, training=training)
+        return self.linear_3(x)
+
+
+mlp = MLPWithDropout()
+y_train = mlp(tf.ones((2, 2)), training=True)
+y_test = mlp(tf.ones((2, 2)), training=False)
+```
+
+## The Functional API for model-building
+
+To build deep learning models, you don't have to use object-oriented programming all the
+time. All layers we've seen so far can also be composed functionally, like this (we call
+it the "Functional API"):
+
+```python
+# We use an `Input` object to describe the shape and dtype of the inputs.
+# This is the deep learning equivalent of *declaring a type*.
+# The shape argument is per-sample; it does not include the batch size.
+# The functional API is focused on defining per-sample transformations.
+# The model we create will automatically batch the per-sample transformations,
+# so that it can be called on batches of data.
+inputs = keras.Input(shape=(16,), dtype="float32")
+
+# We call layers on these "type" objects
+# and they return updated types (new shapes/dtypes).
+x = Linear(32)(inputs)  # We are reusing the Linear layer we defined earlier.
+x = Dropout(0.5)(x)  # We are reusing the Dropout layer we defined earlier.
+outputs = Linear(10)(x)
+
+# A functional `Model` can be defined by specifying inputs and outputs.
+# A model is itself a layer like any other.
+model = keras.Model(inputs, outputs)
+
+# A functional model already has weights, before being called on any data.
+# That's because we defined its input shape in advance (in `Input`).
+assert len(model.weights) == 4
+
+# Let's call our model on some data, for fun.
+y = model(tf.ones((2, 16)))
+assert y.shape == (2, 10)
+
+# You can pass a `training` argument in `__call__`
+# (it will get passed down to the Dropout layer).
+y = model(tf.ones((2, 16)), training=True)
+```
+
+The Functional API tends to be more concise than subclassing, and provides a few other
+advantages (generally the same advantages that functional, typed languages provide over
+untyped OO development). However, it can only be used to define DAGs of layers --
+recursive networks should be defined as Layer subclasses instead.
+
+Learn more about the Functional API [here](/guides/functional_api/).
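+Since a functional `Model` is itself a layer, you can also embed one inside a
+subclassed layer. A minimal sketch (the wrapper class and the `Dense(5)` head
+below are hypothetical, just for illustration):
+
+```python
+class ClassifierWithFunctionalBlock(keras.layers.Layer):
+    def __init__(self):
+        super().__init__()
+        # Reuse the functional `model` defined above as a sublayer;
+        # its weights are tracked by this layer automatically.
+        self.block = model
+        self.classifier = keras.layers.Dense(5)
+
+    def call(self, inputs, training=None):
+        x = self.block(inputs, training=training)
+        return self.classifier(x)
+
+
+wrapper = ClassifierWithFunctionalBlock()
+y = wrapper(tf.ones((2, 16)), training=False)
+assert y.shape == (2, 5)
+```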
+ +In your research workflows, you may often find yourself mix-and-matching OO models and +Functional models. + +Note that the `Model` class also features built-in training & evaluation loops: +`fit()`, `predict()` and `evaluate()` (configured via the `compile()` method). +These built-in functions give you access to the +following built-in training infrastructure features: + +* [Callbacks](/api/callbacks/). You can leverage built-in +callbacks for early-stopping, model checkpointing, +and monitoring training with TensorBoard. You can also +[implement custom callbacks](/guides/writing_your_own_callbacks/) if needed. +* [Distributed training](https://keras.io/guides/distributed_training/). You +can easily scale up your training to multiple GPUs, TPU, or even multiple machines +with the `tf.distribute` API -- with no changes to your code. +* [Step fusing](https://keras.io/api/models/model_training_apis/#compile-method). +With the `steps_per_execution` argument in `Model.compile()`, you can process +multiple batches in a single `tf.function` call, which greatly improves +device utilization on TPUs. + +We won't go into the details, but we provide a simple code example +below. It leverages the built-in training infrastructure to implement the MNIST +example above. + +```python +inputs = keras.Input(shape=(784,), dtype="float32") +x = keras.layers.Dense(32, activation="relu")(inputs) +x = keras.layers.Dense(32, activation="relu")(x) +outputs = keras.layers.Dense(10)(x) +model = keras.Model(inputs, outputs) + +# Specify the loss, optimizer, and metrics with `compile()`. +model.compile( + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + optimizer=keras.optimizers.Adam(learning_rate=1e-3), + metrics=[keras.metrics.SparseCategoricalAccuracy()], +) + +# Train the model with the dataset for 2 epochs. +model.fit(dataset, epochs=2) +model.predict(dataset) +model.evaluate(dataset) +``` + +You can always subclass the `Model` class (it works exactly like subclassing +`Layer`) if you want to leverage built-in training loops for your OO models. +Just override the `Model.train_step()` to +customize what happens in `fit()` while retaining support +for the built-in infrastructure features outlined above -- callbacks, +zero-code distribution support, and step fusing support. +You may also override `test_step()` to customize what happens in `evaluate()`, +and override `predict_step()` to customize what happens in `predict()`. For more +information, please refer to +[this guide](custom_train_step_in_tensorflow.html). + +```python +class CustomModel(keras.Model): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.loss_tracker = keras.metrics.Mean(name="loss") + self.accuracy = keras.metrics.SparseCategoricalAccuracy() + self.loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True) + self.optimizer = keras.optimizers.Adam(learning_rate=1e-3) + + def train_step(self, data): + # Unpack the data. Its structure depends on your model and + # on what you pass to `fit()`. 
+ x, y = data + with tf.GradientTape() as tape: + y_pred = self(x, training=True) # Forward pass + loss = self.loss_fn(y, y_pred) + gradients = tape.gradient(loss, self.trainable_weights) + self.optimizer.apply_gradients(zip(gradients, self.trainable_weights)) + # Update metrics (includes the metric that tracks the loss) + self.loss_tracker.update_state(loss) + self.accuracy.update_state(y, y_pred) + # Return a dict mapping metric names to current value + return {"loss": self.loss_tracker.result(), "accuracy": self.accuracy.result()} + + @property + def metrics(self): + # We list our `Metric` objects here so that `reset_states()` can be + # called automatically at the start of each epoch. + return [self.loss_tracker, self.accuracy] + + +inputs = keras.Input(shape=(784,), dtype="float32") +x = keras.layers.Dense(32, activation="relu")(inputs) +x = keras.layers.Dense(32, activation="relu")(x) +outputs = keras.layers.Dense(10)(x) +model = CustomModel(inputs, outputs) +model.compile() +model.fit(dataset, epochs=2) +``` + +## End-to-end experiment example 1: variational autoencoders. + +Here are some of the things you've learned so far: + +- A `Layer` encapsulates a state (created in `__init__` or `build`) and some computation +(defined in `call`). +- Layers can be recursively nested to create new, bigger computation blocks. +- You can easily write highly hackable training loops by opening a +`GradientTape`, calling your model inside the tape's scope, then retrieving +gradients and applying them via an optimizer. +- You can speed up your training loops using the `@tf.function` decorator. +- Layers can create and track losses (typically regularization losses) via +`self.add_loss()`. + +Let's put all of these things together into an end-to-end example: we're going to +implement a Variational AutoEncoder (VAE). We'll train it on MNIST digits. + +Our VAE will be a subclass of `Layer`, built as a nested composition of layers that +subclass `Layer`. It will feature a regularization loss (KL divergence). + +Below is our model definition. + +First, we have an `Encoder` class, which uses a `Sampling` layer to map a MNIST digit to +a latent-space triplet `(z_mean, z_log_var, z)`. + +```python +from tensorflow.keras import layers + + +class Sampling(layers.Layer): + """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit.""" + + def call(self, inputs): + z_mean, z_log_var = inputs + batch = tf.shape(z_mean)[0] + dim = tf.shape(z_mean)[1] + epsilon = keras.backend.random_normal(shape=(batch, dim)) + return z_mean + tf.exp(0.5 * z_log_var) * epsilon + + +class Encoder(layers.Layer): + """Maps MNIST digits to a triplet (z_mean, z_log_var, z).""" + + def __init__(self, latent_dim=32, intermediate_dim=64, **kwargs): + super().__init__(**kwargs) + self.dense_proj = layers.Dense(intermediate_dim, activation=tf.nn.relu) + self.dense_mean = layers.Dense(latent_dim) + self.dense_log_var = layers.Dense(latent_dim) + self.sampling = Sampling() + + def call(self, inputs): + x = self.dense_proj(inputs) + z_mean = self.dense_mean(x) + z_log_var = self.dense_log_var(x) + z = self.sampling((z_mean, z_log_var)) + return z_mean, z_log_var, z +``` + +Next, we have a `Decoder` class, which maps the probabilistic latent space coordinates +back to a MNIST digit. 
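+As an aside on the `Sampling` layer above: it implements the standard
+reparameterization trick. Instead of sampling `z` from `N(z_mean, sigma^2)`
+directly, it samples `epsilon` from `N(0, I)` and computes
+`z = z_mean + exp(0.5 * z_log_var) * epsilon`, which keeps the sampling step
+differentiable with respect to `z_mean` and `z_log_var`.
+
+Here is the `Decoder`: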
+ +```python +class Decoder(layers.Layer): + """Converts z, the encoded digit vector, back into a readable digit.""" + + def __init__(self, original_dim, intermediate_dim=64, **kwargs): + super().__init__(**kwargs) + self.dense_proj = layers.Dense(intermediate_dim, activation=tf.nn.relu) + self.dense_output = layers.Dense(original_dim, activation=tf.nn.sigmoid) + + def call(self, inputs): + x = self.dense_proj(inputs) + return self.dense_output(x) +``` + +Finally, our `VariationalAutoEncoder` composes together an encoder and a decoder, and +creates a KL divergence regularization loss via `add_loss()`. + +```python +class VariationalAutoEncoder(layers.Layer): + """Combines the encoder and decoder into an end-to-end model for training.""" + + def __init__(self, original_dim, intermediate_dim=64, latent_dim=32, **kwargs): + super().__init__(**kwargs) + self.original_dim = original_dim + self.encoder = Encoder(latent_dim=latent_dim, intermediate_dim=intermediate_dim) + self.decoder = Decoder(original_dim, intermediate_dim=intermediate_dim) + + def call(self, inputs): + z_mean, z_log_var, z = self.encoder(inputs) + reconstructed = self.decoder(z) + # Add KL divergence regularization loss. + kl_loss = -0.5 * tf.reduce_mean( + z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1 + ) + self.add_loss(kl_loss) + return reconstructed +``` + +Now, let's write a training loop. Our training step is decorated with a `@tf.function` to +compile into a super fast graph function. + +```python +# Our model. +vae = VariationalAutoEncoder(original_dim=784, intermediate_dim=64, latent_dim=32) + +# Loss and optimizer. +loss_fn = keras.losses.MeanSquaredError() +optimizer = keras.optimizers.Adam(learning_rate=1e-3) + +# Prepare a dataset. +(x_train, _), _ = keras.datasets.mnist.load_data() +dataset = tf.data.Dataset.from_tensor_slices( + x_train.reshape(60000, 784).astype("float32") / 255 +) +dataset = dataset.shuffle(buffer_size=1024).batch(32) + + +@tf.function +def training_step(x): + with tf.GradientTape() as tape: + reconstructed = vae(x) # Compute input reconstruction. + # Compute loss. + loss = loss_fn(x, reconstructed) + loss += sum(vae.losses) # Add KLD term. + # Update the weights of the VAE. + grads = tape.gradient(loss, vae.trainable_weights) + optimizer.apply_gradients(zip(grads, vae.trainable_weights)) + return loss + + +losses = [] # Keep track of the losses over time. +for step, x in enumerate(dataset): + loss = training_step(x) + # Logging. + losses.append(float(loss)) + if step % 100 == 0: + print("Step:", step, "Loss:", sum(losses) / len(losses)) + + # Stop after 1000 steps. + # Training the model to convergence is left + # as an exercise to the reader. + if step >= 1000: + break +``` + +As you can see, building and training this type of model in Keras +is quick and painless. + +## End-to-end experiment example 2: hypernetworks. + +Let's take a look at another kind of research experiment: hypernetworks. + +The idea is to use a small deep neural network (the hypernetwork) to generate +the weights for a larger network (the main network). + +Let's implement a really trivial hypernetwork: we'll use a small 2-layer network to +generate the weights of a larger 3-layer network. + +```python +import numpy as np + +input_dim = 784 +classes = 10 + +# This is the main network we'll actually use to predict labels. 
+main_network = keras.Sequential( + [ + keras.layers.Dense(64, activation=tf.nn.relu), + keras.layers.Dense(classes), + ] +) + +# It doesn't need to create its own weights, so let's mark its layers +# as already built. That way, calling `main_network` won't create new variables. +for layer in main_network.layers: + layer.built = True + +# This is the number of weight coefficients to generate. Each layer in the +# main network requires output_dim * input_dim + output_dim coefficients. +num_weights_to_generate = (classes * 64 + classes) + (64 * input_dim + 64) + +# This is the hypernetwork that generates the weights of the `main_network` above. +hypernetwork = keras.Sequential( + [ + keras.layers.Dense(16, activation=tf.nn.relu), + keras.layers.Dense(num_weights_to_generate, activation=tf.nn.sigmoid), + ] +) +``` + +This is our training loop. For each batch of data: + +- We use `hypernetwork` to generate an array of weight coefficients, `weights_pred` +- We reshape these coefficients into kernel & bias tensors for the `main_network` +- We run the forward pass of the `main_network` to compute the actual MNIST predictions +- We run backprop through the weights of the `hypernetwork` to minimize the +final classification loss + +```python +# Loss and optimizer. +loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True) +optimizer = keras.optimizers.Adam(learning_rate=1e-4) + +# Prepare a dataset. +(x_train, y_train), _ = keras.datasets.mnist.load_data() +dataset = tf.data.Dataset.from_tensor_slices( + (x_train.reshape(60000, 784).astype("float32") / 255, y_train) +) + +# We'll use a batch size of 1 for this experiment. +dataset = dataset.shuffle(buffer_size=1024).batch(1) + + +@tf.function +def train_step(x, y): + with tf.GradientTape() as tape: + # Predict weights for the outer model. + weights_pred = hypernetwork(x) + + # Reshape them to the expected shapes for w and b for the outer model. + # Layer 0 kernel. + start_index = 0 + w0_shape = (input_dim, 64) + w0_coeffs = weights_pred[:, start_index : start_index + np.prod(w0_shape)] + w0 = tf.reshape(w0_coeffs, w0_shape) + start_index += np.prod(w0_shape) + # Layer 0 bias. + b0_shape = (64,) + b0_coeffs = weights_pred[:, start_index : start_index + np.prod(b0_shape)] + b0 = tf.reshape(b0_coeffs, b0_shape) + start_index += np.prod(b0_shape) + # Layer 1 kernel. + w1_shape = (64, classes) + w1_coeffs = weights_pred[:, start_index : start_index + np.prod(w1_shape)] + w1 = tf.reshape(w1_coeffs, w1_shape) + start_index += np.prod(w1_shape) + # Layer 1 bias. + b1_shape = (classes,) + b1_coeffs = weights_pred[:, start_index : start_index + np.prod(b1_shape)] + b1 = tf.reshape(b1_coeffs, b1_shape) + start_index += np.prod(b1_shape) + + # Set the weight predictions as the weight variables on the outer model. + main_network.layers[0].kernel = w0 + main_network.layers[0].bias = b0 + main_network.layers[1].kernel = w1 + main_network.layers[1].bias = b1 + + # Inference on the outer model. + preds = main_network(x) + loss = loss_fn(y, preds) + + # Train only inner model. + grads = tape.gradient(loss, hypernetwork.trainable_weights) + optimizer.apply_gradients(zip(grads, hypernetwork.trainable_weights)) + return loss + + +losses = [] # Keep track of the losses over time. +for step, (x, y) in enumerate(dataset): + loss = train_step(x, y) + + # Logging. + losses.append(float(loss)) + if step % 100 == 0: + print("Step:", step, "Loss:", sum(losses) / len(losses)) + + # Stop after 1000 steps. 
+ # Training the model to convergence is left + # as an exercise to the reader. + if step >= 1000: + break +``` + +Implementing arbitrary research ideas with Keras is straightforward and highly +productive. Imagine trying out 25 ideas per day (20 minutes per experiment on average)! + +Keras has been designed to go from idea to results as fast as possible, because we +believe this is +the key to doing great research. + +We hope you enjoyed this quick introduction. Let us know what you build with Keras! diff --git a/.tether/vignettes-src/making_new_layers_and_models_via_subclassing.Rmd b/.tether/vignettes-src/making_new_layers_and_models_via_subclassing.Rmd new file mode 100644 index 0000000000..ddaf7aafd1 --- /dev/null +++ b/.tether/vignettes-src/making_new_layers_and_models_via_subclassing.Rmd @@ -0,0 +1,645 @@ +--- +title: Making new layers and models via subclassing +author: '[fchollet](https://twitter.com/fchollet)' +date-created: 2019/03/01 +last-modified: 2023/06/25 +description: Complete guide to writing `Layer` and `Model` objects from scratch. +accelerator: None +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras-io/guides/making_new_layers_and_models_via_subclassing.py +--- + +## Introduction + +This guide will cover everything you need to know to build your own +subclassed layers and models. In particular, you'll learn about the following features: + +- The `Layer` class +- The `add_weight()` method +- Trainable and non-trainable weights +- The `build()` method +- Making sure your layers can be used with any backend +- The `add_loss()` method +- The `training` argument in `call()` +- The `mask` argument in `call()` +- Making sure your layers can be serialized + +Let's dive in. + +## Setup + +```python +import numpy as np +import keras +from keras import ops +from keras import layers +``` + +## The `Layer` class: the combination of state (weights) and some computation + +One of the central abstractions in Keras is the `Layer` class. A layer +encapsulates both a state (the layer's "weights") and a transformation from +inputs to outputs (a "call", the layer's forward pass). + +Here's a densely-connected layer. It has two state variables: +the variables `w` and `b`. + +```python +class Linear(keras.layers.Layer): + def __init__(self, units=32, input_dim=32): + super().__init__() + self.w = self.add_weight( + shape=(input_dim, units), + initializer="random_normal", + trainable=True, + ) + self.b = self.add_weight(shape=(units,), initializer="zeros", trainable=True) + + def call(self, inputs): + return ops.matmul(inputs, self.w) + self.b +``` + +You would use a layer by calling it on some tensor input(s), much like a Python +function. + +```python +x = ops.ones((2, 2)) +linear_layer = Linear(4, 2) +y = linear_layer(x) +print(y) +``` + +Note that the weights `w` and `b` are automatically tracked by the layer upon +being set as layer attributes: + +```python +assert linear_layer.weights == [linear_layer.w, linear_layer.b] +``` + +## Layers can have non-trainable weights + +Besides trainable weights, you can add non-trainable weights to a layer as +well. Such weights are meant not to be taken into account during +backpropagation, when you are training the layer. 
+
+Here's how to add and use a non-trainable weight:
+
+```python
+class ComputeSum(keras.layers.Layer):
+    def __init__(self, input_dim):
+        super().__init__()
+        self.total = self.add_weight(
+            initializer="zeros", shape=(input_dim,), trainable=False
+        )
+
+    def call(self, inputs):
+        self.total.assign_add(ops.sum(inputs, axis=0))
+        return self.total
+
+
+x = ops.ones((2, 2))
+my_sum = ComputeSum(2)
+y = my_sum(x)
+print(y.numpy())
+y = my_sum(x)
+print(y.numpy())
+```
+
+It's part of `layer.weights`, but it gets categorized as a non-trainable weight:
+
+```python
+print("weights:", len(my_sum.weights))
+print("non-trainable weights:", len(my_sum.non_trainable_weights))
+
+# It's not included in the trainable weights:
+print("trainable_weights:", my_sum.trainable_weights)
+```
+
+## Best practice: deferring weight creation until the shape of the inputs is known
+
+Our `Linear` layer above took an `input_dim` argument that was used to compute
+the shape of the weights `w` and `b` in `__init__()`:
+
+```python
+class Linear(keras.layers.Layer):
+    def __init__(self, units=32, input_dim=32):
+        super().__init__()
+        self.w = self.add_weight(
+            shape=(input_dim, units),
+            initializer="random_normal",
+            trainable=True,
+        )
+        self.b = self.add_weight(shape=(units,), initializer="zeros", trainable=True)
+
+    def call(self, inputs):
+        return ops.matmul(inputs, self.w) + self.b
+```
+
+In many cases, you may not know in advance the size of your inputs, and you
+would like to lazily create weights when that value becomes known, some time
+after instantiating the layer.
+
+In the Keras API, we recommend creating layer weights in the
+`build(self, input_shape)` method of your layer. Like this:
+
+```python
+class Linear(keras.layers.Layer):
+    def __init__(self, units=32):
+        super().__init__()
+        self.units = units
+
+    def build(self, input_shape):
+        self.w = self.add_weight(
+            shape=(input_shape[-1], self.units),
+            initializer="random_normal",
+            trainable=True,
+        )
+        self.b = self.add_weight(
+            shape=(self.units,), initializer="random_normal", trainable=True
+        )
+
+    def call(self, inputs):
+        return ops.matmul(inputs, self.w) + self.b
+```
+
+The `__call__()` method of your layer will automatically run `build()` the first
+time it is called. You now have a layer that's lazy and thus easier to use:
+
+```python
+# At instantiation, we don't know on what inputs this is going to get called
+linear_layer = Linear(32)
+
+# The layer's weights are created dynamically the first time the layer is called
+y = linear_layer(x)
+```
+
+Implementing `build()` separately as shown above nicely separates creating weights
+only once from using weights in every call.
+
+## Layers are recursively composable
+
+If you assign a Layer instance as an attribute of another Layer, the outer layer
+will start tracking the weights created by the inner layer.
+
+We recommend creating such sublayers in the `__init__()` method and leaving it to
+the first `__call__()` to trigger building their weights.
+
+```python
+class MLPBlock(keras.layers.Layer):
+    def __init__(self):
+        super().__init__()
+        self.linear_1 = Linear(32)
+        self.linear_2 = Linear(32)
+        self.linear_3 = Linear(1)
+
+    def call(self, inputs):
+        x = self.linear_1(inputs)
+        x = keras.activations.relu(x)
+        x = self.linear_2(x)
+        x = keras.activations.relu(x)
+        return self.linear_3(x)
+
+
+mlp = MLPBlock()
+y = mlp(ops.ones(shape=(3, 64)))  # The first call to the `mlp` will create the weights
+print("weights:", len(mlp.weights))
+print("trainable weights:", len(mlp.trainable_weights))
+```
+
+## Backend-agnostic layers and backend-specific layers
+
+As long as a layer only uses APIs from the `keras.ops` namespace
+(or other Keras namespaces such as `keras.activations`, `keras.random`, or `keras.layers`),
+then it can be used with any backend -- TensorFlow, JAX, or PyTorch.
+
+All layers you've seen so far in this guide work with all Keras backends.
+
+The `keras.ops` namespace gives you access to:
+
+- The NumPy API, e.g. `ops.matmul`, `ops.sum`, `ops.reshape`, `ops.stack`, etc.
+- Neural networks-specific APIs such as `ops.softmax`, `ops.conv`, `ops.binary_crossentropy`, `ops.relu`, etc.
+
+You can also use backend-native APIs in your layers (such as `tf.nn` functions),
+but if you do this, then your layer will only be usable with the backend in question.
+For instance, you could write the following JAX-specific layer using `jax.numpy`:
+
+```python
+import jax
+
+class Linear(keras.layers.Layer):
+    ...
+
+    def call(self, inputs):
+        return jax.numpy.matmul(inputs, self.w) + self.b
+```
+
+This would be the equivalent TensorFlow-specific layer:
+
+```python
+import tensorflow as tf
+
+class Linear(keras.layers.Layer):
+    ...
+
+    def call(self, inputs):
+        return tf.matmul(inputs, self.w) + self.b
+```
+
+And this would be the equivalent PyTorch-specific layer:
+
+```python
+import torch
+
+class Linear(keras.layers.Layer):
+    ...
+
+    def call(self, inputs):
+        return torch.matmul(inputs, self.w) + self.b
+```
+
+Because cross-backend compatibility is a tremendously useful property, we strongly
+recommend that you seek to always make your layers backend-agnostic by leveraging
+only Keras APIs.
+
+## The `add_loss()` method
+
+When writing the `call()` method of a layer, you can create loss tensors that
+you will want to use later, when writing your training loop. This is doable by
+calling `self.add_loss(value)`:
+
+```python
+# A layer that creates an activity regularization loss
+class ActivityRegularizationLayer(keras.layers.Layer):
+    def __init__(self, rate=1e-2):
+        super().__init__()
+        self.rate = rate
+
+    def call(self, inputs):
+        self.add_loss(self.rate * ops.mean(inputs))
+        return inputs
+```
+
+These losses (including those created by any inner layer) can be retrieved via
+`layer.losses`. This property is reset at the start of every `__call__()` to
+the top-level layer, so that `layer.losses` always contains the loss values
+created during the last forward pass.
+
+```python
+class OuterLayer(keras.layers.Layer):
+    def __init__(self):
+        super().__init__()
+        self.activity_reg = ActivityRegularizationLayer(1e-2)
+
+    def call(self, inputs):
+        return self.activity_reg(inputs)
+
+
+layer = OuterLayer()
+assert len(layer.losses) == 0  # No losses yet since the layer has never been called
+
+_ = layer(ops.zeros((1, 1)))
+assert len(layer.losses) == 1  # We created one loss value
+
+# `layer.losses` gets reset at the start of each __call__
+_ = layer(ops.zeros((1, 1)))
+assert len(layer.losses) == 1  # This is the loss created during the call above
+```
+
+In addition, the `losses` property also contains regularization losses created
+for the weights of any inner layer:
+
+```python
+class OuterLayerWithKernelRegularizer(keras.layers.Layer):
+    def __init__(self):
+        super().__init__()
+        self.dense = keras.layers.Dense(
+            32, kernel_regularizer=keras.regularizers.l2(1e-3)
+        )
+
+    def call(self, inputs):
+        return self.dense(inputs)
+
+
+layer = OuterLayerWithKernelRegularizer()
+_ = layer(ops.zeros((1, 1)))
+
+# This is `1e-3 * sum(layer.dense.kernel ** 2)`,
+# created by the `kernel_regularizer` above.
+print(layer.losses)
+```
+
+These losses are meant to be taken into account when writing custom training loops.
+
+They also work seamlessly with `fit()` (they get automatically summed and added to the main loss, if any):
+
+```python
+inputs = keras.Input(shape=(3,))
+outputs = ActivityRegularizationLayer()(inputs)
+model = keras.Model(inputs, outputs)
+
+# If there is a loss passed in `compile`, the regularization
+# losses get added to it
+model.compile(optimizer="adam", loss="mse")
+model.fit(np.random.random((2, 3)), np.random.random((2, 3)))
+
+# It's also possible not to pass any loss in `compile`,
+# since the model already has a loss to minimize, via the `add_loss`
+# call during the forward pass!
+model.compile(optimizer="adam")
+model.fit(np.random.random((2, 3)), np.random.random((2, 3)))
+```
+
+## You can optionally enable serialization on your layers
+
+If you need your custom layers to be serializable as part of a
+[Functional model](/guides/functional_api/),
+you can optionally implement a `get_config()` method:
+
+```python
+class Linear(keras.layers.Layer):
+    def __init__(self, units=32):
+        super().__init__()
+        self.units = units
+
+    def build(self, input_shape):
+        self.w = self.add_weight(
+            shape=(input_shape[-1], self.units),
+            initializer="random_normal",
+            trainable=True,
+        )
+        self.b = self.add_weight(
+            shape=(self.units,), initializer="random_normal", trainable=True
+        )
+
+    def call(self, inputs):
+        return ops.matmul(inputs, self.w) + self.b
+
+    def get_config(self):
+        return {"units": self.units}
+
+
+# Now you can recreate the layer from its config:
+layer = Linear(64)
+config = layer.get_config()
+print(config)
+new_layer = Linear.from_config(config)
+```
+
+Note that the `__init__()` method of the base `Layer` class takes some keyword
+arguments, in particular a `name` and a `dtype`.
It's good practice to pass +these arguments to the parent class in `__init__()` and to include them in the +layer config: + +```python +class Linear(keras.layers.Layer): + def __init__(self, units=32, **kwargs): + super().__init__(**kwargs) + self.units = units + + def build(self, input_shape): + self.w = self.add_weight( + shape=(input_shape[-1], self.units), + initializer="random_normal", + trainable=True, + ) + self.b = self.add_weight( + shape=(self.units,), initializer="random_normal", trainable=True + ) + + def call(self, inputs): + return ops.matmul(inputs, self.w) + self.b + + def get_config(self): + config = super().get_config() + config.update({"units": self.units}) + return config + + +layer = Linear(64) +config = layer.get_config() +print(config) +new_layer = Linear.from_config(config) +``` + +If you need more flexibility when deserializing the layer from its config, you +can also override the `from_config()` class method. This is the base +implementation of `from_config()`: + +```python +def from_config(cls, config): + return cls(**config) +``` + +To learn more about serialization and saving, see the complete +[guide to saving and serializing models](/guides/serialization_and_saving/). + +## Privileged `training` argument in the `call()` method + +Some layers, in particular the `BatchNormalization` layer and the `Dropout` +layer, have different behaviors during training and inference. For such +layers, it is standard practice to expose a `training` (boolean) argument in +the `call()` method. + +By exposing this argument in `call()`, you enable the built-in training and +evaluation loops (e.g. `fit()`) to correctly use the layer in training and +inference. + +```python +class CustomDropout(keras.layers.Layer): + def __init__(self, rate, **kwargs): + super().__init__(**kwargs) + self.rate = rate + self.seed_generator = keras.random.SeedGenerator(1337) + + def call(self, inputs, training=None): + if training: + return keras.random.dropout( + inputs, rate=self.rate, seed=self.seed_generator + ) + return inputs +``` + +## Privileged `mask` argument in the `call()` method + +The other privileged argument supported by `call()` is the `mask` argument. + +You will find it in all Keras RNN layers. A mask is a boolean tensor (one +boolean value per timestep in the input) used to skip certain input timesteps +when processing timeseries data. + +Keras will automatically pass the correct `mask` argument to `__call__()` for +layers that support it, when a mask is generated by a prior layer. +Mask-generating layers are the `Embedding` +layer configured with `mask_zero=True`, and the `Masking` layer. + +## The `Model` class + +In general, you will use the `Layer` class to define inner computation blocks, +and will use the `Model` class to define the outer model -- the object you +will train. + +For instance, in a ResNet50 model, you would have several ResNet blocks +subclassing `Layer`, and a single `Model` encompassing the entire ResNet50 +network. + +The `Model` class has the same API as `Layer`, with the following differences: + +- It exposes built-in training, evaluation, and prediction loops +(`model.fit()`, `model.evaluate()`, `model.predict()`). +- It exposes the list of its inner layers, via the `model.layers` property. +- It exposes saving and serialization APIs (`save()`, `save_weights()`...) 
+
+Effectively, the `Layer` class corresponds to what we refer to in the
+literature as a "layer" (as in "convolution layer" or "recurrent layer") or as
+a "block" (as in "ResNet block" or "Inception block").
+
+Meanwhile, the `Model` class corresponds to what is referred to in the
+literature as a "model" (as in "deep learning model") or as a "network" (as in
+"deep neural network").
+
+So if you're wondering, "should I use the `Layer` class or the `Model` class?",
+ask yourself: will I need to call `fit()` on it? Will I need to call `save()`
+on it? If so, go with `Model`. If not (either because your class is just a block
+in a bigger system, or because you are writing training & saving code yourself),
+use `Layer`.
+
+For instance, we could take our mini-resnet example above, and use it to build
+a `Model` that we could train with `fit()`, and that we could save with
+`save_weights()`:
+
+```python
+class ResNet(keras.Model):
+
+    def __init__(self, num_classes=1000):
+        super().__init__()
+        self.block_1 = ResNetBlock()
+        self.block_2 = ResNetBlock()
+        self.global_pool = layers.GlobalAveragePooling2D()
+        self.classifier = layers.Dense(num_classes)
+
+    def call(self, inputs):
+        x = self.block_1(inputs)
+        x = self.block_2(x)
+        x = self.global_pool(x)
+        return self.classifier(x)
+
+
+resnet = ResNet()
+dataset = ...
+resnet.fit(dataset, epochs=10)
+resnet.save("filepath.keras")
+```
+
+## Putting it all together: an end-to-end example
+
+Here's what you've learned so far:
+
+- A `Layer` encapsulates a state (created in `__init__()` or `build()`) and some
+computation (defined in `call()`).
+- Layers can be recursively nested to create new, bigger computation blocks.
+- Layers are backend-agnostic as long as they only use Keras APIs. You can use
+backend-native APIs (such as `jax.numpy`, `torch.nn` or `tf.nn`), but then
+your layer will only be usable with that specific backend.
+- Layers can create and track losses (typically regularization losses)
+via `add_loss()`.
+- The outer container, the thing you want to train, is a `Model`. A `Model` is
+just like a `Layer`, but with added training and serialization utilities.
+
+Let's put all of these things together into an end-to-end example: we're going
+to implement a Variational AutoEncoder (VAE) in a backend-agnostic fashion
+-- so that it runs the same with TensorFlow, JAX, and PyTorch.
+We'll train it on MNIST digits.
+
+Our VAE will be a subclass of `Model`, built as a nested composition of layers
+that subclass `Layer`. It will feature a regularization loss (KL divergence).
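+
+For reference, the KL term used in `call()` below is the standard closed form:
+for a diagonal Gaussian posterior with mean `z_mean` and log-variance
+`z_log_var`, the KL divergence from a standard normal prior is
+`-0.5 * (1 + z_log_var - z_mean**2 - exp(z_log_var))` per latent dimension,
+which is exactly the quantity that `call()` averages via `ops.mean`.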
+ +```python +class Sampling(layers.Layer): + """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit.""" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.seed_generator = keras.random.SeedGenerator(1337) + + def call(self, inputs): + z_mean, z_log_var = inputs + batch = ops.shape(z_mean)[0] + dim = ops.shape(z_mean)[1] + epsilon = keras.random.normal(shape=(batch, dim), seed=self.seed_generator) + return z_mean + ops.exp(0.5 * z_log_var) * epsilon + + +class Encoder(layers.Layer): + """Maps MNIST digits to a triplet (z_mean, z_log_var, z).""" + + def __init__(self, latent_dim=32, intermediate_dim=64, name="encoder", **kwargs): + super().__init__(name=name, **kwargs) + self.dense_proj = layers.Dense(intermediate_dim, activation="relu") + self.dense_mean = layers.Dense(latent_dim) + self.dense_log_var = layers.Dense(latent_dim) + self.sampling = Sampling() + + def call(self, inputs): + x = self.dense_proj(inputs) + z_mean = self.dense_mean(x) + z_log_var = self.dense_log_var(x) + z = self.sampling((z_mean, z_log_var)) + return z_mean, z_log_var, z + + +class Decoder(layers.Layer): + """Converts z, the encoded digit vector, back into a readable digit.""" + + def __init__(self, original_dim, intermediate_dim=64, name="decoder", **kwargs): + super().__init__(name=name, **kwargs) + self.dense_proj = layers.Dense(intermediate_dim, activation="relu") + self.dense_output = layers.Dense(original_dim, activation="sigmoid") + + def call(self, inputs): + x = self.dense_proj(inputs) + return self.dense_output(x) + + +class VariationalAutoEncoder(keras.Model): + """Combines the encoder and decoder into an end-to-end model for training.""" + + def __init__( + self, + original_dim, + intermediate_dim=64, + latent_dim=32, + name="autoencoder", + **kwargs + ): + super().__init__(name=name, **kwargs) + self.original_dim = original_dim + self.encoder = Encoder(latent_dim=latent_dim, intermediate_dim=intermediate_dim) + self.decoder = Decoder(original_dim, intermediate_dim=intermediate_dim) + + def call(self, inputs): + z_mean, z_log_var, z = self.encoder(inputs) + reconstructed = self.decoder(z) + # Add KL divergence regularization loss. + kl_loss = -0.5 * ops.mean( + z_log_var - ops.square(z_mean) - ops.exp(z_log_var) + 1 + ) + self.add_loss(kl_loss) + return reconstructed +``` + +Let's train it on MNIST using the `fit()` API: + +```python +(x_train, _), _ = keras.datasets.mnist.load_data() +x_train = x_train.reshape(60000, 784).astype("float32") / 255 + +original_dim = 784 +vae = VariationalAutoEncoder(784, 64, 32) + +optimizer = keras.optimizers.Adam(learning_rate=1e-3) +vae.compile(optimizer, loss=keras.losses.MeanSquaredError()) + +vae.fit(x_train, x_train, epochs=2, batch_size=64) +``` + diff --git a/.tether/vignettes-src/parked/_custom_train_step_in_jax.Rmd b/.tether/vignettes-src/parked/_custom_train_step_in_jax.Rmd new file mode 100644 index 0000000000..d5a76a8bcd --- /dev/null +++ b/.tether/vignettes-src/parked/_custom_train_step_in_jax.Rmd @@ -0,0 +1,337 @@ +--- +title: Customizing what happens in `fit()` with JAX +author: '[fchollet](https://twitter.com/fchollet)' +date-created: 2023/06/27 +last-modified: 2023/06/27 +description: Overriding the training step of the Model class with JAX. 
+accelerator: GPU +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras-io/guides/custom_train_step_in_jax.py +--- + +## Introduction + +When you're doing supervised learning, you can use `fit()` and everything works +smoothly. + +When you need to take control of every little detail, you can write your own training +loop entirely from scratch. + +But what if you need a custom training algorithm, but you still want to benefit from +the convenient features of `fit()`, such as callbacks, built-in distribution support, +or step fusing? + +A core principle of Keras is **progressive disclosure of complexity**. You should +always be able to get into lower-level workflows in a gradual way. You shouldn't fall +off a cliff if the high-level functionality doesn't exactly match your use case. You +should be able to gain more control over the small details while retaining a +commensurate amount of high-level convenience. + +When you need to customize what `fit()` does, you should **override the training step +function of the `Model` class**. This is the function that is called by `fit()` for +every batch of data. You will then be able to call `fit()` as usual -- and it will be +running your own learning algorithm. + +Note that this pattern does not prevent you from building models with the Functional +API. You can do this whether you're building `Sequential` models, Functional API +models, or subclassed models. + +Let's see how that works. + +## Setup + +```python +import os + +# This guide can only be run with the JAX backend. +os.environ["KERAS_BACKEND"] = "jax" + +import jax +import keras +import numpy as np +``` + +## A first simple example + +Let's start from a simple example: + +- We create a new class that subclasses `keras.Model`. +- We implement a fully-stateless `compute_loss_and_updates()` method +to compute the loss as well as the updated values for the non-trainable +variables of the model. Internally, it calls `stateless_call()` and +the built-in `compute_loss()`. +- We implement a fully-stateless `train_step()` method to compute current +metric values (including the loss) as well as updated values for the +trainable variables, the optimizer variables, and the metric variables. + +Note that you can also take into account the `sample_weight` argument by: + +- Unpacking the data as `x, y, sample_weight = data` +- Passing `sample_weight` to `compute_loss()` +- Passing `sample_weight` alongside `y` and `y_pred` +to metrics in `stateless_update_state()` + +```python +class CustomModel(keras.Model): + def compute_loss_and_updates( + self, + trainable_variables, + non_trainable_variables, + x, + y, + training=False, + ): + y_pred, non_trainable_variables = self.stateless_call( + trainable_variables, + non_trainable_variables, + x, + training=training, + ) + loss = self.compute_loss(x, y, y_pred) + return loss, (y_pred, non_trainable_variables) + + def train_step(self, state, data): + ( + trainable_variables, + non_trainable_variables, + optimizer_variables, + metrics_variables, + ) = state + x, y = data + + # Get the gradient function. + grad_fn = jax.value_and_grad(self.compute_loss_and_updates, has_aux=True) + + # Compute the gradients. + (loss, (y_pred, non_trainable_variables)), grads = grad_fn( + trainable_variables, + non_trainable_variables, + x, + y, + training=True, + ) + + # Update trainable variables and optimizer variables. 
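+        # `stateless_apply` is the purely functional counterpart of
+        # `apply_gradients`: it consumes the current optimizer variables and
+        # returns updated copies of both the trainable variables and the
+        # optimizer variables, without mutating any state.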
+ ( + trainable_variables, + optimizer_variables, + ) = self.optimizer.stateless_apply( + optimizer_variables, grads, trainable_variables + ) + + # Update metrics. + new_metrics_vars = [] + logs = {} + for metric in self.metrics: + this_metric_vars = metrics_variables[ + len(new_metrics_vars) : len(new_metrics_vars) + len(metric.variables) + ] + if metric.name == "loss": + this_metric_vars = metric.stateless_update_state(this_metric_vars, loss) + else: + this_metric_vars = metric.stateless_update_state( + this_metric_vars, y, y_pred + ) + logs[metric.name] = metric.stateless_result(this_metric_vars) + new_metrics_vars += this_metric_vars + + # Return metric logs and updated state variables. + state = ( + trainable_variables, + non_trainable_variables, + optimizer_variables, + new_metrics_vars, + ) + return logs, state +``` + +Let's try this out: + +```python +# Construct and compile an instance of CustomModel +inputs = keras.Input(shape=(32,)) +outputs = keras.layers.Dense(1)(inputs) +model = CustomModel(inputs, outputs) +model.compile(optimizer="adam", loss="mse", metrics=["mae"]) + +# Just use `fit` as usual +x = np.random.random((1000, 32)) +y = np.random.random((1000, 1)) +model.fit(x, y, epochs=3) +``` + +## Going lower-level + +Naturally, you could just skip passing a loss function in `compile()`, and instead do +everything *manually* in `train_step`. Likewise for metrics. + +Here's a lower-level example, that only uses `compile()` to configure the optimizer: + +```python +class CustomModel(keras.Model): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.loss_tracker = keras.metrics.Mean(name="loss") + self.mae_metric = keras.metrics.MeanAbsoluteError(name="mae") + self.loss_fn = keras.losses.MeanSquaredError() + + def compute_loss_and_updates( + self, + trainable_variables, + non_trainable_variables, + x, + y, + training=False, + ): + y_pred, non_trainable_variables = self.stateless_call( + trainable_variables, + non_trainable_variables, + x, + training=training, + ) + loss = self.loss_fn(y, y_pred) + return loss, (y_pred, non_trainable_variables) + + def train_step(self, state, data): + ( + trainable_variables, + non_trainable_variables, + optimizer_variables, + metrics_variables, + ) = state + x, y = data + + # Get the gradient function. + grad_fn = jax.value_and_grad(self.compute_loss_and_updates, has_aux=True) + + # Compute the gradients. + (loss, (y_pred, non_trainable_variables)), grads = grad_fn( + trainable_variables, + non_trainable_variables, + x, + y, + training=True, + ) + + # Update trainable variables and optimizer variables. + ( + trainable_variables, + optimizer_variables, + ) = self.optimizer.stateless_apply( + optimizer_variables, grads, trainable_variables + ) + + # Update metrics. + loss_tracker_vars = metrics_variables[: len(self.loss_tracker.variables)] + mae_metric_vars = metrics_variables[len(self.loss_tracker.variables) :] + + loss_tracker_vars = self.loss_tracker.stateless_update_state( + loss_tracker_vars, loss + ) + mae_metric_vars = self.mae_metric.stateless_update_state( + mae_metric_vars, y, y_pred + ) + + logs = {} + logs[self.loss_tracker.name] = self.loss_tracker.stateless_result( + loss_tracker_vars + ) + logs[self.mae_metric.name] = self.mae_metric.stateless_result(mae_metric_vars) + + new_metrics_vars = loss_tracker_vars + mae_metric_vars + + # Return metric logs and updated state variables. 
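+        # `fit()` passes this state tuple back in as the `state` argument of
+        # the next `train_step()` call, which is what keeps the step stateless.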
+        state = (
+            trainable_variables,
+            non_trainable_variables,
+            optimizer_variables,
+            new_metrics_vars,
+        )
+        return logs, state
+
+    @property
+    def metrics(self):
+        # We list our `Metric` objects here so that `reset_states()` can be
+        # called automatically at the start of each epoch
+        # or at the start of `evaluate()`.
+        return [self.loss_tracker, self.mae_metric]
+
+
+# Construct an instance of CustomModel
+inputs = keras.Input(shape=(32,))
+outputs = keras.layers.Dense(1)(inputs)
+model = CustomModel(inputs, outputs)
+
+# We don't pass a loss or metrics here.
+model.compile(optimizer="adam")
+
+# Just use `fit` as usual -- you can use callbacks, etc.
+x = np.random.random((1000, 32))
+y = np.random.random((1000, 1))
+model.fit(x, y, epochs=5)
+```
+
+## Providing your own evaluation step
+
+What if you want to do the same for calls to `model.evaluate()`? Then you would
+override `test_step` in exactly the same way. Here's what it looks like:
+
+```python
+class CustomModel(keras.Model):
+    def test_step(self, state, data):
+        # Unpack the data.
+        x, y = data
+        (
+            trainable_variables,
+            non_trainable_variables,
+            metrics_variables,
+        ) = state
+
+        # Compute predictions and loss.
+        y_pred, non_trainable_variables = self.stateless_call(
+            trainable_variables,
+            non_trainable_variables,
+            x,
+            training=False,
+        )
+        loss = self.compute_loss(x, y, y_pred)
+
+        # Update metrics.
+        new_metrics_vars = []
+        logs = {}
+        for metric in self.metrics:
+            this_metric_vars = metrics_variables[
+                len(new_metrics_vars) : len(new_metrics_vars) + len(metric.variables)
+            ]
+            if metric.name == "loss":
+                this_metric_vars = metric.stateless_update_state(this_metric_vars, loss)
+            else:
+                this_metric_vars = metric.stateless_update_state(
+                    this_metric_vars, y, y_pred
+                )
+            logs[metric.name] = metric.stateless_result(this_metric_vars)
+            new_metrics_vars += this_metric_vars
+
+        # Return metric logs and updated state variables.
+        state = (
+            trainable_variables,
+            non_trainable_variables,
+            new_metrics_vars,
+        )
+        return logs, state
+
+
+# Construct an instance of CustomModel
+inputs = keras.Input(shape=(32,))
+outputs = keras.layers.Dense(1)(inputs)
+model = CustomModel(inputs, outputs)
+model.compile(loss="mse", metrics=["mae"])
+
+# Evaluate with our custom test_step
+x = np.random.random((1000, 32))
+y = np.random.random((1000, 1))
+model.evaluate(x, y)
+```
+
+That's it!
+
diff --git a/.tether/vignettes-src/parked/_custom_train_step_in_torch.Rmd b/.tether/vignettes-src/parked/_custom_train_step_in_torch.Rmd
new file mode 100644
index 0000000000..505a4422f4
--- /dev/null
+++ b/.tether/vignettes-src/parked/_custom_train_step_in_torch.Rmd
@@ -0,0 +1,483 @@
+---
+title: Customizing what happens in `fit()` with PyTorch
+author: '[fchollet](https://twitter.com/fchollet)'
+date-created: 2023/06/27
+last-modified: 2023/06/27
+description: Overriding the training step of the Model class with PyTorch.
+accelerator: GPU
+output: rmarkdown::html_vignette
+knit: ({source(here::here("tools/knit.R")); knit_vignette})
+tether: ~/github/keras-team/keras-io/guides/custom_train_step_in_torch.py
+---
+
+## Introduction
+
+When you're doing supervised learning, you can use `fit()` and everything works
+smoothly.
+
+When you need to take control of every little detail, you can write your own training
+loop entirely from scratch.
+
+But what if you need a custom training algorithm, but you still want to benefit from
+the convenient features of `fit()`, such as callbacks, built-in distribution support,
+or step fusing?
+ +A core principle of Keras is **progressive disclosure of complexity**. You should +always be able to get into lower-level workflows in a gradual way. You shouldn't fall +off a cliff if the high-level functionality doesn't exactly match your use case. You +should be able to gain more control over the small details while retaining a +commensurate amount of high-level convenience. + +When you need to customize what `fit()` does, you should **override the training step +function of the `Model` class**. This is the function that is called by `fit()` for +every batch of data. You will then be able to call `fit()` as usual -- and it will be +running your own learning algorithm. + +Note that this pattern does not prevent you from building models with the Functional +API. You can do this whether you're building `Sequential` models, Functional API +models, or subclassed models. + +Let's see how that works. + +## Setup + +```python +import os + +# This guide can only be run with the torch backend. +os.environ["KERAS_BACKEND"] = "torch" + +import torch +import keras +from keras import layers +import numpy as np +``` + +## A first simple example + +Let's start from a simple example: + +- We create a new class that subclasses `keras.Model`. +- We just override the method `train_step(self, data)`. +- We return a dictionary mapping metric names (including the loss) to their current +value. + +The input argument `data` is what gets passed to fit as training data: + +- If you pass NumPy arrays, by calling `fit(x, y, ...)`, then `data` will be the tuple +`(x, y)` +- If you pass a `torch.utils.data.DataLoader` or a `tf.data.Dataset`, +by calling `fit(dataset, ...)`, then `data` will be what gets yielded +by `dataset` at each batch. + +In the body of the `train_step()` method, we implement a regular training update, +similar to what you are already familiar with. Importantly, **we compute the loss via +`self.compute_loss()`**, which wraps the loss(es) function(s) that were passed to +`compile()`. + +Similarly, we call `metric.update_state(y, y_pred)` on metrics from `self.metrics`, +to update the state of the metrics that were passed in `compile()`, +and we query results from `self.metrics` at the end to retrieve their current value. + +```python +class CustomModel(keras.Model): + def train_step(self, data): + # Unpack the data. Its structure depends on your model and + # on what you pass to `fit()`. + x, y = data + + # Call torch.nn.Module.zero_grad() to clear the leftover gradients + # for the weights from the previous train step. + self.zero_grad() + + # Compute loss + y_pred = self(x, training=True) # Forward pass + loss = self.compute_loss(y=y, y_pred=y_pred) + + # Call torch.Tensor.backward() on the loss to compute gradients + # for the weights. + loss.backward() + + trainable_weights = [v for v in self.trainable_weights] + gradients = [v.value.grad for v in trainable_weights] + + # Update weights + with torch.no_grad(): + self.optimizer.apply(gradients, trainable_weights) + + # Update metrics (includes the metric that tracks the loss) + for metric in self.metrics: + if metric.name == "loss": + metric.update_state(loss) + else: + metric.update_state(y, y_pred) + + # Return a dict mapping metric names to current value + # Note that it will include the loss (tracked in self.metrics). 
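+        # `fit()` uses this dict to drive the progress bar and to populate the
+        # `logs` passed to callbacks.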
+        return {m.name: m.result() for m in self.metrics}
+```
+
+Let's try this out:
+
+```python
+# Construct and compile an instance of CustomModel
+inputs = keras.Input(shape=(32,))
+outputs = keras.layers.Dense(1)(inputs)
+model = CustomModel(inputs, outputs)
+model.compile(optimizer="adam", loss="mse", metrics=["mae"])
+
+# Just use `fit` as usual
+x = np.random.random((1000, 32))
+y = np.random.random((1000, 1))
+model.fit(x, y, epochs=3)
+```
+
+## Going lower-level
+
+Naturally, you could just skip passing a loss function in `compile()`, and instead do
+everything *manually* in `train_step`. Likewise for metrics.
+
+Here's a lower-level example, that only uses `compile()` to configure the optimizer:
+
+- We start by creating `Metric` instances to track our loss and a MAE score (in `__init__()`).
+- We implement a custom `train_step()` that updates the state of these metrics
+(by calling `update_state()` on them), then queries them (via `result()`) to return their current average value,
+to be displayed by the progress bar and to be passed to any callback.
+- Note that we would need to call `reset_states()` on our metrics between each epoch! Otherwise
+calling `result()` would return an average since the start of training, whereas we usually work
+with per-epoch averages. Thankfully, the framework can do that for us: just list any metric
+you want to reset in the `metrics` property of the model. The model will call `reset_states()`
+on any object listed here at the beginning of each `fit()` epoch or at the beginning of a call to
+`evaluate()`.
+
+```python
+class CustomModel(keras.Model):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.loss_tracker = keras.metrics.Mean(name="loss")
+        self.mae_metric = keras.metrics.MeanAbsoluteError(name="mae")
+        self.loss_fn = keras.losses.MeanSquaredError()
+
+    def train_step(self, data):
+        x, y = data
+
+        # Call torch.nn.Module.zero_grad() to clear the leftover gradients
+        # for the weights from the previous train step.
+        self.zero_grad()
+
+        # Compute loss
+        y_pred = self(x, training=True)  # Forward pass
+        loss = self.loss_fn(y, y_pred)
+
+        # Call torch.Tensor.backward() on the loss to compute gradients
+        # for the weights.
+        loss.backward()
+
+        trainable_weights = [v for v in self.trainable_weights]
+        gradients = [v.value.grad for v in trainable_weights]
+
+        # Update weights
+        with torch.no_grad():
+            self.optimizer.apply(gradients, trainable_weights)
+
+        # Compute our own metrics
+        self.loss_tracker.update_state(loss)
+        self.mae_metric.update_state(y, y_pred)
+        return {
+            "loss": self.loss_tracker.result(),
+            "mae": self.mae_metric.result(),
+        }
+
+    @property
+    def metrics(self):
+        # We list our `Metric` objects here so that `reset_states()` can be
+        # called automatically at the start of each epoch
+        # or at the start of `evaluate()`.
+        return [self.loss_tracker, self.mae_metric]
+
+
+# Construct an instance of CustomModel
+inputs = keras.Input(shape=(32,))
+outputs = keras.layers.Dense(1)(inputs)
+model = CustomModel(inputs, outputs)
+
+# We don't pass a loss or metrics here.
+model.compile(optimizer="adam")
+
+# Just use `fit` as usual -- you can use callbacks, etc.
+x = np.random.random((1000, 32))
+y = np.random.random((1000, 1))
+model.fit(x, y, epochs=5)
+```
+
+## Supporting `sample_weight` & `class_weight`
+
+You may have noticed that our first basic example didn't make any mention of sample
+weighting.
If you want to support the `fit()` arguments `sample_weight` and +`class_weight`, you'd simply do the following: + +- Unpack `sample_weight` from the `data` argument +- Pass it to `compute_loss` & `update_state` (of course, you could also just apply +it manually if you don't rely on `compile()` for losses & metrics) +- That's it. + +```python +class CustomModel(keras.Model): + def train_step(self, data): + # Unpack the data. Its structure depends on your model and + # on what you pass to `fit()`. + if len(data) == 3: + x, y, sample_weight = data + else: + sample_weight = None + x, y = data + + # Call torch.nn.Module.zero_grad() to clear the leftover gradients + # for the weights from the previous train step. + self.zero_grad() + + # Compute loss + y_pred = self(x, training=True) # Forward pass + loss = self.compute_loss( + y=y, + y_pred=y_pred, + sample_weight=sample_weight, + ) + + # Call torch.Tensor.backward() on the loss to compute gradients + # for the weights. + loss.backward() + + trainable_weights = [v for v in self.trainable_weights] + gradients = [v.value.grad for v in trainable_weights] + + # Update weights + with torch.no_grad(): + self.optimizer.apply(gradients, trainable_weights) + + # Update metrics (includes the metric that tracks the loss) + for metric in self.metrics: + if metric.name == "loss": + metric.update_state(loss) + else: + metric.update_state(y, y_pred, sample_weight=sample_weight) + + # Return a dict mapping metric names to current value + # Note that it will include the loss (tracked in self.metrics). + return {m.name: m.result() for m in self.metrics} + + +# Construct and compile an instance of CustomModel +inputs = keras.Input(shape=(32,)) +outputs = keras.layers.Dense(1)(inputs) +model = CustomModel(inputs, outputs) +model.compile(optimizer="adam", loss="mse", metrics=["mae"]) + +# You can now use sample_weight argument +x = np.random.random((1000, 32)) +y = np.random.random((1000, 1)) +sw = np.random.random((1000, 1)) +model.fit(x, y, sample_weight=sw, epochs=3) +``` + +## Providing your own evaluation step + +What if you want to do the same for calls to `model.evaluate()`? Then you would +override `test_step` in exactly the same way. Here's what it looks like: + +```python +class CustomModel(keras.Model): + def test_step(self, data): + # Unpack the data + x, y = data + # Compute predictions + y_pred = self(x, training=False) + # Updates the metrics tracking the loss + loss = self.compute_loss(y=y, y_pred=y_pred) + # Update the metrics. + for metric in self.metrics: + if metric.name == "loss": + metric.update_state(loss) + else: + metric.update_state(y, y_pred) + # Return a dict mapping metric names to current value. + # Note that it will include the loss (tracked in self.metrics). + return {m.name: m.result() for m in self.metrics} + + +# Construct an instance of CustomModel +inputs = keras.Input(shape=(32,)) +outputs = keras.layers.Dense(1)(inputs) +model = CustomModel(inputs, outputs) +model.compile(loss="mse", metrics=["mae"]) + +# Evaluate with our custom test_step +x = np.random.random((1000, 32)) +y = np.random.random((1000, 1)) +model.evaluate(x, y) +``` + +## Wrapping up: an end-to-end GAN example + +Let's walk through an end-to-end example that leverages everything you just learned. + +Let's consider: + +- A generator network meant to generate 28x28x1 images. +- A discriminator network meant to classify 28x28x1 images into two classes ("fake" and +"real"). +- One optimizer for each. +- A loss function to train the discriminator. 
+ +```python +# Create the discriminator +discriminator = keras.Sequential( + [ + keras.Input(shape=(28, 28, 1)), + layers.Conv2D(64, (3, 3), strides=(2, 2), padding="same"), + layers.LeakyReLU(negative_slope=0.2), + layers.Conv2D(128, (3, 3), strides=(2, 2), padding="same"), + layers.LeakyReLU(negative_slope=0.2), + layers.GlobalMaxPooling2D(), + layers.Dense(1), + ], + name="discriminator", +) + +# Create the generator +latent_dim = 128 +generator = keras.Sequential( + [ + keras.Input(shape=(latent_dim,)), + # We want to generate 128 coefficients to reshape into a 7x7x128 map + layers.Dense(7 * 7 * 128), + layers.LeakyReLU(negative_slope=0.2), + layers.Reshape((7, 7, 128)), + layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding="same"), + layers.LeakyReLU(negative_slope=0.2), + layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding="same"), + layers.LeakyReLU(negative_slope=0.2), + layers.Conv2D(1, (7, 7), padding="same", activation="sigmoid"), + ], + name="generator", +) +``` + +Here's a feature-complete GAN class, overriding `compile()` to use its own signature, +and implementing the entire GAN algorithm in 17 lines in `train_step`: + +```python +class GAN(keras.Model): + def __init__(self, discriminator, generator, latent_dim): + super().__init__() + self.discriminator = discriminator + self.generator = generator + self.latent_dim = latent_dim + self.d_loss_tracker = keras.metrics.Mean(name="d_loss") + self.g_loss_tracker = keras.metrics.Mean(name="g_loss") + self.seed_generator = keras.random.SeedGenerator(1337) + self.built = True + + @property + def metrics(self): + return [self.d_loss_tracker, self.g_loss_tracker] + + def compile(self, d_optimizer, g_optimizer, loss_fn): + super().compile() + self.d_optimizer = d_optimizer + self.g_optimizer = g_optimizer + self.loss_fn = loss_fn + + def train_step(self, real_images): + device = "cuda" if torch.cuda.is_available() else "cpu" + if isinstance(real_images, tuple): + real_images = real_images[0] + # Sample random points in the latent space + batch_size = real_images.shape[0] + random_latent_vectors = keras.random.normal( + shape=(batch_size, self.latent_dim), seed=self.seed_generator + ) + + # Decode them to fake images + generated_images = self.generator(random_latent_vectors) + + # Combine them with real images + real_images = torch.tensor(real_images, device=device) + combined_images = torch.concat([generated_images, real_images], axis=0) + + # Assemble labels discriminating real from fake images + labels = torch.concat( + [ + torch.ones((batch_size, 1), device=device), + torch.zeros((batch_size, 1), device=device), + ], + axis=0, + ) + # Add random noise to the labels - important trick! + labels += 0.05 * keras.random.uniform(labels.shape, seed=self.seed_generator) + + # Train the discriminator + self.zero_grad() + predictions = self.discriminator(combined_images) + d_loss = self.loss_fn(labels, predictions) + d_loss.backward() + grads = [v.value.grad for v in self.discriminator.trainable_weights] + with torch.no_grad(): + self.d_optimizer.apply(grads, self.discriminator.trainable_weights) + + # Sample random points in the latent space + random_latent_vectors = keras.random.normal( + shape=(batch_size, self.latent_dim), seed=self.seed_generator + ) + + # Assemble labels that say "all real images" + misleading_labels = torch.zeros((batch_size, 1), device=device) + + # Train the generator (note that we should *not* update the weights + # of the discriminator)! 
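+        # Calling `zero_grad()` on the whole GAN module below also clears the
+        # discriminator gradients left over from the step above; since we only
+        # read gradients from `self.generator.trainable_weights` afterwards,
+        # the discriminator weights are left untouched.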
+        self.zero_grad()
+        predictions = self.discriminator(self.generator(random_latent_vectors))
+        g_loss = self.loss_fn(misleading_labels, predictions)
+        g_loss.backward()
+        grads = [v.value.grad for v in self.generator.trainable_weights]
+        with torch.no_grad():
+            self.g_optimizer.apply(grads, self.generator.trainable_weights)
+
+        # Update metrics and return their value.
+        self.d_loss_tracker.update_state(d_loss)
+        self.g_loss_tracker.update_state(g_loss)
+        return {
+            "d_loss": self.d_loss_tracker.result(),
+            "g_loss": self.g_loss_tracker.result(),
+        }
+```
+
+Let's test-drive it:
+
+```python
+# Prepare the dataset. We use both the training & test MNIST digits.
+batch_size = 64
+(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
+all_digits = np.concatenate([x_train, x_test])
+all_digits = all_digits.astype("float32") / 255.0
+all_digits = np.reshape(all_digits, (-1, 28, 28, 1))
+
+# Create a TensorDataset
+dataset = torch.utils.data.TensorDataset(
+    torch.from_numpy(all_digits), torch.from_numpy(all_digits)
+)
+# Create a DataLoader
+dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)
+
+gan = GAN(discriminator=discriminator, generator=generator, latent_dim=latent_dim)
+gan.compile(
+    d_optimizer=keras.optimizers.Adam(learning_rate=0.0003),
+    g_optimizer=keras.optimizers.Adam(learning_rate=0.0003),
+    loss_fn=keras.losses.BinaryCrossentropy(from_logits=True),
+)
+
+gan.fit(dataloader, epochs=1)
+```
+
+The ideas behind deep learning are simple, so why should their implementation be painful?
+
diff --git a/.tether/vignettes-src/parked/_distributed_training_with_jax.Rmd b/.tether/vignettes-src/parked/_distributed_training_with_jax.Rmd
new file mode 100644
index 0000000000..05de6aaf6b
--- /dev/null
+++ b/.tether/vignettes-src/parked/_distributed_training_with_jax.Rmd
@@ -0,0 +1,263 @@
+---
+title: Multi-GPU distributed training with JAX
+author: '[fchollet](https://twitter.com/fchollet)'
+date-created: 2023/07/11
+last-modified: 2023/07/11
+description: Guide to multi-GPU/TPU training for Keras models with JAX.
+accelerator: GPU
+output: rmarkdown::html_vignette
+knit: ({source(here::here("tools/knit.R")); knit_vignette})
+tether: ~/github/keras-team/keras-io/guides/distributed_training_with_jax.py
+---
+
+## Introduction
+
+There are generally two ways to distribute computation across multiple devices:
+
+**Data parallelism**, where a single model gets replicated on multiple devices or
+multiple machines. Each of them processes different batches of data, then they merge
+their results. There exist many variants of this setup that differ in how the different
+model replicas merge results, in whether they stay in sync at every batch or whether they
+are more loosely coupled, etc.
+
+**Model parallelism**, where different parts of a single model run on different devices,
+processing a single batch of data together. This works best with models that have a
+naturally-parallel architecture, such as models that feature multiple branches.
+
+This guide focuses on data parallelism, in particular **synchronous data parallelism**,
+where the different replicas of the model stay in sync after each batch they process.
+Synchronicity keeps the model convergence behavior identical to what you would see for
+single-device training.
+
+Specifically, this guide teaches you how to use `jax.sharding` APIs to train Keras
+models, with minimal changes to your code, on multiple GPUs or TPUs (typically 2 to 16)
+installed on a single machine (single host, multi-device training). This is the
+most common setup for researchers and small-scale industry workflows.
+
+## Setup
+
+Let's start by defining the function that creates the model that we will train,
+and the function that creates the dataset we will train on (MNIST in this case).
+
+```python
+import os
+
+os.environ["KERAS_BACKEND"] = "jax"
+
+import jax
+import numpy as np
+import tensorflow as tf
+import keras
+
+from jax.experimental import mesh_utils
+from jax.sharding import Mesh
+from jax.sharding import NamedSharding
+from jax.sharding import PartitionSpec as P
+
+
+def get_model():
+    # Make a simple convnet with batch normalization and dropout.
+    inputs = keras.Input(shape=(28, 28, 1))
+    x = keras.layers.Rescaling(1.0 / 255.0)(inputs)
+    x = keras.layers.Conv2D(filters=12, kernel_size=3, padding="same", use_bias=False)(
+        x
+    )
+    x = keras.layers.BatchNormalization(scale=False, center=True)(x)
+    x = keras.layers.ReLU()(x)
+    x = keras.layers.Conv2D(
+        filters=24,
+        kernel_size=6,
+        use_bias=False,
+        strides=2,
+    )(x)
+    x = keras.layers.BatchNormalization(scale=False, center=True)(x)
+    x = keras.layers.ReLU()(x)
+    x = keras.layers.Conv2D(
+        filters=32,
+        kernel_size=6,
+        padding="same",
+        strides=2,
+        name="large_k",
+    )(x)
+    x = keras.layers.BatchNormalization(scale=False, center=True)(x)
+    x = keras.layers.ReLU()(x)
+    x = keras.layers.GlobalAveragePooling2D()(x)
+    x = keras.layers.Dense(256, activation="relu")(x)
+    x = keras.layers.Dropout(0.5)(x)
+    outputs = keras.layers.Dense(10)(x)
+    model = keras.Model(inputs, outputs)
+    return model
+
+
+def get_datasets():
+    # Load the data and split it between train and test sets
+    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
+
+    # Scale images to the [0, 1] range
+    x_train = x_train.astype("float32")
+    x_test = x_test.astype("float32")
+    # Make sure images have shape (28, 28, 1)
+    x_train = np.expand_dims(x_train, -1)
+    x_test = np.expand_dims(x_test, -1)
+    print("x_train shape:", x_train.shape)
+    print(x_train.shape[0], "train samples")
+    print(x_test.shape[0], "test samples")
+
+    # Create TF Datasets
+    train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
+    eval_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))
+    return train_data, eval_data
+```
+
+## Single-host, multi-device synchronous training
+
+In this setup, you have one machine with several GPUs or TPUs on it (typically 2 to 16).
+Each device will run a copy of your model (called a **replica**). For simplicity, in
+what follows, we'll assume we're dealing with 8 GPUs, at no loss of generality.
+
+**How it works**
+
+At each step of training:
+
+- The current batch of data (called **global batch**) is split into 8 different
+  sub-batches (called **local batches**). For instance, if the global batch has 512
+  samples, each of the 8 local batches will have 64 samples.
+- Each of the 8 replicas independently processes a local batch: they run a forward pass,
+  then a backward pass, outputting the gradient of the weights with respect to the loss of
+  the model on the local batch.
+- The weight updates originating from local gradients are efficiently merged across the 8
+  replicas. Because this is done at the end of every step, the replicas always stay in
+  sync (see the short sketch after this list).
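+
+To make the merge step concrete, here is a toy NumPy sketch (a minimal
+illustration only, separate from the `jax.sharding` machinery used below):
+with equal-sized local batches and a loss that is a mean over samples,
+averaging the per-replica gradients is equivalent to computing the gradient
+over the whole global batch.
+
+```python
+import numpy as np
+
+# One gradient per replica, each computed on its own 64-sample local batch.
+local_grads = [np.random.default_rng(i).normal(size=(10,)) for i in range(8)]
+
+# The "merge": average the local gradients. For a mean-reduced loss over
+# equal-sized local batches, this equals the global-batch gradient.
+merged_grad = np.mean(local_grads, axis=0)
+print(merged_grad.shape)  # (10,)
+```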
+
+In practice, the process of synchronously updating the weights of the model replicas is
+handled at the level of each individual weight variable. This is done by using
+a `jax.sharding.NamedSharding` that is configured to replicate the variables.
+
+**How to use it**
+
+To do single-host, multi-device synchronous training with a Keras model, you
+would use the `jax.sharding` features. Here's how it works:
+
+- We first create a device mesh using `mesh_utils.create_device_mesh`.
+- We use `jax.sharding.Mesh`, `jax.sharding.NamedSharding` and
+  `jax.sharding.PartitionSpec` to define how to partition JAX arrays.
+  - We specify that we want to replicate the model and optimizer variables
+    across all devices by using a spec with no axis.
+  - We specify that we want to shard the data across devices by using a spec
+    that splits along the batch dimension.
+- We use `jax.device_put` to replicate the model and optimizer variables across
+  devices. This happens once at the beginning.
+- In the training loop, for each batch that we process, we use `jax.device_put`
+  to split the batch across devices before invoking the train step.
+
+Here's the flow, where each step is split into its own utility function:
+
+```python
+# Config
+num_epochs = 2
+batch_size = 64
+
+train_data, eval_data = get_datasets()
+train_data = train_data.batch(batch_size, drop_remainder=True)
+
+model = get_model()
+optimizer = keras.optimizers.Adam(1e-3)
+loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
+
+# Initialize all state with .build()
+(one_batch, one_batch_labels) = next(iter(train_data))
+model.build(one_batch)
+optimizer.build(model.trainable_variables)
+
+
+# This is the loss function that will be differentiated.
+# Keras provides a pure functional forward pass: model.stateless_call
+def compute_loss(trainable_variables, non_trainable_variables, x, y):
+    y_pred, updated_non_trainable_variables = model.stateless_call(
+        trainable_variables, non_trainable_variables, x
+    )
+    loss_value = loss(y, y_pred)
+    return loss_value, updated_non_trainable_variables
+
+
+# Function to compute gradients
+compute_gradients = jax.value_and_grad(compute_loss, has_aux=True)
+
+
+# Training step, Keras provides a pure functional optimizer.stateless_apply
+@jax.jit
+def train_step(train_state, x, y):
+    trainable_variables, non_trainable_variables, optimizer_variables = train_state
+    (loss_value, non_trainable_variables), grads = compute_gradients(
+        trainable_variables, non_trainable_variables, x, y
+    )
+
+    trainable_variables, optimizer_variables = optimizer.stateless_apply(
+        optimizer_variables, grads, trainable_variables
+    )
+
+    return loss_value, (
+        trainable_variables,
+        non_trainable_variables,
+        optimizer_variables,
+    )
+
+
+# Replicate the model and optimizer variable on all devices
+def get_replicated_train_state(devices):
+    # All variables will be replicated on all devices
+    var_mesh = Mesh(devices, axis_names=("_"))
+    # In NamedSharding, axes not mentioned are replicated (all axes here)
+    var_replication = NamedSharding(var_mesh, P())
+
+    # Apply the distribution settings to the model variables
+    trainable_variables = jax.device_put(model.trainable_variables, var_replication)
+    non_trainable_variables = jax.device_put(
+        model.non_trainable_variables, var_replication
+    )
+    optimizer_variables = jax.device_put(optimizer.variables, var_replication)
+
+    # Combine all state in a tuple
+    return (trainable_variables, non_trainable_variables, optimizer_variables)
+
+
+num_devices = len(jax.local_devices())
+print(f"Running on {num_devices} devices: {jax.local_devices()}")
+devices = mesh_utils.create_device_mesh((num_devices,))
+
+# Data will be split along the batch axis
+data_mesh = Mesh(devices, axis_names=("batch",))  # naming axes of the mesh
+data_sharding = NamedSharding(
+    data_mesh,
+    P(
+        "batch",
+    ),
+)  # naming axes of the sharded partition
+
+# Display data sharding
+x, y = next(iter(train_data))
+sharded_x = jax.device_put(x.numpy(), data_sharding)
+print("Data sharding")
+jax.debug.visualize_array_sharding(jax.numpy.reshape(sharded_x, [-1, 28 * 28]))
+
+train_state = get_replicated_train_state(devices)
+
+# Custom training loop
+for epoch in range(num_epochs):
+    data_iter = iter(train_data)
+    for data in data_iter:
+        x, y = data
+        sharded_x = jax.device_put(x.numpy(), data_sharding)
+        loss_value, train_state = train_step(train_state, sharded_x, y.numpy())
+    print("Epoch", epoch, "loss:", loss_value)
+
+# Post-processing model state update to write them back into the model
+trainable_variables, non_trainable_variables, optimizer_variables = train_state
+for variable, value in zip(model.trainable_variables, trainable_variables):
+    variable.assign(value)
+for variable, value in zip(model.non_trainable_variables, non_trainable_variables):
+    variable.assign(value)
+```
+
+That's it!
+
diff --git a/.tether/vignettes-src/parked/_distributed_training_with_torch.Rmd b/.tether/vignettes-src/parked/_distributed_training_with_torch.Rmd
new file mode 100644
index 0000000000..a8c70fb00f
--- /dev/null
+++ b/.tether/vignettes-src/parked/_distributed_training_with_torch.Rmd
@@ -0,0 +1,266 @@
+---
+title: Multi-GPU distributed training with PyTorch
+author: '[fchollet](https://twitter.com/fchollet)'
+date-created: 2023/06/29
+last-modified: 2023/06/29
+description: Guide to multi-GPU training for Keras models with PyTorch.
+accelerator: GPU
+output: rmarkdown::html_vignette
+knit: ({source(here::here("tools/knit.R")); knit_vignette})
+tether: ~/github/keras-team/keras-io/guides/distributed_training_with_torch.py
+---
+
+## Introduction
+
+There are generally two ways to distribute computation across multiple devices:
+
+**Data parallelism**, where a single model gets replicated on multiple devices or
+multiple machines. Each of them processes different batches of data, then they merge
+their results. There exist many variants of this setup that differ in how the different
+model replicas merge results, in whether they stay in sync at every batch or whether they
+are more loosely coupled, etc.
+
+**Model parallelism**, where different parts of a single model run on different devices,
+processing a single batch of data together. This works best with models that have a
+naturally-parallel architecture, such as models that feature multiple branches.
+
+This guide focuses on data parallelism, in particular **synchronous data parallelism**,
+where the different replicas of the model stay in sync after each batch they process.
+Synchronicity keeps the model convergence behavior identical to what you would see for
+single-device training.
+
+Specifically, this guide teaches you how to use PyTorch's `DistributedDataParallel`
+module wrapper to train Keras models, with minimal changes to your code,
+on multiple GPUs (typically 2 to 16) installed on a single machine (single host,
+multi-device training). This is the most common setup for researchers and small-scale
+industry workflows.
+ +## Setup + +Let's start by defining the function that creates the model that we will train, +and the function that creates the dataset we will train on (MNIST in this case). + +```python +import os + +os.environ["KERAS_BACKEND"] = "torch" + +import torch +import numpy as np +import keras + + +def get_model(): + # Make a simple convnet with batch normalization and dropout. + inputs = keras.Input(shape=(28, 28, 1)) + x = keras.layers.Rescaling(1.0 / 255.0)(inputs) + x = keras.layers.Conv2D(filters=12, kernel_size=3, padding="same", use_bias=False)( + x + ) + x = keras.layers.BatchNormalization(scale=False, center=True)(x) + x = keras.layers.ReLU()(x) + x = keras.layers.Conv2D( + filters=24, + kernel_size=6, + use_bias=False, + strides=2, + )(x) + x = keras.layers.BatchNormalization(scale=False, center=True)(x) + x = keras.layers.ReLU()(x) + x = keras.layers.Conv2D( + filters=32, + kernel_size=6, + padding="same", + strides=2, + name="large_k", + )(x) + x = keras.layers.BatchNormalization(scale=False, center=True)(x) + x = keras.layers.ReLU()(x) + x = keras.layers.GlobalAveragePooling2D()(x) + x = keras.layers.Dense(256, activation="relu")(x) + x = keras.layers.Dropout(0.5)(x) + outputs = keras.layers.Dense(10)(x) + model = keras.Model(inputs, outputs) + return model + + +def get_dataset(): + # Load the data and split it between train and test sets + (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() + + # Scale images to the [0, 1] range + x_train = x_train.astype("float32") + x_test = x_test.astype("float32") + # Make sure images have shape (28, 28, 1) + x_train = np.expand_dims(x_train, -1) + x_test = np.expand_dims(x_test, -1) + print("x_train shape:", x_train.shape) + + # Create a TensorDataset + dataset = torch.utils.data.TensorDataset( + torch.from_numpy(x_train), torch.from_numpy(y_train) + ) + return dataset +``` + +Next, let's define a simple PyTorch training loop that targets +a GPU (note the calls to `.cuda()`). + +```python +def train_model(model, dataloader, num_epochs, optimizer, loss_fn): + for epoch in range(num_epochs): + running_loss = 0.0 + running_loss_count = 0 + for batch_idx, (inputs, targets) in enumerate(dataloader): + inputs = inputs.cuda(non_blocking=True) + targets = targets.cuda(non_blocking=True) + + # Forward pass + outputs = model(inputs) + loss = loss_fn(outputs, targets) + + # Backward and optimize + optimizer.zero_grad() + loss.backward() + optimizer.step() + + running_loss += loss.item() + running_loss_count += 1 + + # Print loss statistics + print( + f"Epoch {epoch + 1}/{num_epochs}, " + f"Loss: {running_loss / running_loss_count}" + ) +``` + +## Single-host, multi-device synchronous training + +In this setup, you have one machine with several GPUs on it (typically 2 to 16). Each +device will run a copy of your model (called a **replica**). For simplicity, in what +follows, we'll assume we're dealing with 8 GPUs, at no loss of generality. + +**How it works** + +At each step of training: + +- The current batch of data (called **global batch**) is split into 8 different +sub-batches (called **local batches**). For instance, if the global batch has 512 +samples, each of the 8 local batches will have 64 samples. +- Each of the 8 replicas independently processes a local batch: they run a forward pass, +then a backward pass, outputting the gradient of the weights with respect to the loss of +the model on the local batch. +- The weight updates originating from local gradients are efficiently merged across the 8 +replicas. 
Because this is done at the end of every step, the replicas always stay in +sync. + +In practice, the process of synchronously updating the weights of the model replicas is +handled at the level of each individual weight variable. This is done through a **mirrored +variable** object. + +**How to use it** + +To do single-host, multi-device synchronous training with a Keras model, you would use +the `torch.nn.parallel.DistributedDataParallel` module wrapper. +Here's how it works: + +- We use `torch.multiprocessing.start_processes` to start multiple Python processes, one +per device. Each process will run the `per_device_launch_fn` function. +- The `per_device_launch_fn` function does the following: + - It uses `torch.distributed.init_process_group` and `torch.cuda.set_device` + to configure the device to be used for that process. + - It uses `torch.utils.data.distributed.DistributedSampler` + and `torch.utils.data.DataLoader` to turn our data into a distributed data loader. + - It also uses `torch.nn.parallel.DistributedDataParallel` to turn our model into + a distributed PyTorch module. + - It then calls the `train_model` function. +- The `train_model` function will then run in each process, with the model using +a separate device in each process. + +Here's the flow, where each step is split into its own utility function: + +```python +# Config +num_gpu = torch.cuda.device_count() +num_epochs = 2 +batch_size = 64 +print(f"Running on {num_gpu} GPUs") + + +def setup_device(current_gpu_index, num_gpus): + # Device setup + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = "56492" + device = torch.device("cuda:{}".format(current_gpu_index)) + torch.distributed.init_process_group( + backend="nccl", + init_method="env://", + world_size=num_gpus, + rank=current_gpu_index, + ) + torch.cuda.set_device(device) + + +def cleanup(): + torch.distributed.destroy_process_group() + + +def prepare_dataloader(dataset, current_gpu_index, num_gpus, batch_size): + sampler = torch.utils.data.distributed.DistributedSampler( + dataset, + num_replicas=num_gpus, + rank=current_gpu_index, + shuffle=False, + ) + dataloader = torch.utils.data.DataLoader( + dataset, + sampler=sampler, + batch_size=batch_size, + shuffle=False, + ) + return dataloader + + +def per_device_launch_fn(current_gpu_index, num_gpu): + # Setup the process groups + setup_device(current_gpu_index, num_gpu) + + dataset = get_dataset() + model = get_model() + + # prepare the dataloader + dataloader = prepare_dataloader(dataset, current_gpu_index, num_gpu, batch_size) + + # Instantiate the torch optimizer + optimizer = torch.optim.Adam(model.parameters(), lr=1e-3) + + # Instantiate the torch loss function + loss_fn = torch.nn.CrossEntropyLoss() + + # Put model on device + model = model.to(current_gpu_index) + ddp_model = torch.nn.parallel.DistributedDataParallel( + model, device_ids=[current_gpu_index], output_device=current_gpu_index + ) + + train_model(ddp_model, dataloader, num_epochs, optimizer, loss_fn) + + cleanup() +``` + +Time to start multiple processes: + +```python +if __name__ == "__main__": + # We use the "fork" method rather than "spawn" to support notebooks + torch.multiprocessing.start_processes( + per_device_launch_fn, + args=(num_gpu,), + nprocs=num_gpu, + join=True, + start_method="fork", + ) +``` + +That's it! 
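+
+As an optional follow-up (a minimal sketch, not part of the original guide): since the
+replicas stay in sync after every step, you typically persist the model from a single
+process only, e.g. rank 0, to avoid several workers writing the same file concurrently.
+The snippet below assumes the names defined above and would go at the end of
+`per_device_launch_fn()`, just before `cleanup()`:
+
+```python
+# Hypothetical addition: all replicas hold identical weights after each
+# synchronized step, so saving once, from rank 0, is sufficient.
+if current_gpu_index == 0:
+    # Save the underlying Keras model, not the DDP wrapper.
+    model.save("mnist_ddp_model.keras")
+```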
+ diff --git a/.tether/vignettes-src/parked/_writing_a_custom_training_loop_in_jax.Rmd b/.tether/vignettes-src/parked/_writing_a_custom_training_loop_in_jax.Rmd new file mode 100644 index 0000000000..48d789d6fc --- /dev/null +++ b/.tether/vignettes-src/parked/_writing_a_custom_training_loop_in_jax.Rmd @@ -0,0 +1,506 @@ +--- +title: Writing a training loop from scratch in JAX +author: '[fchollet](https://twitter.com/fchollet)' +date-created: 2023/06/25 +last-modified: 2023/06/25 +description: Writing low-level training & evaluation loops in JAX. +accelerator: None +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras-io/guides/writing_a_custom_training_loop_in_jax.py +--- + +## Setup + +```python +import os + +# This guide can only be run with the jax backend. +os.environ["KERAS_BACKEND"] = "jax" + +import jax + +# We import TF so we can use tf.data. +import tensorflow as tf +import keras +import numpy as np +``` + +## Introduction + +Keras provides default training and evaluation loops, `fit()` and `evaluate()`. +Their usage is covered in the guide +[Training & evaluation with the built-in methods](/guides/training_with_built_in_methods/). + +If you want to customize the learning algorithm of your model while still leveraging +the convenience of `fit()` +(for instance, to train a GAN using `fit()`), you can subclass the `Model` class and +implement your own `train_step()` method, which +is called repeatedly during `fit()`. + +Now, if you want very low-level control over training & evaluation, you should write +your own training & evaluation loops from scratch. This is what this guide is about. + +## A first end-to-end example + +To write a custom training loop, we need the following ingredients: + +- A model to train, of course. +- An optimizer. You could either use an optimizer from `keras.optimizers`, or +one from the `optax` package. +- A loss function. +- A dataset. The standard in the JAX ecosystem is to load data via `tf.data`, +so that's what we'll use. + +Let's line them up. + +First, let's get the model and the MNIST dataset: + +```python +def get_model(): + inputs = keras.Input(shape=(784,), name="digits") + x1 = keras.layers.Dense(64, activation="relu")(inputs) + x2 = keras.layers.Dense(64, activation="relu")(x1) + outputs = keras.layers.Dense(10, name="predictions")(x2) + model = keras.Model(inputs=inputs, outputs=outputs) + return model + + +model = get_model() + +# Prepare the training dataset. +batch_size = 32 +(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() +x_train = np.reshape(x_train, (-1, 784)).astype("float32") +x_test = np.reshape(x_test, (-1, 784)).astype("float32") +y_train = keras.utils.to_categorical(y_train) +y_test = keras.utils.to_categorical(y_test) + +# Reserve 10,000 samples for validation. +x_val = x_train[-10000:] +y_val = y_train[-10000:] +x_train = x_train[:-10000] +y_train = y_train[:-10000] + +# Prepare the training dataset. +train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) +train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size) + +# Prepare the validation dataset. +val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val)) +val_dataset = val_dataset.batch(batch_size) +``` + +Next, here's the loss function and the optimizer. +We'll use a Keras optimizer in this case. + +```python +# Instantiate a loss function. +loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True) + +# Instantiate an optimizer. 
+optimizer = keras.optimizers.Adam(learning_rate=1e-3)
+```
+
+### Getting gradients in JAX
+
+Let's train our model using mini-batch gradient descent with a custom training loop.
+
+In JAX, gradients are computed via *metaprogramming*: you call `jax.grad` (or
+`jax.value_and_grad`) on a function in order to create a gradient-computing function
+for that first function.
+
+So the first thing we need is a function that returns the loss value.
+That's the function we'll use to generate the gradient function. Something like this:
+
+```python
+def compute_loss(x, y):
+    ...
+    return loss
+```
+
+Once you have such a function, you can compute gradients via metaprogramming like this:
+
+```python
+grad_fn = jax.grad(compute_loss)
+grads = grad_fn(x, y)
+```
+
+Typically, you don't just want to get the gradient values, you also want to get
+the loss value. You can do this by using `jax.value_and_grad` instead of `jax.grad`:
+
+```python
+grad_fn = jax.value_and_grad(compute_loss)
+loss, grads = grad_fn(x, y)
+```
+
+### JAX computation is purely stateless
+
+In JAX, everything must be a stateless function -- so our loss computation function
+must be stateless as well. That means that all Keras variables (e.g. weight tensors)
+must be passed as function inputs, and any variable that has been updated during the
+forward pass must be returned as a function output. The function must have no side effects.
+
+During the forward pass, the non-trainable variables of a Keras model might get
+updated. These variables could be, for instance, RNG seed state variables or
+BatchNormalization statistics. We're going to need to return those. So we need
+something like this:
+
+```python
+def compute_loss_and_updates(trainable_variables, non_trainable_variables, x, y):
+    ...
+    return loss, non_trainable_variables
+```
+
+Once you have such a function, you can get the gradient function by
+specifying `has_aux` in `value_and_grad`: it tells JAX that the loss
+computation function returns more outputs than just the loss. Note that the loss
+should always be the first output.
+
+```python
+grad_fn = jax.value_and_grad(compute_loss_and_updates, has_aux=True)
+(loss, non_trainable_variables), grads = grad_fn(
+    trainable_variables, non_trainable_variables, x, y
+)
+```
+
+Now that we have established the basics,
+let's implement this `compute_loss_and_updates` function.
+Keras models have a `stateless_call` method which will come in handy here.
+It works just like `model.__call__`, but it requires you to explicitly
+pass the value of all the variables in the model, and it returns not just
+the `__call__` outputs but also the (potentially updated) non-trainable
+variables.
+
+```python
+def compute_loss_and_updates(trainable_variables, non_trainable_variables, x, y):
+    y_pred, non_trainable_variables = model.stateless_call(
+        trainable_variables, non_trainable_variables, x
+    )
+    loss = loss_fn(y, y_pred)
+    return loss, non_trainable_variables
+```
+
+Let's get the gradient function:
+
+```python
+grad_fn = jax.value_and_grad(compute_loss_and_updates, has_aux=True)
+```
+
+### The training step function
+
+Next, let's implement the end-to-end training step: the function
+that runs the forward pass, computes the loss, computes the gradients,
+and also uses the optimizer to update the trainable variables. This function
+also needs to be stateless, so it will get as input a `state` tuple that
+includes every state element we're going to use:
+
+- `trainable_variables` and `non_trainable_variables`: the model's variables.
+- `optimizer_variables`: the optimizer's state variables,
+such as momentum accumulators.
+
+To update the trainable variables, we use the optimizer's stateless method
+`stateless_apply`. It's equivalent to `optimizer.apply()`, but it requires
+always passing `trainable_variables` and `optimizer_variables`. It returns
+both the updated trainable variables and the updated `optimizer_variables`.
+
+```python
+def train_step(state, data):
+    trainable_variables, non_trainable_variables, optimizer_variables = state
+    x, y = data
+    (loss, non_trainable_variables), grads = grad_fn(
+        trainable_variables, non_trainable_variables, x, y
+    )
+    trainable_variables, optimizer_variables = optimizer.stateless_apply(
+        optimizer_variables, grads, trainable_variables
+    )
+    # Return updated state
+    return loss, (
+        trainable_variables,
+        non_trainable_variables,
+        optimizer_variables,
+    )
+```
+
+### Make it fast with `jax.jit`
+
+By default, JAX operations run eagerly,
+just like in TensorFlow eager mode and PyTorch eager mode.
+And just like TensorFlow eager mode and PyTorch eager mode, it's pretty slow
+-- eager mode is better used as a debugging environment, not as a way to do
+any actual work. So let's make our `train_step` fast by compiling it.
+
+When you have a stateless JAX function, you can compile it to XLA via the
+`@jax.jit` decorator. It will get traced during its first execution, and in
+subsequent executions you will be executing the traced graph (this is just
+like `@tf.function(jit_compile=True)`). Let's try it:
+
+```python
+@jax.jit
+def train_step(state, data):
+    trainable_variables, non_trainable_variables, optimizer_variables = state
+    x, y = data
+    (loss, non_trainable_variables), grads = grad_fn(
+        trainable_variables, non_trainable_variables, x, y
+    )
+    trainable_variables, optimizer_variables = optimizer.stateless_apply(
+        optimizer_variables, grads, trainable_variables
+    )
+    # Return updated state
+    return loss, (
+        trainable_variables,
+        non_trainable_variables,
+        optimizer_variables,
+    )
+```
+
+We're now ready to train our model. The training loop itself
+is trivial: we just repeatedly call `loss, state = train_step(state, data)`.
+
+Note:
+
+- We convert the TF tensors yielded by the `tf.data.Dataset` to NumPy
+before passing them to our JAX function.
+- All variables must be built beforehand:
+the model must be built and the optimizer must be built. Since we're using a
+Functional API model, it's already built, but if it were a subclassed model
+you'd need to call it on a batch of data to build it.
+
+```python
+# Build optimizer variables.
+optimizer.build(model.trainable_variables)
+
+trainable_variables = model.trainable_variables
+non_trainable_variables = model.non_trainable_variables
+optimizer_variables = optimizer.variables
+state = trainable_variables, non_trainable_variables, optimizer_variables
+
+# Training loop
+for step, data in enumerate(train_dataset):
+    data = (data[0].numpy(), data[1].numpy())
+    loss, state = train_step(state, data)
+    # Log every 100 batches.
+    if step % 100 == 0:
+        print(f"Training loss (for 1 batch) at step {step}: {float(loss):.4f}")
+        print(f"Seen so far: {(step + 1) * batch_size} samples")
+```
+
+A key thing to notice here is that the loop is entirely stateless -- the variables
+attached to the model (`model.weights`) are never getting updated during the loop.
+Their new values are only stored in the `state` tuple. That means that at some point,
+before saving the model, you should attach the new variable values back to the model.
+
+Just call `variable.assign(new_value)` on each model variable you want to update:
+
+```python
+trainable_variables, non_trainable_variables, optimizer_variables = state
+for variable, value in zip(model.trainable_variables, trainable_variables):
+    variable.assign(value)
+for variable, value in zip(model.non_trainable_variables, non_trainable_variables):
+    variable.assign(value)
+```
+
+## Low-level handling of metrics
+
+Let's add metrics monitoring to this basic training loop.
+
+You can readily reuse built-in Keras metrics (or custom ones you wrote) in such training
+loops written from scratch. Here's the flow:
+
+- Instantiate the metric at the start of the loop
+- Include `metric_variables` in the `train_step` arguments
+and `compute_loss_and_updates` arguments.
+- Call `metric.stateless_update_state()` in the `compute_loss_and_updates` function.
+It's equivalent to `update_state()` -- only stateless.
+- When you need to display the current value of the metric, outside the `train_step`
+(in the eager scope), attach the new metric variable values to the metric object
+and call `metric.result()`.
+- Call `metric.reset_state()` when you need to clear the state of the metric
+(typically at the end of an epoch)
+
+Let's use this knowledge to compute `CategoricalAccuracy` on training and
+validation data at the end of training:
+
+```python
+# Get a fresh model
+model = get_model()
+
+# Instantiate an optimizer to train the model.
+optimizer = keras.optimizers.Adam(learning_rate=1e-3)
+# Instantiate a loss function.
+loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True)
+
+# Prepare the metrics.
+train_acc_metric = keras.metrics.CategoricalAccuracy()
+val_acc_metric = keras.metrics.CategoricalAccuracy()
+
+
+def compute_loss_and_updates(
+    trainable_variables, non_trainable_variables, metric_variables, x, y
+):
+    y_pred, non_trainable_variables = model.stateless_call(
+        trainable_variables, non_trainable_variables, x
+    )
+    loss = loss_fn(y, y_pred)
+    metric_variables = train_acc_metric.stateless_update_state(
+        metric_variables, y, y_pred
+    )
+    return loss, (non_trainable_variables, metric_variables)
+
+
+grad_fn = jax.value_and_grad(compute_loss_and_updates, has_aux=True)
+
+
+@jax.jit
+def train_step(state, data):
+    (
+        trainable_variables,
+        non_trainable_variables,
+        optimizer_variables,
+        metric_variables,
+    ) = state
+    x, y = data
+    (loss, (non_trainable_variables, metric_variables)), grads = grad_fn(
+        trainable_variables, non_trainable_variables, metric_variables, x, y
+    )
+    trainable_variables, optimizer_variables = optimizer.stateless_apply(
+        optimizer_variables, grads, trainable_variables
+    )
+    # Return updated state
+    return loss, (
+        trainable_variables,
+        non_trainable_variables,
+        optimizer_variables,
+        metric_variables,
+    )
+```
+
+We'll also prepare an evaluation step function:
+
+```python
+@jax.jit
+def eval_step(state, data):
+    trainable_variables, non_trainable_variables, metric_variables = state
+    x, y = data
+    y_pred, non_trainable_variables = model.stateless_call(
+        trainable_variables, non_trainable_variables, x
+    )
+    loss = loss_fn(y, y_pred)
+    metric_variables = val_acc_metric.stateless_update_state(
+        metric_variables, y, y_pred
+    )
+    return loss, (
+        trainable_variables,
+        non_trainable_variables,
+        metric_variables,
+    )
+```
+
+Here are our loops:
+
+```python
+# Build optimizer variables.
+optimizer.build(model.trainable_variables)
+
+trainable_variables = model.trainable_variables
+non_trainable_variables = model.non_trainable_variables
+optimizer_variables = optimizer.variables
+metric_variables = train_acc_metric.variables
+state = (
+    trainable_variables,
+    non_trainable_variables,
+    optimizer_variables,
+    metric_variables,
+)
+
+# Training loop
+for step, data in enumerate(train_dataset):
+    data = (data[0].numpy(), data[1].numpy())
+    loss, state = train_step(state, data)
+    # Log every 100 batches.
+    if step % 100 == 0:
+        print(f"Training loss (for 1 batch) at step {step}: {float(loss):.4f}")
+        _, _, _, metric_variables = state
+        for variable, value in zip(train_acc_metric.variables, metric_variables):
+            variable.assign(value)
+        print(f"Training accuracy: {train_acc_metric.result()}")
+        print(f"Seen so far: {(step + 1) * batch_size} samples")
+
+# Build the eval state: reuse the trained variables, but swap in the
+# validation metric's variables.
+(
+    trainable_variables,
+    non_trainable_variables,
+    optimizer_variables,
+    metric_variables,
+) = state
+metric_variables = val_acc_metric.variables
+state = trainable_variables, non_trainable_variables, metric_variables
+
+# Eval loop
+for step, data in enumerate(val_dataset):
+    data = (data[0].numpy(), data[1].numpy())
+    loss, state = eval_step(state, data)
+    # Log every 100 batches.
+    if step % 100 == 0:
+        print(f"Validation loss (for 1 batch) at step {step}: {float(loss):.4f}")
+        _, _, metric_variables = state
+        for variable, value in zip(val_acc_metric.variables, metric_variables):
+            variable.assign(value)
+        print(f"Validation accuracy: {val_acc_metric.result()}")
+        print(f"Seen so far: {(step + 1) * batch_size} samples")
+```
+
+## Low-level handling of losses tracked by the model
+
+Layers & models recursively track any losses created during the forward pass
+by layers that call `self.add_loss(value)`. The resulting list of scalar loss
+values is available via the property `model.losses`
+at the end of the forward pass.
+
+If you want to use these loss components, you should sum them
+and add them to the main loss in your training step.
+
+Consider this layer, which creates an activity regularization loss:
+
+```python
+class ActivityRegularizationLayer(keras.layers.Layer):
+    def call(self, inputs):
+        self.add_loss(1e-2 * jax.numpy.sum(inputs))
+        return inputs
+```
+
+Let's build a really simple model that uses it:
+
+```python
+inputs = keras.Input(shape=(784,), name="digits")
+x = keras.layers.Dense(64, activation="relu")(inputs)
+# Insert activity regularization as a layer
+x = ActivityRegularizationLayer()(x)
+x = keras.layers.Dense(64, activation="relu")(x)
+outputs = keras.layers.Dense(10, name="predictions")(x)
+
+model = keras.Model(inputs=inputs, outputs=outputs)
+```
+
+Here's what our `compute_loss_and_updates` function should look like now:
+
+- Pass `return_losses=True` to `model.stateless_call()`.
+- Sum the resulting `losses` and add them to the main loss.
+
+```python
+def compute_loss_and_updates(
+    trainable_variables, non_trainable_variables, metric_variables, x, y
+):
+    y_pred, non_trainable_variables, losses = model.stateless_call(
+        trainable_variables, non_trainable_variables, x, return_losses=True
+    )
+    loss = loss_fn(y, y_pred)
+    if losses:
+        loss += jax.numpy.sum(losses)
+    metric_variables = train_acc_metric.stateless_update_state(
+        metric_variables, y, y_pred
+    )
+    return loss, (non_trainable_variables, metric_variables)
+```
+
+That's it!
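+
+As a closing note (a minimal sketch under the setup above, not part of the original
+guide): because the aux output is still a 2-tuple
+`(non_trainable_variables, metric_variables)`, the gradient function and the
+destructuring inside `train_step` keep working unchanged:
+
+```python
+# Re-derive the gradient function from the updated loss function; the
+# unpacking below matches the `(loss, aux)` pair that `has_aux=True` expects.
+grad_fn = jax.value_and_grad(compute_loss_and_updates, has_aux=True)
+(loss, (non_trainable_variables, metric_variables)), grads = grad_fn(
+    trainable_variables, non_trainable_variables, metric_variables, x, y
+)
+```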
+ diff --git a/.tether/vignettes-src/parked/_writing_a_custom_training_loop_in_torch.Rmd b/.tether/vignettes-src/parked/_writing_a_custom_training_loop_in_torch.Rmd new file mode 100644 index 0000000000..f3dc5ad93d --- /dev/null +++ b/.tether/vignettes-src/parked/_writing_a_custom_training_loop_in_torch.Rmd @@ -0,0 +1,382 @@ +--- +title: Writing a training loop from scratch in PyTorch +author: '[fchollet](https://twitter.com/fchollet)' +date-created: 2023/06/25 +last-modified: 2023/06/25 +description: Writing low-level training & evaluation loops in PyTorch. +accelerator: None +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras-io/guides/writing_a_custom_training_loop_in_torch.py +--- + +## Setup + +```python +import os + +# This guide can only be run with the torch backend. +os.environ["KERAS_BACKEND"] = "torch" + +import torch +import keras +import numpy as np +``` + +## Introduction + +Keras provides default training and evaluation loops, `fit()` and `evaluate()`. +Their usage is covered in the guide +[Training & evaluation with the built-in methods](/guides/training_with_built_in_methods/). + +If you want to customize the learning algorithm of your model while still leveraging +the convenience of `fit()` +(for instance, to train a GAN using `fit()`), you can subclass the `Model` class and +implement your own `train_step()` method, which +is called repeatedly during `fit()`. + +Now, if you want very low-level control over training & evaluation, you should write +your own training & evaluation loops from scratch. This is what this guide is about. + +## A first end-to-end example + +To write a custom training loop, we need the following ingredients: + +- A model to train, of course. +- An optimizer. You could either use a `keras.optimizers` optimizer, +or a native PyTorch optimizer from `torch.optim`. +- A loss function. You could either use a `keras.losses` loss, +or a native PyTorch loss from `torch.nn`. +- A dataset. You could use any format: a `tf.data.Dataset`, +a PyTorch `DataLoader`, a Python generator, etc. + +Let's line them up. We'll use torch-native objects in each case -- +except, of course, for the Keras model. + +First, let's get the model and the MNIST dataset: + +```python +# Let's consider a simple MNIST model +def get_model(): + inputs = keras.Input(shape=(784,), name="digits") + x1 = keras.layers.Dense(64, activation="relu")(inputs) + x2 = keras.layers.Dense(64, activation="relu")(x1) + outputs = keras.layers.Dense(10, name="predictions")(x2) + model = keras.Model(inputs=inputs, outputs=outputs) + return model + + +# Create load up the MNIST dataset and put it in a torch DataLoader +# Prepare the training dataset. +batch_size = 32 +(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() +x_train = np.reshape(x_train, (-1, 784)).astype("float32") +x_test = np.reshape(x_test, (-1, 784)).astype("float32") +y_train = keras.utils.to_categorical(y_train) +y_test = keras.utils.to_categorical(y_test) + +# Reserve 10,000 samples for validation. 
+x_val = x_train[-10000:]
+y_val = y_train[-10000:]
+x_train = x_train[:-10000]
+y_train = y_train[:-10000]
+
+# Create torch Datasets
+train_dataset = torch.utils.data.TensorDataset(
+    torch.from_numpy(x_train), torch.from_numpy(y_train)
+)
+val_dataset = torch.utils.data.TensorDataset(
+    torch.from_numpy(x_val), torch.from_numpy(y_val)
+)
+
+# Create DataLoaders for the Datasets
+train_dataloader = torch.utils.data.DataLoader(
+    train_dataset, batch_size=batch_size, shuffle=True
+)
+val_dataloader = torch.utils.data.DataLoader(
+    val_dataset, batch_size=batch_size, shuffle=False
+)
+```
+
+Next, here's our PyTorch optimizer and our PyTorch loss function:
+
+```python
+# Instantiate a torch optimizer
+model = get_model()
+optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
+
+# Instantiate a torch loss function
+loss_fn = torch.nn.CrossEntropyLoss()
+```
+
+Let's train our model using mini-batch gradient descent with a custom training loop.
+
+Calling `loss.backward()` on a loss tensor triggers backpropagation.
+Once that's done, your optimizer is magically aware of the gradients for each variable
+and can update its variables, which is done via `optimizer.step()`.
+Tensors, variables, optimizers are all interconnected to one another via hidden global state.
+Also, don't forget to call `model.zero_grad()` before `loss.backward()`, or you won't
+get the right gradients for your variables.
+
+Here's our training loop, step by step:
+
+- We open a `for` loop that iterates over epochs
+- For each epoch, we open a `for` loop that iterates over the dataset, in batches
+- For each batch, we call the model on the input data to retrieve the predictions,
+then we use them to compute a loss value
+- We call `loss.backward()` to compute the gradients of the weights
+of the model with regard to the loss
+- Finally, we use the optimizer to update the weights of the model based on the
+gradients
+
+```python
+epochs = 3
+for epoch in range(epochs):
+    for step, (inputs, targets) in enumerate(train_dataloader):
+        # Forward pass
+        logits = model(inputs)
+        loss = loss_fn(logits, targets)
+
+        # Backward pass
+        model.zero_grad()
+        loss.backward()
+
+        # Optimizer variable updates
+        optimizer.step()
+
+        # Log every 100 batches.
+        if step % 100 == 0:
+            print(
+                f"Training loss (for 1 batch) at step {step}: {loss.detach().numpy():.4f}"
+            )
+            print(f"Seen so far: {(step + 1) * batch_size} samples")
+```
+
+As an alternative, let's look at what the loop looks like when using a Keras optimizer
+and a Keras loss function.
+
+Important differences:
+
+- You retrieve the gradients for the variables via `v.value.grad`,
+called on each trainable variable.
+- You update your variables via `optimizer.apply()`, which must be
+called in a `torch.no_grad()` scope.
+
+**Also, a big gotcha:** while all NumPy/TensorFlow/JAX/Keras APIs
+as well as Python `unittest` APIs use the argument order convention
+`fn(y_true, y_pred)` (reference values first, predicted values second),
+PyTorch actually uses `fn(y_pred, y_true)` for its losses.
+So make sure to invert the order of `logits` and `targets`.
+ +```python +model = get_model() +optimizer = keras.optimizers.Adam(learning_rate=1e-3) +loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True) + +for epoch in range(epochs): + print(f"\nStart of epoch {epoch}") + for step, (inputs, targets) in enumerate(train_dataloader): + # Forward pass + logits = model(inputs) + loss = loss_fn(targets, logits) + + # Backward pass + model.zero_grad() + trainable_weights = [v for v in model.trainable_weights] + + # Call torch.Tensor.backward() on the loss to compute gradients + # for the weights. + loss.backward() + gradients = [v.value.grad for v in trainable_weights] + + # Update weights + with torch.no_grad(): + optimizer.apply(gradients, trainable_weights) + + # Log every 100 batches. + if step % 100 == 0: + print( + f"Training loss (for 1 batch) at step {step}: {loss.detach().numpy():.4f}" + ) + print(f"Seen so far: {(step + 1) * batch_size} samples") +``` + +## Low-level handling of metrics + +Let's add metrics monitoring to this basic training loop. + +You can readily reuse built-in Keras metrics (or custom ones you wrote) in such training +loops written from scratch. Here's the flow: + +- Instantiate the metric at the start of the loop +- Call `metric.update_state()` after each batch +- Call `metric.result()` when you need to display the current value of the metric +- Call `metric.reset_state()` when you need to clear the state of the metric +(typically at the end of an epoch) + +Let's use this knowledge to compute `CategoricalAccuracy` on training and +validation data at the end of each epoch: + +```python +# Get a fresh model +model = get_model() + +# Instantiate an optimizer to train the model. +optimizer = keras.optimizers.Adam(learning_rate=1e-3) +# Instantiate a loss function. +loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True) + +# Prepare the metrics. +train_acc_metric = keras.metrics.CategoricalAccuracy() +val_acc_metric = keras.metrics.CategoricalAccuracy() +``` + +Here's our training & evaluation loop: + +```python +for epoch in range(epochs): + print(f"\nStart of epoch {epoch}") + for step, (inputs, targets) in enumerate(train_dataloader): + # Forward pass + logits = model(inputs) + loss = loss_fn(targets, logits) + + # Backward pass + model.zero_grad() + trainable_weights = [v for v in model.trainable_weights] + + # Call torch.Tensor.backward() on the loss to compute gradients + # for the weights. + loss.backward() + gradients = [v.value.grad for v in trainable_weights] + + # Update weights + with torch.no_grad(): + optimizer.apply(gradients, trainable_weights) + + # Update training metric. + train_acc_metric.update_state(targets, logits) + + # Log every 100 batches. + if step % 100 == 0: + print( + f"Training loss (for 1 batch) at step {step}: {loss.detach().numpy():.4f}" + ) + print(f"Seen so far: {(step + 1) * batch_size} samples") + + # Display metrics at the end of each epoch. + train_acc = train_acc_metric.result() + print(f"Training acc over epoch: {float(train_acc):.4f}") + + # Reset training metrics at the end of each epoch + train_acc_metric.reset_state() + + # Run a validation loop at the end of each epoch. 
+    for x_batch_val, y_batch_val in val_dataloader:
+        val_logits = model(x_batch_val, training=False)
+        # Update val metrics
+        val_acc_metric.update_state(y_batch_val, val_logits)
+    val_acc = val_acc_metric.result()
+    val_acc_metric.reset_state()
+    print(f"Validation acc: {float(val_acc):.4f}")
+```
+
+## Low-level handling of losses tracked by the model
+
+Layers & models recursively track any losses created during the forward pass
+by layers that call `self.add_loss(value)`. The resulting list of scalar loss
+values is available via the property `model.losses`
+at the end of the forward pass.
+
+If you want to use these loss components, you should sum them
+and add them to the main loss in your training step.
+
+Consider this layer, which creates an activity regularization loss:
+
+```python
+class ActivityRegularizationLayer(keras.layers.Layer):
+    def call(self, inputs):
+        self.add_loss(1e-2 * torch.sum(inputs))
+        return inputs
+```
+
+Let's build a really simple model that uses it:
+
+```python
+inputs = keras.Input(shape=(784,), name="digits")
+x = keras.layers.Dense(64, activation="relu")(inputs)
+# Insert activity regularization as a layer
+x = ActivityRegularizationLayer()(x)
+x = keras.layers.Dense(64, activation="relu")(x)
+outputs = keras.layers.Dense(10, name="predictions")(x)
+
+model = keras.Model(inputs=inputs, outputs=outputs)
+```
+
+Here's what our training loop should look like now:
+
+```python
+# Get a fresh model
+model = get_model()
+
+# Instantiate an optimizer to train the model.
+optimizer = keras.optimizers.Adam(learning_rate=1e-3)
+# Instantiate a loss function.
+loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True)
+
+# Prepare the metrics.
+train_acc_metric = keras.metrics.CategoricalAccuracy()
+val_acc_metric = keras.metrics.CategoricalAccuracy()
+
+for epoch in range(epochs):
+    print(f"\nStart of epoch {epoch}")
+    for step, (inputs, targets) in enumerate(train_dataloader):
+        # Forward pass
+        logits = model(inputs)
+        loss = loss_fn(targets, logits)
+        if model.losses:
+            # Sum the list of auxiliary losses and add it to the main loss
+            loss = loss + torch.sum(torch.stack(model.losses))
+
+        # Backward pass
+        model.zero_grad()
+        trainable_weights = [v for v in model.trainable_weights]
+
+        # Call torch.Tensor.backward() on the loss to compute gradients
+        # for the weights.
+        loss.backward()
+        gradients = [v.value.grad for v in trainable_weights]
+
+        # Update weights
+        with torch.no_grad():
+            optimizer.apply(gradients, trainable_weights)
+
+        # Update training metric.
+        train_acc_metric.update_state(targets, logits)
+
+        # Log every 100 batches.
+        if step % 100 == 0:
+            print(
+                f"Training loss (for 1 batch) at step {step}: {loss.detach().numpy():.4f}"
+            )
+            print(f"Seen so far: {(step + 1) * batch_size} samples")
+
+    # Display metrics at the end of each epoch.
+    train_acc = train_acc_metric.result()
+    print(f"Training acc over epoch: {float(train_acc):.4f}")
+
+    # Reset training metrics at the end of each epoch
+    train_acc_metric.reset_state()
+
+    # Run a validation loop at the end of each epoch.
+    for x_batch_val, y_batch_val in val_dataloader:
+        val_logits = model(x_batch_val, training=False)
+        # Update val metrics
+        val_acc_metric.update_state(y_batch_val, val_logits)
+    val_acc = val_acc_metric.result()
+    val_acc_metric.reset_state()
+    print(f"Validation acc: {float(val_acc):.4f}")
+```
+
+That's it!
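+
+One optional refinement, shown here as a hedged sketch rather than as part of the
+guide: the validation passes above still record operations for autograd. Wrapping
+them in a `torch.no_grad()` context skips that bookkeeping and saves memory:
+
+```python
+# Sketch: the same validation loop as above, without gradient tracking.
+with torch.no_grad():
+    for x_batch_val, y_batch_val in val_dataloader:
+        val_logits = model(x_batch_val, training=False)
+        val_acc_metric.update_state(y_batch_val, val_logits)
+```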
+ diff --git a/.tether/vignettes-src/parked/customizing_saving_and_serialization.Rmd b/.tether/vignettes-src/parked/customizing_saving_and_serialization.Rmd new file mode 100644 index 0000000000..c2f744a3f7 --- /dev/null +++ b/.tether/vignettes-src/parked/customizing_saving_and_serialization.Rmd @@ -0,0 +1,328 @@ +--- +title: Customizing Saving and Serialization +author: Neel Kovelamudi +date-created: 2023/03/15 +last-modified: 2023/03/15 +description: A more advanced guide on customizing saving for your layers and models. +accelerator: None +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras-io/guides/customizing_saving_and_serialization.py +--- + +## Introduction + +This guide covers advanced methods that can be customized in Keras saving. For most +users, the methods outlined in the primary +[Serialize, save, and export guide](https://keras.io/guides/serialization_and_saving) +are sufficient. + +### APIs +We will cover the following APIs: + +- `save_assets()` and `load_assets()` +- `save_own_variables()` and `load_own_variables()` +- `get_build_config()` and `build_from_config()` +- `get_compile_config()` and `compile_from_config()` + +When restoring a model, these get executed in the following order: + +- `build_from_config()` +- `compile_from_config()` +- `load_own_variables()` +- `load_assets()` + +## Setup + +```python +import os +import numpy as np +import keras +``` + +## State saving customization + +These methods determine how the state of your model's layers is saved when calling +`model.save()`. You can override them to take full control of the state saving process. + +### `save_own_variables()` and `load_own_variables()` + +These methods save and load the state variables of the layer when `model.save()` and +`keras.models.load_model()` are called, respectively. By default, the state variables +saved and loaded are the weights of the layer (both trainable and non-trainable). Here is +the default implementation of `save_own_variables()`: + +```python +def save_own_variables(self, store): + all_vars = self._trainable_weights + self._non_trainable_weights + for i, v in enumerate(all_vars): + store[f"{i}"] = v.numpy() +``` + +The store used by these methods is a dictionary that can be populated with the layer +variables. Let's take a look at an example customizing this. 
+
+**Example:**
+
+```python
+@keras.utils.register_keras_serializable(package="my_custom_package")
+class LayerWithCustomVariable(keras.layers.Dense):
+    def __init__(self, units, **kwargs):
+        super().__init__(units, **kwargs)
+        self.my_variable = keras.Variable(
+            np.random.random((units,)), name="my_variable", dtype="float32"
+        )
+
+    def save_own_variables(self, store):
+        super().save_own_variables(store)
+        # Stores the value of the variable upon saving
+        store["variables"] = self.my_variable.numpy()
+
+    def load_own_variables(self, store):
+        # Assigns the value of the variable upon loading
+        self.my_variable.assign(store["variables"])
+        # Load the remaining weights
+        for i, v in enumerate(self.weights):
+            v.assign(store[f"{i}"])
+        # Note: You must specify how all variables (including layer weights)
+        # are loaded in `load_own_variables()`.
+
+    def call(self, inputs):
+        dense_out = super().call(inputs)
+        return dense_out + self.my_variable
+
+
+model = keras.Sequential([LayerWithCustomVariable(1)])
+
+ref_input = np.random.random((8, 10))
+ref_output = np.random.random((8, 10))
+model.compile(optimizer="adam", loss="mean_squared_error")
+model.fit(ref_input, ref_output)
+
+model.save("custom_vars_model.keras")
+restored_model = keras.models.load_model("custom_vars_model.keras")
+
+np.testing.assert_allclose(
+    model.layers[0].my_variable.numpy(),
+    restored_model.layers[0].my_variable.numpy(),
+)
+```
+
+### `save_assets()` and `load_assets()`
+
+These methods can be added to your model class definition to store and load any
+additional information that your model needs.
+
+For example, NLP domain layers such as TextVectorization layers and IndexLookup layers
+may need to store their associated vocabulary (or lookup table) in a text file upon
+saving.
+
+Let's take a look at the basics of this workflow with a simple file `assets.txt`.
+
+**Example:**
+
+```python
+@keras.saving.register_keras_serializable(package="my_custom_package")
+class LayerWithCustomAssets(keras.layers.Dense):
+    def __init__(self, vocab=None, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.vocab = vocab
+
+    def save_assets(self, inner_path):
+        # Writes the vocab (sentence) to text file at save time.
+        with open(os.path.join(inner_path, "vocabulary.txt"), "w") as f:
+            f.write(self.vocab)
+
+    def load_assets(self, inner_path):
+        # Reads the vocab (sentence) from text file at load time.
+        with open(os.path.join(inner_path, "vocabulary.txt"), "r") as f:
+            text = f.read()
+        self.vocab = text.replace("<unk>", "little")
+
+
+model = keras.Sequential(
+    [LayerWithCustomAssets(vocab="Mary had a <unk> lamb.", units=5)]
+)
+
+x = np.random.random((10, 10))
+y = model(x)
+
+model.save("custom_assets_model.keras")
+restored_model = keras.models.load_model("custom_assets_model.keras")
+
+np.testing.assert_string_equal(
+    restored_model.layers[0].vocab, "Mary had a little lamb."
+)
+```
+
+## `build` and `compile` saving customization
+
+### `get_build_config()` and `build_from_config()`
+
+These methods work together to save the layer's built states and restore them upon
+loading.
+
+By default, this only includes a build config dictionary with the layer's input shape,
+but you can override these methods to include further variables and lookup tables
+that can be useful to restore for your built model.
+ +**Example:** + +```python +@keras.saving.register_keras_serializable(package="my_custom_package") +class LayerWithCustomBuild(keras.layers.Layer): + def __init__(self, units=32, **kwargs): + super().__init__(**kwargs) + self.units = units + + def call(self, inputs): + return keras.ops.matmul(inputs, self.w) + self.b + + def get_config(self): + return dict(units=self.units, **super().get_config()) + + def build(self, input_shape, layer_init): + # Note the overriding of `build()` to add an extra argument. + # Therefore, we will need to manually call build with `layer_init` argument + # before the first execution of `call()`. + super().build(input_shape) + self._input_shape = input_shape + self.w = self.add_weight( + shape=(input_shape[-1], self.units), + initializer=layer_init, + trainable=True, + ) + self.b = self.add_weight( + shape=(self.units,), + initializer=layer_init, + trainable=True, + ) + self.layer_init = layer_init + + def get_build_config(self): + build_config = { + "layer_init": self.layer_init, + "input_shape": self._input_shape, + } # Stores our initializer for `build()` + return build_config + + def build_from_config(self, config): + # Calls `build()` with the parameters at loading time + self.build(config["input_shape"], config["layer_init"]) + + +custom_layer = LayerWithCustomBuild(units=16) +custom_layer.build(input_shape=(8,), layer_init="random_normal") + +model = keras.Sequential( + [ + custom_layer, + keras.layers.Dense(1, activation="sigmoid"), + ] +) + +x = np.random.random((16, 8)) +y = model(x) + +model.save("custom_build_model.keras") +restored_model = keras.models.load_model("custom_build_model.keras") + +np.testing.assert_equal(restored_model.layers[0].layer_init, "random_normal") +np.testing.assert_equal(restored_model.built, True) +``` + +### `get_compile_config()` and `compile_from_config()` + +These methods work together to save the information with which the model was compiled +(optimizers, losses, etc.) and restore and re-compile the model with this information. + +Overriding these methods can be useful for compiling the restored model with custom +optimizers, custom losses, etc., as these will need to be deserialized prior to calling +`model.compile` in `compile_from_config()`. + +Let's take a look at an example of this. + +**Example:** + +```python +@keras.saving.register_keras_serializable(package="my_custom_package") +def small_square_sum_loss(y_true, y_pred): + loss = keras.ops.square(y_pred - y_true) + loss = loss / 10.0 + loss = keras.ops.sum(loss, axis=1) + return loss + + +@keras.saving.register_keras_serializable(package="my_custom_package") +def mean_pred(y_true, y_pred): + return keras.ops.mean(y_pred) + + +@keras.saving.register_keras_serializable(package="my_custom_package") +class ModelWithCustomCompile(keras.Model): + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.dense1 = keras.layers.Dense(8, activation="relu") + self.dense2 = keras.layers.Dense(4, activation="softmax") + + def call(self, inputs): + x = self.dense1(inputs) + return self.dense2(x) + + def compile(self, optimizer, loss_fn, metrics): + super().compile(optimizer=optimizer, loss=loss_fn, metrics=metrics) + self.model_optimizer = optimizer + self.loss_fn = loss_fn + self.loss_metrics = metrics + + def get_compile_config(self): + # These parameters will be serialized at saving time. 
+        return {
+            "model_optimizer": self.model_optimizer,
+            "loss_fn": self.loss_fn,
+            "metric": self.loss_metrics,
+        }
+
+    def compile_from_config(self, config):
+        # Deserializes the compile parameters (important, since many are custom)
+        optimizer = keras.utils.deserialize_keras_object(config["model_optimizer"])
+        loss_fn = keras.utils.deserialize_keras_object(config["loss_fn"])
+        metrics = keras.utils.deserialize_keras_object(config["metric"])
+
+        # Calls compile with the deserialized parameters
+        self.compile(optimizer=optimizer, loss_fn=loss_fn, metrics=metrics)
+
+
+model = ModelWithCustomCompile()
+model.compile(
+    optimizer="SGD", loss_fn=small_square_sum_loss, metrics=["accuracy", mean_pred]
+)
+
+x = np.random.random((4, 8))
+y = np.random.random((4,))
+
+model.fit(x, y)
+
+model.save("custom_compile_model.keras")
+restored_model = keras.models.load_model("custom_compile_model.keras")
+
+np.testing.assert_equal(model.model_optimizer, restored_model.model_optimizer)
+np.testing.assert_equal(model.loss_fn, restored_model.loss_fn)
+np.testing.assert_equal(model.loss_metrics, restored_model.loss_metrics)
+```
+
+## Conclusion
+
+The methods covered in this tutorial support a wide variety of use cases,
+allowing the saving and loading of complex models with exotic assets and state
+elements. To recap:
+
+- `save_own_variables` and `load_own_variables` determine how your states are saved
+and loaded.
+- `save_assets` and `load_assets` can be added to store and load any additional
+information your model needs.
+- `get_build_config` and `build_from_config` save and restore the model's built
+states.
+- `get_compile_config` and `compile_from_config` save and restore the model's
+compiled states.
+
diff --git a/.tether/vignettes-src/preprocessing_layers.Rmd b/.tether/vignettes-src/preprocessing_layers.Rmd
new file mode 100644
index 0000000000..0e9763ac3e
--- /dev/null
+++ b/.tether/vignettes-src/preprocessing_layers.Rmd
@@ -0,0 +1,580 @@
+---
+title: Working with preprocessing layers
+authors: Francois Chollet, Mark Omernick
+date-created: 2020/07/25
+last-modified: 2021/04/23
+description: Overview of how to leverage preprocessing layers to create end-to-end
+  models.
+accelerator: GPU
+output: rmarkdown::html_vignette
+knit: ({source(here::here("tools/knit.R")); knit_vignette})
+---
+
+## Keras preprocessing
+
+The Keras preprocessing layers API allows developers to build Keras-native input
+processing pipelines. These input processing pipelines can be used as independent
+preprocessing code in non-Keras workflows, combined directly with Keras models, and
+exported as part of a Keras SavedModel.
+
+With Keras preprocessing layers, you can build and export models that are truly
+end-to-end: models that accept raw images or raw structured data as input; models that
+handle feature normalization or feature value indexing on their own.
+
+## Available preprocessing
+
+### Text preprocessing
+
+- `tf.keras.layers.TextVectorization`: turns raw strings into an encoded
+  representation that can be read by an `Embedding` layer or `Dense` layer.
+
+### Numerical features preprocessing
+
+- `tf.keras.layers.Normalization`: performs feature-wise normalization of
+  input features.
+- `tf.keras.layers.Discretization`: turns continuous numerical features
+  into integer categorical features.
+
+### Categorical features preprocessing
+
+- `tf.keras.layers.CategoryEncoding`: turns integer categorical features
+  into one-hot, multi-hot, or count dense representations.
+- `tf.keras.layers.Hashing`: performs categorical feature hashing, also known as + the "hashing trick". +- `tf.keras.layers.StringLookup`: turns string categorical values into an encoded + representation that can be read by an `Embedding` layer or `Dense` layer. +- `tf.keras.layers.IntegerLookup`: turns integer categorical values into an + encoded representation that can be read by an `Embedding` layer or `Dense` + layer. + + +### Image preprocessing + +These layers are for standardizing the inputs of an image model. + +- `tf.keras.layers.Resizing`: resizes a batch of images to a target size. +- `tf.keras.layers.Rescaling`: rescales and offsets the values of a batch of + images (e.g. go from inputs in the `[0, 255]` range to inputs in the `[0, 1]` + range. +- `tf.keras.layers.CenterCrop`: returns a center crop of a batch of images. + +### Image data augmentation + +These layers apply random augmentation transforms to a batch of images. They +are only active during training. + +- `tf.keras.layers.RandomCrop` +- `tf.keras.layers.RandomFlip` +- `tf.keras.layers.RandomTranslation` +- `tf.keras.layers.RandomRotation` +- `tf.keras.layers.RandomZoom` +- `tf.keras.layers.RandomContrast` + +## The `adapt()` method + +Some preprocessing layers have an internal state that can be computed based on +a sample of the training data. The list of stateful preprocessing layers is: + +- `TextVectorization`: holds a mapping between string tokens and integer indices +- `StringLookup` and `IntegerLookup`: hold a mapping between input values and integer +indices. +- `Normalization`: holds the mean and standard deviation of the features. +- `Discretization`: holds information about value bucket boundaries. + +Crucially, these layers are **non-trainable**. Their state is not set during training; it +must be set **before training**, either by initializing them from a precomputed constant, +or by "adapting" them on data. + +You set the state of a preprocessing layer by exposing it to training data, via the +`adapt()` method: + +```python +import numpy as np +import tensorflow as tf +import keras +from keras import layers + +data = np.array( + [ + [0.1, 0.2, 0.3], + [0.8, 0.9, 1.0], + [1.5, 1.6, 1.7], + ] +) +layer = layers.Normalization() +layer.adapt(data) +normalized_data = layer(data) + +print("Features mean: %.2f" % (normalized_data.numpy().mean())) +print("Features std: %.2f" % (normalized_data.numpy().std())) +``` + +The `adapt()` method takes either a Numpy array or a `tf.data.Dataset` object. In the +case of `StringLookup` and `TextVectorization`, you can also pass a list of strings: + +```python +data = [ + "ξεῖν᾽, ἦ τοι μὲν ὄνειροι ἀμήχανοι ἀκριτόμυθοι", + "γίγνοντ᾽, οὐδέ τι πάντα τελείεται ἀνθρώποισι.", + "δοιαὶ γάρ τε πύλαι ἀμενηνῶν εἰσὶν ὀνείρων:", + "αἱ μὲν γὰρ κεράεσσι τετεύχαται, αἱ δ᾽ ἐλέφαντι:", + "τῶν οἳ μέν κ᾽ ἔλθωσι διὰ πριστοῦ ἐλέφαντος,", + "οἵ ῥ᾽ ἐλεφαίρονται, ἔπε᾽ ἀκράαντα φέροντες:", + "οἱ δὲ διὰ ξεστῶν κεράων ἔλθωσι θύραζε,", + "οἵ ῥ᾽ ἔτυμα κραίνουσι, βροτῶν ὅτε κέν τις ἴδηται.", +] +layer = layers.TextVectorization() +layer.adapt(data) +vectorized_text = layer(data) +print(vectorized_text) +``` + +In addition, adaptable layers always expose an option to directly set state via +constructor arguments or weight assignment. If the intended state values are known at +layer construction time, or are calculated outside of the `adapt()` call, they can be set +without relying on the layer's internal computation. 
For instance, if external vocabulary +files for the `TextVectorization`, `StringLookup`, or `IntegerLookup` layers already +exist, those can be loaded directly into the lookup tables by passing a path to the +vocabulary file in the layer's constructor arguments. + +Here's an example where you instantiate a `StringLookup` layer with precomputed vocabulary: + +```python +vocab = ["a", "b", "c", "d"] +data = tf.constant([["a", "c", "d"], ["d", "z", "b"]]) +layer = layers.StringLookup(vocabulary=vocab) +vectorized_data = layer(data) +print(vectorized_data) +``` + +## Preprocessing data before the model or inside the model + +There are two ways you could be using preprocessing layers: + +**Option 1:** Make them part of the model, like this: + +```python +inputs = keras.Input(shape=input_shape) +x = preprocessing_layer(inputs) +outputs = rest_of_the_model(x) +model = keras.Model(inputs, outputs) +``` + +With this option, preprocessing will happen on device, synchronously with the rest of the +model execution, meaning that it will benefit from GPU acceleration. +If you're training on a GPU, this is the best option for the `Normalization` layer, and for +all image preprocessing and data augmentation layers. + +**Option 2:** apply it to your `tf.data.Dataset`, so as to obtain a dataset that yields +batches of preprocessed data, like this: + +```python +dataset = dataset.map(lambda x, y: (preprocessing_layer(x), y)) +``` + +With this option, your preprocessing will happen on a CPU, asynchronously, and will be +buffered before going into the model. +In addition, if you call `dataset.prefetch(tf.data.AUTOTUNE)` on your dataset, +the preprocessing will happen efficiently in parallel with training: + +```python +dataset = dataset.map(lambda x, y: (preprocessing_layer(x), y)) +dataset = dataset.prefetch(tf.data.AUTOTUNE) +model.fit(dataset, ...) +``` + +This is the best option for `TextVectorization`, and all structured data preprocessing +layers. It can also be a good option if you're training on a CPU and you use image preprocessing +layers. + +Note that the `TextVectorization` layer can only be executed on a CPU, as it is mostly a +dictionary lookup operation. Therefore, if you are training your model on a GPU or a TPU, +you should put the `TextVectorization` layer in the `tf.data` pipeline to get the best performance. + +**When running on a TPU, you should always place preprocessing layers in the `tf.data` pipeline** +(with the exception of `Normalization` and `Rescaling`, which run fine on a TPU and are commonly +used as the first layer in an image model). + +## Benefits of doing preprocessing inside the model at inference time + +Even if you go with option 2, you may later want to export an inference-only end-to-end +model that will include the preprocessing layers. The key benefit to doing this is that +**it makes your model portable** and it **helps reduce the +[training/serving skew](https://developers.google.com/machine-learning/guides/rules-of-ml#training-serving_skew)**. + +When all data preprocessing is part of the model, other people can load and use your +model without having to be aware of how each feature is expected to be encoded & +normalized. Your inference model will be able to process raw images or raw structured +data, and will not require users of the model to be aware of the details of e.g. the +tokenization scheme used for text, the indexing scheme used for categorical features, +whether image pixel values are normalized to `[-1, +1]` or to `[0, 1]`, etc. 
This is +especially powerful if you're exporting +your model to another runtime, such as TensorFlow.js: you won't have to +reimplement your preprocessing pipeline in JavaScript. + +If you initially put your preprocessing layers in your `tf.data` pipeline, +you can export an inference model that packages the preprocessing. +Simply instantiate a new model that chains +your preprocessing layers and your training model: + +```python +inputs = keras.Input(shape=input_shape) +x = preprocessing_layer(inputs) +outputs = training_model(x) +inference_model = keras.Model(inputs, outputs) +``` + +## Preprocessing during multi-worker training + +Preprocessing layers are compatible with the +[tf.distribute](https://www.tensorflow.org/api_docs/python/tf/distribute) API +for running training across multiple machines. + +In general, preprocessing layers should be placed inside a `tf.distribute.Strategy.scope()` +and called either inside or before the model as discussed above. + +```python +with strategy.scope(): + inputs = keras.Input(shape=input_shape) + preprocessing_layer = tf.keras.layers.Hashing(10) + dense_layer = tf.keras.layers.Dense(16) +``` + +For more details, refer to the _Data preprocessing_ section +of the [Distributed input](https://www.tensorflow.org/tutorials/distribute/input) +tutorial. + +## Quick recipes + +### Image data augmentation + +Note that image data augmentation layers are only active during training (similarly to +the `Dropout` layer). + +```python +from tensorflow import keras +from tensorflow.keras import layers + +# Create a data augmentation stage with horizontal flipping, rotations, zooms +data_augmentation = keras.Sequential( + [ + layers.RandomFlip("horizontal"), + layers.RandomRotation(0.1), + layers.RandomZoom(0.1), + ] +) + +# Load some data +(x_train, y_train), _ = keras.datasets.cifar10.load_data() +input_shape = x_train.shape[1:] +classes = 10 + +# Create a tf.data pipeline of augmented images (and their labels) +train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) +train_dataset = train_dataset.batch(16).map(lambda x, y: (data_augmentation(x), y)) + + +# Create a model and train it on the augmented image data +inputs = keras.Input(shape=input_shape) +x = layers.Rescaling(1.0 / 255)(inputs) # Rescale inputs +outputs = keras.applications.ResNet50( # Add the rest of the model + weights=None, input_shape=input_shape, classes=classes +)(x) +model = keras.Model(inputs, outputs) +model.compile(optimizer="rmsprop", loss="sparse_categorical_crossentropy") +model.fit(train_dataset, steps_per_epoch=5) +``` + +You can see a similar setup in action in the example +[image classification from scratch](https://keras.io/examples/vision/image_classification_from_scratch/). 
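+
+As a variant (a sketch reusing the `data_augmentation` stage and the data defined
+above), you can instead place the augmentation inside the model, per option 1, so it
+runs on the GPU alongside the rest of the forward pass:
+
+```python
+# Augmentation layers are inactive at inference time, so this model can be
+# exported and served as-is.
+inputs = keras.Input(shape=input_shape)
+x = data_augmentation(inputs)  # Only applied during training
+x = layers.Rescaling(1.0 / 255)(x)
+outputs = keras.applications.ResNet50(
+    weights=None, input_shape=input_shape, classes=classes
+)(x)
+model = keras.Model(inputs, outputs)
+```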
+ +### Normalizing numerical features + +```python +# Load some data +(x_train, y_train), _ = keras.datasets.cifar10.load_data() +x_train = x_train.reshape((len(x_train), -1)) +input_shape = x_train.shape[1:] +classes = 10 + +# Create a Normalization layer and set its internal state using the training data +normalizer = layers.Normalization() +normalizer.adapt(x_train) + +# Create a model that include the normalization layer +inputs = keras.Input(shape=input_shape) +x = normalizer(inputs) +outputs = layers.Dense(classes, activation="softmax")(x) +model = keras.Model(inputs, outputs) + +# Train the model +model.compile(optimizer="adam", loss="sparse_categorical_crossentropy") +model.fit(x_train, y_train) +``` + +### Encoding string categorical features via one-hot encoding + +```python +# Define some toy data +data = tf.constant([["a"], ["b"], ["c"], ["b"], ["c"], ["a"]]) + +# Use StringLookup to build an index of the feature values and encode output. +lookup = layers.StringLookup(output_mode="one_hot") +lookup.adapt(data) + +# Convert new test data (which includes unknown feature values) +test_data = tf.constant([["a"], ["b"], ["c"], ["d"], ["e"], [""]]) +encoded_data = lookup(test_data) +print(encoded_data) +``` + +Note that, here, index 0 is reserved for out-of-vocabulary values +(values that were not seen during `adapt()`). + +You can see the `StringLookup` in action in the +[Structured data classification from scratch](https://keras.io/examples/structured_data/structured_data_classification_from_scratch/) +example. + +### Encoding integer categorical features via one-hot encoding + +```python +# Define some toy data +data = tf.constant([[10], [20], [20], [10], [30], [0]]) + +# Use IntegerLookup to build an index of the feature values and encode output. +lookup = layers.IntegerLookup(output_mode="one_hot") +lookup.adapt(data) + +# Convert new test data (which includes unknown feature values) +test_data = tf.constant([[10], [10], [20], [50], [60], [0]]) +encoded_data = lookup(test_data) +print(encoded_data) +``` + +Note that index 0 is reserved for missing values (which you should specify as the value +0), and index 1 is reserved for out-of-vocabulary values (values that were not seen +during `adapt()`). You can configure this by using the `mask_token` and `oov_token` +constructor arguments of `IntegerLookup`. + +You can see the `IntegerLookup` in action in the example +[structured data classification from scratch](https://keras.io/examples/structured_data/structured_data_classification_from_scratch/). + +### Applying the hashing trick to an integer categorical feature + +If you have a categorical feature that can take many different values (on the order of +10e3 or higher), where each value only appears a few times in the data, +it becomes impractical and ineffective to index and one-hot encode the feature values. +Instead, it can be a good idea to apply the "hashing trick": hash the values to a vector +of fixed size. This keeps the size of the feature space manageable, and removes the need +for explicit indexing. 
+
+```python
+import numpy as np
+
+# Sample data: 10,000 random integers with values between 0 and 100,000
+data = np.random.randint(0, 100000, size=(10000, 1))
+
+# Use the Hashing layer to hash the values into 64 bins (the range [0, 64))
+hasher = layers.Hashing(num_bins=64, salt=1337)
+
+# Use the CategoryEncoding layer to multi-hot encode the hashed values
+encoder = layers.CategoryEncoding(num_tokens=64, output_mode="multi_hot")
+encoded_data = encoder(hasher(data))
+print(encoded_data.shape)
+```
+
+### Encoding text as a sequence of token indices
+
+This is how you should preprocess text to be passed to an `Embedding` layer.
+
+```python
+# Define some text data to adapt the layer
+adapt_data = tf.constant(
+    [
+        "The Brain is wider than the Sky",
+        "For put them side by side",
+        "The one the other will contain",
+        "With ease and You beside",
+    ]
+)
+
+# Create a TextVectorization layer
+text_vectorizer = layers.TextVectorization(output_mode="int")
+# Index the vocabulary via `adapt()`
+text_vectorizer.adapt(adapt_data)
+
+# Try out the layer
+print(
+    "Encoded text:\n",
+    text_vectorizer(["The Brain is deeper than the sea"]).numpy(),
+)
+
+# Create a simple model
+inputs = keras.Input(shape=(None,), dtype="int64")
+x = layers.Embedding(input_dim=text_vectorizer.vocabulary_size(), output_dim=16)(inputs)
+x = layers.GRU(8)(x)
+outputs = layers.Dense(1)(x)
+model = keras.Model(inputs, outputs)
+
+# Create a labeled dataset (which includes unknown tokens)
+train_dataset = tf.data.Dataset.from_tensor_slices(
+    (["The Brain is deeper than the sea", "for if they are held Blue to Blue"], [1, 0])
+)
+
+# Preprocess the string inputs, turning them into int sequences
+train_dataset = train_dataset.batch(2).map(lambda x, y: (text_vectorizer(x), y))
+# Train the model on the int sequences
+print("\nTraining model...")
+model.compile(optimizer="rmsprop", loss="mse")
+model.fit(train_dataset)
+
+# For inference, you can export a model that accepts strings as input
+inputs = keras.Input(shape=(1,), dtype="string")
+x = text_vectorizer(inputs)
+outputs = model(x)
+end_to_end_model = keras.Model(inputs, outputs)
+
+# Call the end-to-end model on test data (which includes unknown tokens)
+print("\nCalling end-to-end model on test string...")
+test_data = tf.constant(["The one the other will absorb"])
+test_output = end_to_end_model(test_data)
+print("Model output:", test_output)
+```
+
+You can see the `TextVectorization` layer in action, combined with an `Embedding` layer,
+in the example
+[text classification from scratch](https://keras.io/examples/nlp/text_classification_from_scratch/).
+
+Note that when training such a model, for best performance, you should always
+use the `TextVectorization` layer as part of the input pipeline.
+
+### Encoding text as a dense matrix of N-grams with multi-hot encoding
+
+This is how you should preprocess text to be passed to a `Dense` layer.
+
+```python
+# Define some text data to adapt the layer
+adapt_data = tf.constant(
+    [
+        "The Brain is wider than the Sky",
+        "For put them side by side",
+        "The one the other will contain",
+        "With ease and You beside",
+    ]
+)
+# Instantiate TextVectorization with "multi_hot" output_mode
+# and ngrams=2 (index all bigrams)
+text_vectorizer = layers.TextVectorization(output_mode="multi_hot", ngrams=2)
+# Index the bigrams via `adapt()`
+text_vectorizer.adapt(adapt_data)
+
+# Try out the layer
+print(
+    "Encoded text:\n",
+    text_vectorizer(["The Brain is deeper than the sea"]).numpy(),
+)
+
+# Create a simple model
+inputs = keras.Input(shape=(text_vectorizer.vocabulary_size(),))
+outputs = layers.Dense(1)(inputs)
+model = keras.Model(inputs, outputs)
+
+# Create a labeled dataset (which includes unknown tokens)
+train_dataset = tf.data.Dataset.from_tensor_slices(
+    (["The Brain is deeper than the sea", "for if they are held Blue to Blue"], [1, 0])
+)
+
+# Preprocess the string inputs, turning them into multi-hot encoded bigram vectors
+train_dataset = train_dataset.batch(2).map(lambda x, y: (text_vectorizer(x), y))
+# Train the model on the multi-hot encoded data
+print("\nTraining model...")
+model.compile(optimizer="rmsprop", loss="mse")
+model.fit(train_dataset)
+
+# For inference, you can export a model that accepts strings as input
+inputs = keras.Input(shape=(1,), dtype="string")
+x = text_vectorizer(inputs)
+outputs = model(x)
+end_to_end_model = keras.Model(inputs, outputs)
+
+# Call the end-to-end model on test data (which includes unknown tokens)
+print("\nCalling end-to-end model on test string...")
+test_data = tf.constant(["The one the other will absorb"])
+test_output = end_to_end_model(test_data)
+print("Model output:", test_output)
+```
+
+### Encoding text as a dense matrix of N-grams with TF-IDF weighting
+
+This is an alternative way of preprocessing text before passing it to a `Dense` layer.
+
+```python
+# Define some text data to adapt the layer
+adapt_data = tf.constant(
+    [
+        "The Brain is wider than the Sky",
+        "For put them side by side",
+        "The one the other will contain",
+        "With ease and You beside",
+    ]
+)
+# Instantiate TextVectorization with "tf-idf" output_mode
+# (multi-hot with TF-IDF weighting) and ngrams=2 (index all bigrams)
+text_vectorizer = layers.TextVectorization(output_mode="tf-idf", ngrams=2)
+# Index the bigrams and learn the TF-IDF weights via `adapt()`
+text_vectorizer.adapt(adapt_data)
+
+# Try out the layer
+print(
+    "Encoded text:\n",
+    text_vectorizer(["The Brain is deeper than the sea"]).numpy(),
+)
+
+# Create a simple model
+inputs = keras.Input(shape=(text_vectorizer.vocabulary_size(),))
+outputs = layers.Dense(1)(inputs)
+model = keras.Model(inputs, outputs)
+
+# Create a labeled dataset (which includes unknown tokens)
+train_dataset = tf.data.Dataset.from_tensor_slices(
+    (["The Brain is deeper than the sea", "for if they are held Blue to Blue"], [1, 0])
+)
+
+# Preprocess the string inputs, turning them into TF-IDF weighted bigram vectors
+train_dataset = train_dataset.batch(2).map(lambda x, y: (text_vectorizer(x), y))
+# Train the model on the TF-IDF weighted data
+print("\nTraining model...")
+model.compile(optimizer="rmsprop", loss="mse")
+model.fit(train_dataset)
+
+# For inference, you can export a model that accepts strings as input
+inputs = keras.Input(shape=(1,), dtype="string")
+x = text_vectorizer(inputs)
+outputs = model(x)
+end_to_end_model = keras.Model(inputs, outputs)
+
+# Call the end-to-end model on test data (which includes unknown tokens)
+print("\nCalling end-to-end model on test string...")
+test_data = tf.constant(["The one the other will absorb"])
+test_output = end_to_end_model(test_data)
+print("Model output:", test_output)
+```
+
+## Important gotchas
+
+### Working with lookup layers with very large vocabularies
+
+You may find yourself working with a very large vocabulary in a `TextVectorization`, a `StringLookup` layer,
+or an `IntegerLookup` layer. Typically, a vocabulary larger than 500MB would be considered "very large".
+
+In such a case, for best performance, you should avoid using `adapt()`.
+Instead, compute your vocabulary in advance
+(you could use Apache Beam or TF Transform for this)
+and store it in a file. Then load the vocabulary into the layer at construction
+time by passing the file path as the `vocabulary` argument.
+
+
+### Using lookup layers on a TPU pod or with `ParameterServerStrategy`
+
+There is an outstanding issue that causes performance to degrade when using
+a `TextVectorization`, `StringLookup`, or `IntegerLookup` layer while
+training on a TPU pod or on multiple machines via `ParameterServerStrategy`.
+This is slated to be fixed in TensorFlow 2.7.
diff --git a/.tether/vignettes-src/sequential_model.Rmd b/.tether/vignettes-src/sequential_model.Rmd
new file mode 100644
index 0000000000..1220f4ce9d
--- /dev/null
+++ b/.tether/vignettes-src/sequential_model.Rmd
@@ -0,0 +1,357 @@
+---
+title: The Sequential model
+author: '[fchollet](https://twitter.com/fchollet)'
+date-created: 2020/04/12
+last-modified: 2023/06/25
+description: Complete guide to the Sequential model.
+accelerator: GPU +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras-io/guides/sequential_model.py +--- + +## Setup + +```python +import keras +from keras import layers +from keras import ops +``` + +## When to use a Sequential model + +A `Sequential` model is appropriate for **a plain stack of layers** +where each layer has **exactly one input tensor and one output tensor**. + +Schematically, the following `Sequential` model: + +```python +# Define Sequential model with 3 layers +model = keras.Sequential( + [ + layers.Dense(2, activation="relu", name="layer1"), + layers.Dense(3, activation="relu", name="layer2"), + layers.Dense(4, name="layer3"), + ] +) +# Call model on a test input +x = ops.ones((3, 3)) +y = model(x) +``` + +is equivalent to this function: + +```python +# Create 3 layers +layer1 = layers.Dense(2, activation="relu", name="layer1") +layer2 = layers.Dense(3, activation="relu", name="layer2") +layer3 = layers.Dense(4, name="layer3") + +# Call layers on a test input +x = ops.ones((3, 3)) +y = layer3(layer2(layer1(x))) +``` + +A Sequential model is **not appropriate** when: + +- Your model has multiple inputs or multiple outputs +- Any of your layers has multiple inputs or multiple outputs +- You need to do layer sharing +- You want non-linear topology (e.g. a residual connection, a multi-branch +model) + +## Creating a Sequential model + +You can create a Sequential model by passing a list of layers to the Sequential +constructor: + +```python +model = keras.Sequential( + [ + layers.Dense(2, activation="relu"), + layers.Dense(3, activation="relu"), + layers.Dense(4), + ] +) +``` + +Its layers are accessible via the `layers` attribute: + +```python +model.layers +``` + +You can also create a Sequential model incrementally via the `add()` method: + +```python +model = keras.Sequential() +model.add(layers.Dense(2, activation="relu")) +model.add(layers.Dense(3, activation="relu")) +model.add(layers.Dense(4)) +``` + +Note that there's also a corresponding `pop()` method to remove layers: +a Sequential model behaves very much like a list of layers. + +```python +model.pop() +print(len(model.layers)) # 2 +``` + +Also note that the Sequential constructor accepts a `name` argument, just like +any layer or model in Keras. This is useful to annotate TensorBoard graphs +with semantically meaningful names. + +```python +model = keras.Sequential(name="my_sequential") +model.add(layers.Dense(2, activation="relu", name="layer1")) +model.add(layers.Dense(3, activation="relu", name="layer2")) +model.add(layers.Dense(4, name="layer3")) +``` + +## Specifying the input shape in advance + +Generally, all layers in Keras need to know the shape of their inputs +in order to be able to create their weights. So when you create a layer like +this, initially, it has no weights: + +```python +layer = layers.Dense(3) +layer.weights # Empty +``` + +It creates its weights the first time it is called on an input, since the shape +of the weights depends on the shape of the inputs: + +```python +# Call layer on a test input +x = ops.ones((1, 4)) +y = layer(x) +layer.weights # Now it has weights, of shape (4, 3) and (3,) +``` + +Naturally, this also applies to Sequential models. When you instantiate a +Sequential model without an input shape, it isn't "built": it has no weights +(and calling +`model.weights` results in an error stating just this). 
The weights are created +when the model first sees some input data: + +```python +model = keras.Sequential( + [ + layers.Dense(2, activation="relu"), + layers.Dense(3, activation="relu"), + layers.Dense(4), + ] +) # No weights at this stage! + +# At this point, you can't do this: +# model.weights + +# You also can't do this: +# model.summary() + +# Call the model on a test input +x = ops.ones((1, 4)) +y = model(x) +print("Number of weights after calling the model:", len(model.weights)) # 6 +``` + +Once a model is "built", you can call its `summary()` method to display its +contents: + +```python +model.summary() +``` + +However, it can be very useful when building a Sequential model incrementally +to be able to display the summary of the model so far, including the current +output shape. In this case, you should start your model by passing an `Input` +object to your model, so that it knows its input shape from the start: + +```python +model = keras.Sequential() +model.add(keras.Input(shape=(4,))) +model.add(layers.Dense(2, activation="relu")) + +model.summary() +``` + +Note that the `Input` object is not displayed as part of `model.layers`, since +it isn't a layer: + +```python +model.layers +``` + +Models built with a predefined input shape like this always have weights (even +before seeing any data) and always have a defined output shape. + +In general, it's a recommended best practice to always specify the input shape +of a Sequential model in advance if you know what it is. + +## A common debugging workflow: `add()` + `summary()` + +When building a new Sequential architecture, it's useful to incrementally stack +layers with `add()` and frequently print model summaries. For instance, this +enables you to monitor how a stack of `Conv2D` and `MaxPooling2D` layers is +downsampling image feature maps: + +```python +model = keras.Sequential() +model.add(keras.Input(shape=(250, 250, 3))) # 250x250 RGB images +model.add(layers.Conv2D(32, 5, strides=2, activation="relu")) +model.add(layers.Conv2D(32, 3, activation="relu")) +model.add(layers.MaxPooling2D(3)) + +# Can you guess what the current output shape is at this point? Probably not. +# Let's just print it: +model.summary() + +# The answer was: (40, 40, 32), so we can keep downsampling... + +model.add(layers.Conv2D(32, 3, activation="relu")) +model.add(layers.Conv2D(32, 3, activation="relu")) +model.add(layers.MaxPooling2D(3)) +model.add(layers.Conv2D(32, 3, activation="relu")) +model.add(layers.Conv2D(32, 3, activation="relu")) +model.add(layers.MaxPooling2D(2)) + +# And now? +model.summary() + +# Now that we have 4x4 feature maps, time to apply global max pooling. +model.add(layers.GlobalMaxPooling2D()) + +# Finally, we add a classification layer. +model.add(layers.Dense(10)) +``` + +Very practical, right? + +## What to do once you have a model + +Once your model architecture is ready, you will want to: + +- Train your model, evaluate it, and run inference. See our +[guide to training & evaluation with the built-in loops]( + /guides/training_with_built_in_methods/) +- Save your model to disk and restore it. See our +[guide to serialization & saving](/guides/serialization_and_saving/). + +## Feature extraction with a Sequential model + +Once a Sequential model has been built, it behaves like a +[Functional API model](/guides/functional_api/). +This means that every layer has an `input` +and `output` attribute. 
These attributes can be used to do neat things, like
+quickly creating a model that extracts the outputs of all intermediate layers in a
+Sequential model:
+
+```python
+initial_model = keras.Sequential(
+    [
+        keras.Input(shape=(250, 250, 3)),
+        layers.Conv2D(32, 5, strides=2, activation="relu"),
+        layers.Conv2D(32, 3, activation="relu"),
+        layers.Conv2D(32, 3, activation="relu"),
+    ]
+)
+feature_extractor = keras.Model(
+    inputs=initial_model.inputs,
+    outputs=[layer.output for layer in initial_model.layers],
+)
+
+# Call feature extractor on test input.
+x = ops.ones((1, 250, 250, 3))
+features = feature_extractor(x)
+```
+
+Here's a similar example that only extracts features from one layer:
+
+```python
+initial_model = keras.Sequential(
+    [
+        keras.Input(shape=(250, 250, 3)),
+        layers.Conv2D(32, 5, strides=2, activation="relu"),
+        layers.Conv2D(32, 3, activation="relu", name="my_intermediate_layer"),
+        layers.Conv2D(32, 3, activation="relu"),
+    ]
+)
+feature_extractor = keras.Model(
+    inputs=initial_model.inputs,
+    outputs=initial_model.get_layer(name="my_intermediate_layer").output,
+)
+# Call feature extractor on test input.
+x = ops.ones((1, 250, 250, 3))
+features = feature_extractor(x)
+```
+
+## Transfer learning with a Sequential model
+
+Transfer learning consists of freezing the bottom layers in a model and only training
+the top layers. If you aren't familiar with it, make sure to read our [guide
+to transfer learning](/guides/transfer_learning/).
+
+Here are two common transfer learning blueprints involving Sequential models.
+
+First, let's say that you have a Sequential model, and you want to freeze all
+layers except the last one. In this case, you would simply iterate over
+`model.layers` and set `layer.trainable = False` on each layer, except the
+last one. Like this:
+
+```python
+model = keras.Sequential([
+    keras.Input(shape=(784,)),
+    layers.Dense(32, activation='relu'),
+    layers.Dense(32, activation='relu'),
+    layers.Dense(32, activation='relu'),
+    layers.Dense(10),
+])
+
+# Presumably you would want to first load pre-trained weights.
+model.load_weights(...)
+
+# Freeze all layers except the last one.
+for layer in model.layers[:-1]:
+    layer.trainable = False
+
+# Recompile and train (this will only update the weights of the last layer).
+model.compile(...)
+model.fit(...)
+```
+
+Another common blueprint is to use a Sequential model to stack a pre-trained
+model and some freshly initialized classification layers. Like this:
+
+```python
+# Load a convolutional base with pre-trained weights
+base_model = keras.applications.Xception(
+    weights='imagenet',
+    include_top=False,
+    pooling='avg')
+
+# Freeze the base model
+base_model.trainable = False
+
+# Use a Sequential model to add a trainable classifier on top
+model = keras.Sequential([
+    base_model,
+    layers.Dense(1000),
+])
+
+# Compile & train
+model.compile(...)
+model.fit(...)
+```
+
+If you do transfer learning, you will probably find yourself frequently using
+these two patterns.
+
+That's about all you need to know about Sequential models!
+
+To find out more about building models in Keras, see:
+
+- [Guide to the Functional API](/guides/functional_api/)
+- [Guide to making new Layers & Models via subclassing](/guides/making_new_layers_and_models_via_subclassing/)
+
diff --git a/.tether/vignettes-src/serialization_and_saving.Rmd b/.tether/vignettes-src/serialization_and_saving.Rmd
new file mode 100644
index 0000000000..2c83e33342
--- /dev/null
+++ b/.tether/vignettes-src/serialization_and_saving.Rmd
@@ -0,0 +1,742 @@
+---
+title: Save, serialize, and export models
+authors: Neel Kovelamudi, Francois Chollet
+date-created: 2023/06/14
+last-modified: 2023/06/30
+description: Complete guide to saving, serializing, and exporting models.
+accelerator: None
+output: rmarkdown::html_vignette
+knit: ({source(here::here("tools/knit.R")); knit_vignette})
+tether: ~/github/keras-team/keras-io/guides/serialization_and_saving.py
+---
+
+## Introduction
+
+A Keras model consists of multiple components:
+
+- The architecture, or configuration, which specifies what layers the model
+contains, and how they're connected.
+- A set of weights values (the "state of the model").
+- An optimizer (defined by compiling the model).
+- A set of losses and metrics (defined by compiling the model).
+
+The Keras API saves all of these pieces together in a unified format,
+marked by the `.keras` extension. This is a zip archive consisting of the
+following:
+
+- A JSON-based configuration file (config.json): Records of model, layer, and
+other trackables' configuration.
+- An H5-based state file, such as `model.weights.h5` (for the whole model),
+with directory keys for layers and their weights.
+- A metadata file in JSON, storing things such as the current Keras version.
+
+Let's take a look at how this works.
+
+## How to save and load a model
+
+If you only have 10 seconds to read this guide, here's what you need to know.
+
+**Saving a Keras model:**
+
+```python
+model = ...  # Get model (Sequential, Functional Model, or Model subclass)
+model.save('path/to/location.keras')  # The file needs to end with the .keras extension
+```
+
+**Loading the model back:**
+
+```python
+model = keras.models.load_model('path/to/location.keras')
+```
+
+Now, let's look at the details.
+
+## Setup
+
+```python
+import numpy as np
+import keras
+from keras import ops
+```
+
+## Saving
+
+This section is about saving an entire model to a single file. The file will include:
+
+- The model's architecture/config
+- The model's weight values (which were learned during training)
+- The model's compilation information (if `compile()` was called)
+- The optimizer and its state, if any (this enables you to restart training
+where you left off)
+
+#### APIs
+
+You can save a model with `model.save()` or `keras.models.save_model()` (which is equivalent).
+You can load it back with `keras.models.load_model()`.
+
+The only supported format in Keras 3 is the "Keras v3" format,
+which uses the `.keras` extension.
+
+**Example:**
+
+```python
+def get_model():
+    # Create a simple model.
+    inputs = keras.Input(shape=(32,))
+    outputs = keras.layers.Dense(1)(inputs)
+    model = keras.Model(inputs, outputs)
+    model.compile(optimizer=keras.optimizers.Adam(), loss="mean_squared_error")
+    return model
+
+
+model = get_model()
+
+# Train the model.
+test_input = np.random.random((128, 32))
+test_target = np.random.random((128, 1))
+model.fit(test_input, test_target)
+
+# Calling `save('my_model.keras')` creates a zip archive `my_model.keras`.
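+# (Equivalently, keras.models.save_model(model, "my_model.keras") does the same thing.)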
+model.save("my_model.keras") + +# It can be used to reconstruct the model identically. +reconstructed_model = keras.models.load_model("my_model.keras") + +# Let's check: +np.testing.assert_allclose( + model.predict(test_input), reconstructed_model.predict(test_input) +) +``` + +### Custom objects + +This section covers the basic workflows for handling custom layers, functions, and +models in Keras saving and reloading. + +When saving a model that includes custom objects, such as a subclassed Layer, +you **must** define a `get_config()` method on the object class. +If the arguments passed to the constructor (`__init__()` method) of the custom object +aren't Python objects (anything other than base types like ints, strings, +etc.), then you **must** also explicitly deserialize these arguments in the `from_config()` +class method. + +Like this: + +```python +class CustomLayer(keras.layers.Layer): + def __init__(self, sublayer, **kwargs): + super().__init__(**kwargs) + self.sublayer = sublayer + + def call(self, x): + return self.sublayer(x) + + def get_config(self): + base_config = super().get_config() + config = { + "sublayer": keras.saving.serialize_keras_object(self.sublayer), + } + return {**base_config, **config} + + @classmethod + def from_config(cls, config): + sublayer_config = config.pop("sublayer") + sublayer = keras.saving.deserialize_keras_object(sublayer_config) + return cls(sublayer, **config) +``` + +Please see the [Defining the config methods section](#config_methods) for more +details and examples. + +The saved `.keras` file is lightweight and does not store the Python code for custom +objects. Therefore, to reload the model, `load_model` requires access to the definition +of any custom objects used through one of the following methods: + +1. Registering custom objects **(preferred)**, +2. Passing custom objects directly when loading, or +3. Using a custom object scope + +Below are examples of each workflow: + +#### Registering custom objects (**preferred**) + +This is the preferred method, as custom object registration greatly simplifies saving and +loading code. Adding the `@keras.saving.register_keras_serializable` decorator to the +class definition of a custom object registers the object globally in a master list, +allowing Keras to recognize the object when loading the model. + +Let's create a custom model involving both a custom layer and a custom activation +function to demonstrate this. + +**Example:** + +```python +# Clear all previously registered custom objects +keras.saving.get_custom_objects().clear() + + +# Upon registration, you can optionally specify a package or a name. +# If left blank, the package defaults to `Custom` and the name defaults to +# the class name. +@keras.saving.register_keras_serializable(package="MyLayers") +class CustomLayer(keras.layers.Layer): + def __init__(self, factor): + super().__init__() + self.factor = factor + + def call(self, x): + return x * self.factor + + def get_config(self): + return {"factor": self.factor} + + +@keras.saving.register_keras_serializable(package="my_package", name="custom_fn") +def custom_fn(x): + return x**2 + + +# Create the model. +def get_model(): + inputs = keras.Input(shape=(4,)) + mid = CustomLayer(0.5)(inputs) + outputs = keras.layers.Dense(1, activation=custom_fn)(mid) + model = keras.Model(inputs, outputs) + model.compile(optimizer="rmsprop", loss="mean_squared_error") + return model + + +# Train the model. 
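+# (Small helper reused by the later loading examples; it fits the model on
+# random data purely so the example runs quickly.)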
+def train_model(model):
+    input = np.random.random((4, 4))
+    target = np.random.random((4, 1))
+    model.fit(input, target)
+    return model
+
+
+test_input = np.random.random((4, 4))
+test_target = np.random.random((4, 1))
+
+model = get_model()
+model = train_model(model)
+model.save("custom_model.keras")
+
+# Now, we can simply load without worrying about our custom objects.
+reconstructed_model = keras.models.load_model("custom_model.keras")
+
+# Let's check:
+np.testing.assert_allclose(
+    model.predict(test_input), reconstructed_model.predict(test_input)
+)
+```
+
+#### Passing custom objects to `load_model()`
+
+```python
+model = get_model()
+model = train_model(model)
+
+# Calling `save('custom_model.keras')` creates a zip archive `custom_model.keras`.
+model.save("custom_model.keras")
+
+# Upon loading, pass a dict containing the custom objects used in the
+# `custom_objects` argument of `keras.models.load_model()`.
+reconstructed_model = keras.models.load_model(
+    "custom_model.keras",
+    custom_objects={"CustomLayer": CustomLayer, "custom_fn": custom_fn},
+)
+
+# Let's check:
+np.testing.assert_allclose(
+    model.predict(test_input), reconstructed_model.predict(test_input)
+)
+```
+
+#### Using a custom object scope
+
+Any code within the custom object scope will be able to recognize the custom objects
+passed to the scope argument. Therefore, loading the model within the scope will allow
+the loading of our custom objects.
+
+**Example:**
+
+```python
+model = get_model()
+model = train_model(model)
+model.save("custom_model.keras")
+
+# Pass the custom objects dictionary to a custom object scope and place
+# the `keras.models.load_model()` call within the scope.
+custom_objects = {"CustomLayer": CustomLayer, "custom_fn": custom_fn}
+
+with keras.saving.custom_object_scope(custom_objects):
+    reconstructed_model = keras.models.load_model("custom_model.keras")
+
+# Let's check:
+np.testing.assert_allclose(
+    model.predict(test_input), reconstructed_model.predict(test_input)
+)
+```
+
+### Model serialization
+
+This section is about saving only the model's configuration, without its state.
+The model's configuration (or architecture) specifies what layers the model
+contains, and how these layers are connected. If you have the configuration of a model,
+then the model can be created with a freshly initialized state (no weights or compilation
+information).
+
+#### APIs
+
+The following serialization APIs are available:
+
+- `keras.models.clone_model(model)`: make a (randomly initialized) copy of a model.
+- `get_config()` and `cls.from_config()`: retrieve the configuration of a layer or model, and recreate
+a model instance from its config, respectively.
+- `model.to_json()` and `keras.models.model_from_json()`: similar, but as JSON strings.
+- `keras.saving.serialize_keras_object()`: retrieve the configuration of any arbitrary Keras object.
+- `keras.saving.deserialize_keras_object()`: recreate an object instance from its configuration.
+
+#### In-memory model cloning
+
+You can do in-memory cloning of a model via `keras.models.clone_model()`.
+This is equivalent to getting the config then recreating the model from its config
+(so it does not preserve compilation information or layer weight values).
+
+**Example:**
+
+```python
+new_model = keras.models.clone_model(model)
+```
+
+#### `get_config()` and `from_config()`
+
+Calling `model.get_config()` or `layer.get_config()` will return a Python dict containing
+the configuration of the model or layer, respectively.
You should define `get_config()`
+to contain arguments needed for the `__init__()` method of the model or layer. At loading time,
+the `from_config(config)` method will then call `__init__()` with these arguments to
+reconstruct the model or layer.
+
+
+**Layer example:**
+
+```python
+layer = keras.layers.Dense(3, activation="relu")
+layer_config = layer.get_config()
+print(layer_config)
+```
+
+Now let's reconstruct the layer using the `from_config()` method:
+
+```python
+new_layer = keras.layers.Dense.from_config(layer_config)
+```
+
+**Sequential model example:**
+
+```python
+model = keras.Sequential([keras.Input((32,)), keras.layers.Dense(1)])
+config = model.get_config()
+new_model = keras.Sequential.from_config(config)
+```
+
+**Functional model example:**
+
+```python
+inputs = keras.Input((32,))
+outputs = keras.layers.Dense(1)(inputs)
+model = keras.Model(inputs, outputs)
+config = model.get_config()
+new_model = keras.Model.from_config(config)
+```
+
+#### `to_json()` and `keras.models.model_from_json()`
+
+This is similar to `get_config` / `from_config`, except it turns the model
+into a JSON string, which can then be loaded without the original model class.
+It is also specific to models; it isn't meant for layers.
+
+**Example:**
+
+```python
+model = keras.Sequential([keras.Input((32,)), keras.layers.Dense(1)])
+json_config = model.to_json()
+new_model = keras.models.model_from_json(json_config)
+```
+
+#### Arbitrary object serialization and deserialization
+
+The `keras.saving.serialize_keras_object()` and `keras.saving.deserialize_keras_object()`
+APIs are general-purpose APIs that can be used to serialize or deserialize any Keras
+object and any custom object. They are the foundation of saving model architecture and are
+behind all `serialize()`/`deserialize()` calls in Keras.
+
+**Example**:
+
+```python
+my_reg = keras.regularizers.L1(0.005)
+config = keras.saving.serialize_keras_object(my_reg)
+print(config)
+```
+
+Note the serialization format containing all the necessary information for proper
+reconstruction:
+
+- `module` containing the name of the Keras module or other identifying module the object
+comes from
+- `class_name` containing the name of the object's class.
+- `config` with all the information needed to reconstruct the object
+- `registered_name` for custom objects. See [here](#custom_object_serialization).
+
+Now we can reconstruct the regularizer.
+
+```python
+new_reg = keras.saving.deserialize_keras_object(config)
+```
+
+### Model weights saving
+
+You can choose to only save & load a model's weights. This can be useful if:
+
+- You only need the model for inference: in this case you won't need to
+restart training, so you don't need the compilation information or optimizer state.
+- You are doing transfer learning: in this case you will be training a new model
+reusing the state of a prior model, so you don't need the compilation
+information of the prior model.
+
+#### APIs for in-memory weight transfer
+
+Weights can be copied between different objects by using `get_weights()`
+and `set_weights()`:
+
+* `keras.layers.Layer.get_weights()`: Returns a list of NumPy arrays of weight values.
+* `keras.layers.Layer.set_weights(weights)`: Sets the model weights to the values
+provided (as NumPy arrays).
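+
+For instance, here is a minimal round trip (a quick sketch; the full examples follow
+below):
+
+```python
+# Two identically shaped Dense layers, built so their weights exist.
+source = keras.layers.Dense(4)
+target = keras.layers.Dense(4)
+source.build((None, 8))  # Creates weights of shape (8, 4) and (4,)
+target.build((None, 8))
+
+# `get_weights()` returns [kernel, bias] as NumPy arrays;
+# `set_weights()` expects a list with matching shapes and order.
+target.set_weights(source.get_weights())
+```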
+ +Examples: + +***Transferring weights from one layer to another, in memory*** + +```python +def create_layer(): + layer = keras.layers.Dense(64, activation="relu", name="dense_2") + layer.build((None, 784)) + return layer + + +layer_1 = create_layer() +layer_2 = create_layer() + +# Copy weights from layer 1 to layer 2 +layer_2.set_weights(layer_1.get_weights()) +``` + +***Transferring weights from one model to another model with a compatible architecture, in memory*** + +```python +# Create a simple functional model +inputs = keras.Input(shape=(784,), name="digits") +x = keras.layers.Dense(64, activation="relu", name="dense_1")(inputs) +x = keras.layers.Dense(64, activation="relu", name="dense_2")(x) +outputs = keras.layers.Dense(10, name="predictions")(x) +functional_model = keras.Model(inputs=inputs, outputs=outputs, name="3_layer_mlp") + + +# Define a subclassed model with the same architecture +class SubclassedModel(keras.Model): + def __init__(self, output_dim, name=None): + super().__init__(name=name) + self.output_dim = output_dim + self.dense_1 = keras.layers.Dense(64, activation="relu", name="dense_1") + self.dense_2 = keras.layers.Dense(64, activation="relu", name="dense_2") + self.dense_3 = keras.layers.Dense(output_dim, name="predictions") + + def call(self, inputs): + x = self.dense_1(inputs) + x = self.dense_2(x) + x = self.dense_3(x) + return x + + def get_config(self): + return {"output_dim": self.output_dim, "name": self.name} + + +subclassed_model = SubclassedModel(10) +# Call the subclassed model once to create the weights. +subclassed_model(np.ones((1, 784))) + +# Copy weights from functional_model to subclassed_model. +subclassed_model.set_weights(functional_model.get_weights()) + +assert len(functional_model.weights) == len(subclassed_model.weights) +for a, b in zip(functional_model.weights, subclassed_model.weights): + np.testing.assert_allclose(a.numpy(), b.numpy()) +``` + +***The case of stateless layers*** + +Because stateless layers do not change the order or number of weights, +models can have compatible architectures even if there are extra/missing +stateless layers. + +```python +inputs = keras.Input(shape=(784,), name="digits") +x = keras.layers.Dense(64, activation="relu", name="dense_1")(inputs) +x = keras.layers.Dense(64, activation="relu", name="dense_2")(x) +outputs = keras.layers.Dense(10, name="predictions")(x) +functional_model = keras.Model(inputs=inputs, outputs=outputs, name="3_layer_mlp") + +inputs = keras.Input(shape=(784,), name="digits") +x = keras.layers.Dense(64, activation="relu", name="dense_1")(inputs) +x = keras.layers.Dense(64, activation="relu", name="dense_2")(x) + +# Add a dropout layer, which does not contain any weights. +x = keras.layers.Dropout(0.5)(x) +outputs = keras.layers.Dense(10, name="predictions")(x) +functional_model_with_dropout = keras.Model( + inputs=inputs, outputs=outputs, name="3_layer_mlp" +) + +functional_model_with_dropout.set_weights(functional_model.get_weights()) +``` + +#### APIs for saving weights to disk & loading them back + +Weights can be saved to disk by calling `model.save_weights(filepath)`. +The filename should end in `.weights.h5`. 
+
+**Example:**
+
+```python
+# Runnable example
+sequential_model = keras.Sequential(
+    [
+        keras.Input(shape=(784,), name="digits"),
+        keras.layers.Dense(64, activation="relu", name="dense_1"),
+        keras.layers.Dense(64, activation="relu", name="dense_2"),
+        keras.layers.Dense(10, name="predictions"),
+    ]
+)
+sequential_model.save_weights("my_model.weights.h5")
+sequential_model.load_weights("my_model.weights.h5")
+```
+
+Note that changing `layer.trainable` may result in a different
+`layer.weights` ordering when the model contains nested layers.
+
+```python
+class NestedDenseLayer(keras.layers.Layer):
+    def __init__(self, units, name=None):
+        super().__init__(name=name)
+        self.dense_1 = keras.layers.Dense(units, name="dense_1")
+        self.dense_2 = keras.layers.Dense(units, name="dense_2")
+
+    def call(self, inputs):
+        return self.dense_2(self.dense_1(inputs))
+
+
+nested_model = keras.Sequential([keras.Input((784,)), NestedDenseLayer(10, "nested")])
+variable_names = [v.name for v in nested_model.weights]
+print("variables: {}".format(variable_names))
+
+print("\nChanging trainable status of one of the nested layers...")
+nested_model.get_layer("nested").dense_1.trainable = False
+
+variable_names_2 = [v.name for v in nested_model.weights]
+print("\nvariables: {}".format(variable_names_2))
+print("variable ordering changed:", variable_names != variable_names_2)
+```
+
+##### **Transfer learning example**
+
+When loading pretrained weights from a weights file, it is recommended to load
+the weights into the original checkpointed model, and then extract
+the desired weights/layers into a new model.
+
+**Example:**
+
+```python
+def create_functional_model():
+    inputs = keras.Input(shape=(784,), name="digits")
+    x = keras.layers.Dense(64, activation="relu", name="dense_1")(inputs)
+    x = keras.layers.Dense(64, activation="relu", name="dense_2")(x)
+    outputs = keras.layers.Dense(10, name="predictions")(x)
+    return keras.Model(inputs=inputs, outputs=outputs, name="3_layer_mlp")
+
+
+functional_model = create_functional_model()
+functional_model.save_weights("pretrained.weights.h5")
+
+# In a separate program:
+pretrained_model = create_functional_model()
+pretrained_model.load_weights("pretrained.weights.h5")
+
+# Create a new model by extracting layers from the original model:
+extracted_layers = pretrained_model.layers[:-1]
+extracted_layers.append(keras.layers.Dense(5, name="dense_3"))
+model = keras.Sequential(extracted_layers)
+model.summary()
+```
+
+### Appendix: Handling custom objects
+
+
+#### Defining the config methods
+
+Specifications:
+
+* `get_config()` should return a JSON-serializable dictionary in order to be
+compatible with the Keras architecture- and model-saving APIs.
+* `from_config(config)` (a `classmethod`) should return a new layer or model
+object that is created from the config.
+The default implementation returns `cls(**config)`.
+
+**NOTE**: If all your constructor arguments are already serializable, e.g. strings and
+ints, or non-custom Keras objects, overriding `from_config` is not necessary. However,
+for more complex objects such as layers or models passed to `__init__`, deserialization
+must be handled explicitly, either in `__init__` itself or by overriding the `from_config()`
+method.
+ +**Example:** + +```python +@keras.saving.register_keras_serializable(package="MyLayers", name="KernelMult") +class MyDense(keras.layers.Layer): + def __init__( + self, + units, + *, + kernel_regularizer=None, + kernel_initializer=None, + nested_model=None, + **kwargs + ): + super().__init__(**kwargs) + self.hidden_units = units + self.kernel_regularizer = kernel_regularizer + self.kernel_initializer = kernel_initializer + self.nested_model = nested_model + + def get_config(self): + config = super().get_config() + # Update the config with the custom layer's parameters + config.update( + { + "units": self.hidden_units, + "kernel_regularizer": self.kernel_regularizer, + "kernel_initializer": self.kernel_initializer, + "nested_model": self.nested_model, + } + ) + return config + + def build(self, input_shape): + input_units = input_shape[-1] + self.kernel = self.add_weight( + name="kernel", + shape=(input_units, self.hidden_units), + regularizer=self.kernel_regularizer, + initializer=self.kernel_initializer, + ) + + def call(self, inputs): + return ops.matmul(inputs, self.kernel) + + +layer = MyDense(units=16, kernel_regularizer="l1", kernel_initializer="ones") +layer3 = MyDense(units=64, nested_model=layer) + +config = keras.layers.serialize(layer3) + +print(config) + +new_layer = keras.layers.deserialize(config) + +print(new_layer) +``` + +Note that overriding `from_config` is unnecessary above for `MyDense` because +`hidden_units`, `kernel_initializer`, and `kernel_regularizer` are ints, strings, and a +built-in Keras object, respectively. This means that the default `from_config` +implementation of `cls(**config)` will work as intended. + +For more complex objects, such as layers and models passed to `__init__`, for +example, you must explicitly deserialize these objects. Let's take a look at an example +of a model where a `from_config` override is necessary. + +**Example:** + + +```python +@keras.saving.register_keras_serializable(package="ComplexModels") +class CustomModel(keras.layers.Layer): + def __init__(self, first_layer, second_layer=None, **kwargs): + super().__init__(**kwargs) + self.first_layer = first_layer + if second_layer is not None: + self.second_layer = second_layer + else: + self.second_layer = keras.layers.Dense(8) + + def get_config(self): + config = super().get_config() + config.update( + { + "first_layer": self.first_layer, + "second_layer": self.second_layer, + } + ) + return config + + @classmethod + def from_config(cls, config): + # Note that you can also use `keras.saving.deserialize_keras_object` here + config["first_layer"] = keras.layers.deserialize(config["first_layer"]) + config["second_layer"] = keras.layers.deserialize(config["second_layer"]) + return cls(**config) + + def call(self, inputs): + return self.first_layer(self.second_layer(inputs)) + + +# Let's make our first layer the custom layer from the previous example (MyDense) +inputs = keras.Input((32,)) +outputs = CustomModel(first_layer=layer)(inputs) +model = keras.Model(inputs, outputs) + +config = model.get_config() +new_model = keras.Model.from_config(config) +``` + + +#### How custom objects are serialized + +The serialization format has a special key for custom objects registered via +`@keras.saving.register_keras_serializable`. This `registered_name` key allows for easy +retrieval at loading/deserialization time while also allowing users to add custom naming. + +Let's take a look at the config from serializing the custom layer `MyDense` we defined +above. 
+
+**Example**:
+
+```python
+layer = MyDense(
+    units=16,
+    kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-4),
+    kernel_initializer="ones",
+)
+config = keras.layers.serialize(layer)
+print(config)
+```
+
+As shown, the `registered_name` key contains the lookup information for the Keras master
+list, including the package `MyLayers` and the custom name `KernelMult` that we gave in
+the `@keras.saving.register_keras_serializable` decorator. Take a look again at the custom
+class definition/registration [here](#registration_example).
+
+Note that the `class_name` key contains the original name of the class, allowing for
+proper re-initialization in `from_config`.
+
+Additionally, note that the `module` key is `None` since this is a custom object.
+
diff --git a/.tether/vignettes-src/training_with_built_in_methods.Rmd b/.tether/vignettes-src/training_with_built_in_methods.Rmd
new file mode 100644
index 0000000000..21d6780a81
--- /dev/null
+++ b/.tether/vignettes-src/training_with_built_in_methods.Rmd
@@ -0,0 +1,1196 @@
+---
+title: Training & evaluation with the built-in methods
+author: '[fchollet](https://twitter.com/fchollet)'
+date-created: 2019/03/01
+last-modified: 2023/06/25
+description: Complete guide to training & evaluation with `fit()` and `evaluate()`.
+accelerator: GPU
+output: rmarkdown::html_vignette
+knit: ({source(here::here("tools/knit.R")); knit_vignette})
+tether: ~/github/keras-team/keras-io/guides/training_with_built_in_methods.py
+---
+
+## Setup
+
+```python
+# We import torch & TF so as to use torch Dataloaders & tf.data.Datasets.
+import torch
+import tensorflow as tf
+
+import os
+import numpy as np
+import keras
+from keras import layers
+from keras import ops
+```
+
+## Introduction
+
+This guide covers training, evaluation, and prediction (inference) with models
+when using built-in APIs for training & validation (such as `Model.fit()`,
+`Model.evaluate()` and `Model.predict()`).
+
+If you are interested in leveraging `fit()` while specifying your
+own training step function, see the guides on customizing what happens in `fit()`:
+
+- [Writing a custom train step with TensorFlow](/guides/custom_train_step_in_tensorflow/)
+- [Writing a custom train step with JAX](/guides/custom_train_step_in_jax/)
+- [Writing a custom train step with PyTorch](/guides/custom_train_step_in_torch/)
+
+If you are interested in writing your own training & evaluation loops from
+scratch, see the guides on writing training loops:
+
+- [Writing a training loop with TensorFlow](/guides/writing_a_custom_training_loop_in_tensorflow/)
+- [Writing a training loop with JAX](/guides/writing_a_custom_training_loop_in_jax/)
+- [Writing a training loop with PyTorch](/guides/writing_a_custom_training_loop_in_torch/)
+
+In general, whether you are using built-in loops or writing your own, model training &
+evaluation works in exactly the same way across every kind of Keras model --
+Sequential models, models built with the Functional API, and models written from
+scratch via model subclassing.
+
+## API overview: a first end-to-end example
+
+When passing data to the built-in training loops of a model, you should either use:
+
+- NumPy arrays (if your data is small and fits in memory)
+- Subclasses of `keras.utils.PyDataset`
+- `tf.data.Dataset` objects
+- PyTorch `DataLoader` instances
+
+In the next few paragraphs, we'll use the MNIST dataset as NumPy arrays, in
+order to demonstrate how to use optimizers, losses, and metrics.
Afterwards, we'll
+take a close look at each of the other options.
+
+Let's consider the following model (here, we build it with the Functional API, but it
+could be a Sequential model or a subclassed model as well):
+
+```python
+inputs = keras.Input(shape=(784,), name="digits")
+x = layers.Dense(64, activation="relu", name="dense_1")(inputs)
+x = layers.Dense(64, activation="relu", name="dense_2")(x)
+outputs = layers.Dense(10, activation="softmax", name="predictions")(x)
+
+model = keras.Model(inputs=inputs, outputs=outputs)
+```
+
+Here's what the typical end-to-end workflow looks like, consisting of:
+
+- Training
+- Validation on a holdout set generated from the original training data
+- Evaluation on the test data
+
+We'll use MNIST data for this example.
+
+```python
+(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
+
+# Preprocess the data (these are NumPy arrays)
+x_train = x_train.reshape(60000, 784).astype("float32") / 255
+x_test = x_test.reshape(10000, 784).astype("float32") / 255
+
+y_train = y_train.astype("float32")
+y_test = y_test.astype("float32")
+
+# Reserve 10,000 samples for validation
+x_val = x_train[-10000:]
+y_val = y_train[-10000:]
+x_train = x_train[:-10000]
+y_train = y_train[:-10000]
+```
+
+We specify the training configuration (optimizer, loss, metrics):
+
+```python
+model.compile(
+    optimizer=keras.optimizers.RMSprop(),  # Optimizer
+    # Loss function to minimize
+    loss=keras.losses.SparseCategoricalCrossentropy(),
+    # List of metrics to monitor
+    metrics=[keras.metrics.SparseCategoricalAccuracy()],
+)
+```
+
+We call `fit()`, which will train the model by slicing the data into "batches" of size
+`batch_size`, and repeatedly iterating over the entire dataset for a given number of
+`epochs`.
+
+```python
+print("Fit model on training data")
+history = model.fit(
+    x_train,
+    y_train,
+    batch_size=64,
+    epochs=2,
+    # We pass some validation data for
+    # monitoring validation loss and metrics
+    # at the end of each epoch
+    validation_data=(x_val, y_val),
+)
+```
+
+The returned `history` object holds a record of the loss values and metric values
+during training:
+
+```python
+print(history.history)
+```
+
+We evaluate the model on the test data via `evaluate()`:
+
+```python
+# Evaluate the model on the test data using `evaluate`
+print("Evaluate on test data")
+results = model.evaluate(x_test, y_test, batch_size=128)
+print("test loss, test acc:", results)
+
+# Generate predictions (probabilities -- the output of the last layer)
+# on new data using `predict`
+print("Generate predictions for 3 samples")
+predictions = model.predict(x_test[:3])
+print("predictions shape:", predictions.shape)
+```
+
+Now, let's review each piece of this workflow in detail.
+
+## The `compile()` method: specifying a loss, metrics, and an optimizer
+
+To train a model with `fit()`, you need to specify a loss function, an optimizer, and
+optionally, some metrics to monitor.
+
+You pass these to the model as arguments to the `compile()` method:
+
+```python
+model.compile(
+    optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),
+    loss=keras.losses.SparseCategoricalCrossentropy(),
+    metrics=[keras.metrics.SparseCategoricalAccuracy()],
+)
+```
+
+The `metrics` argument should be a list -- your model can have any number of metrics.
+
+If your model has multiple outputs, you can specify different losses and metrics for
+each output, and you can modulate the contribution of each output to the total loss of
+the model.
You will find more details about this in the **Passing data to multi-input,
+multi-output models** section.
+
+Note that if you're satisfied with the default settings, in many cases the optimizer,
+loss, and metrics can be specified via string identifiers as a shortcut:
+
+```python
+model.compile(
+    optimizer="rmsprop",
+    loss="sparse_categorical_crossentropy",
+    metrics=["sparse_categorical_accuracy"],
+)
+```
+
+For later reuse, let's put our model definition and compile step in functions; we will
+call them several times across different examples in this guide.
+
+```python
+def get_uncompiled_model():
+    inputs = keras.Input(shape=(784,), name="digits")
+    x = layers.Dense(64, activation="relu", name="dense_1")(inputs)
+    x = layers.Dense(64, activation="relu", name="dense_2")(x)
+    outputs = layers.Dense(10, activation="softmax", name="predictions")(x)
+    model = keras.Model(inputs=inputs, outputs=outputs)
+    return model
+
+
+def get_compiled_model():
+    model = get_uncompiled_model()
+    model.compile(
+        optimizer="rmsprop",
+        loss="sparse_categorical_crossentropy",
+        metrics=["sparse_categorical_accuracy"],
+    )
+    return model
+```
+
+### Many built-in optimizers, losses, and metrics are available
+
+In general, you won't have to create your own losses, metrics, or optimizers
+from scratch, because what you need is likely to be already part of the Keras API:
+
+Optimizers:
+
+- `SGD()` (with or without momentum)
+- `RMSprop()`
+- `Adam()`
+- etc.
+
+Losses:
+
+- `MeanSquaredError()`
+- `KLDivergence()`
+- `CosineSimilarity()`
+- etc.
+
+Metrics:
+
+- `AUC()`
+- `Precision()`
+- `Recall()`
+- etc.
+
+### Custom losses
+
+If you need to create a custom loss, Keras provides two ways to do so.
+
+The first method involves creating a function that accepts inputs `y_true` and
+`y_pred`. The following example shows a loss function that computes the mean squared
+error between the real data and the predictions:
+
+```python
+def custom_mean_squared_error(y_true, y_pred):
+    return ops.mean(ops.square(y_true - y_pred), axis=-1)
+
+
+model = get_uncompiled_model()
+model.compile(optimizer=keras.optimizers.Adam(), loss=custom_mean_squared_error)
+
+# We need to one-hot encode the labels to use MSE
+y_train_one_hot = ops.one_hot(y_train, num_classes=10)
+model.fit(x_train, y_train_one_hot, batch_size=64, epochs=1)
+```
+
+The second method is to subclass the `keras.losses.Loss` class, which is the way to go
+if you need a loss function that takes in parameters besides `y_true` and `y_pred`.
+You will then implement the following two methods:
+
+- `__init__(self)`: accept parameters to pass during the call of your loss function
+- `call(self, y_true, y_pred)`: use the targets (y_true) and the model predictions
+(y_pred) to compute the model's loss
+
+Let's say you want to use mean squared error, but with an added term that
+will de-incentivize prediction values far from 0.5 (we assume that the categorical
+targets are one-hot encoded and take values between 0 and 1). This
+creates an incentive for the model not to be too confident, which may help
+reduce overfitting (we won't know if it works until we try!).
+ +Here's how you would do it: + +```python +class CustomMSE(keras.losses.Loss): + def __init__(self, regularization_factor=0.1, name="custom_mse"): + super().__init__(name=name) + self.regularization_factor = regularization_factor + + def call(self, y_true, y_pred): + mse = ops.mean(ops.square(y_true - y_pred), axis=-1) + reg = ops.mean(ops.square(0.5 - y_pred), axis=-1) + return mse + reg * self.regularization_factor + + +model = get_uncompiled_model() +model.compile(optimizer=keras.optimizers.Adam(), loss=CustomMSE()) + +y_train_one_hot = ops.one_hot(y_train, num_classes=10) +model.fit(x_train, y_train_one_hot, batch_size=64, epochs=1) +``` + +### Custom metrics + +If you need a metric that isn't part of the API, you can easily create custom metrics +by subclassing the `keras.metrics.Metric` class. You will need to implement 4 +methods: + +- `__init__(self)`, in which you will create state variables for your metric. +- `update_state(self, y_true, y_pred, sample_weight=None)`, which uses the targets +y_true and the model predictions y_pred to update the state variables. +- `result(self)`, which uses the state variables to compute the final results. +- `reset_state(self)`, which reinitializes the state of the metric. + +State update and results computation are kept separate (in `update_state()` and +`result()`, respectively) because in some cases, the results computation might be very +expensive and would only be done periodically. + +Here's a simple example showing how to implement a `CategoricalTruePositives` metric +that counts how many samples were correctly classified as belonging to a given class: + +```python +class CategoricalTruePositives(keras.metrics.Metric): + def __init__(self, name="categorical_true_positives", **kwargs): + super().__init__(name=name, **kwargs) + self.true_positives = self.add_variable( + shape=(), name="ctp", initializer="zeros" + ) + + def update_state(self, y_true, y_pred, sample_weight=None): + y_pred = ops.reshape(ops.argmax(y_pred, axis=1), (-1, 1)) + values = ops.cast(y_true, "int32") == ops.cast(y_pred, "int32") + values = ops.cast(values, "float32") + if sample_weight is not None: + sample_weight = ops.cast(sample_weight, "float32") + values = ops.multiply(values, sample_weight) + self.true_positives.assign_add(ops.sum(values)) + + def result(self): + return self.true_positives.value + + def reset_state(self): + # The state of the metric will be reset at the start of each epoch. + self.true_positives.assign(0.0) + + +model = get_uncompiled_model() +model.compile( + optimizer=keras.optimizers.RMSprop(learning_rate=1e-3), + loss=keras.losses.SparseCategoricalCrossentropy(), + metrics=[CategoricalTruePositives()], +) +model.fit(x_train, y_train, batch_size=64, epochs=3) +``` + +### Handling losses and metrics that don't fit the standard signature + +The overwhelming majority of losses and metrics can be computed from `y_true` and +`y_pred`, where `y_pred` is an output of your model -- but not all of them. For +instance, a regularization loss may only require the activation of a layer (there are +no targets in this case), and this activation may not be a model output. + +In such cases, you can call `self.add_loss(loss_value)` from inside the call method of +a custom layer. Losses added in this way get added to the "main" loss during training +(the one passed to `compile()`). 
Here's a simple example that adds activity
+regularization (note that activity regularization is built into all Keras layers --
+this layer is just for the sake of providing a concrete example):
+
+```python
+class ActivityRegularizationLayer(layers.Layer):
+    def call(self, inputs):
+        self.add_loss(ops.sum(inputs) * 0.1)
+        return inputs  # Pass-through layer.
+
+
+inputs = keras.Input(shape=(784,), name="digits")
+x = layers.Dense(64, activation="relu", name="dense_1")(inputs)
+
+# Insert activity regularization as a layer
+x = ActivityRegularizationLayer()(x)
+
+x = layers.Dense(64, activation="relu", name="dense_2")(x)
+outputs = layers.Dense(10, name="predictions")(x)
+
+model = keras.Model(inputs=inputs, outputs=outputs)
+model.compile(
+    optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),
+    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+)
+
+# The displayed loss will be much higher than before
+# due to the regularization component.
+model.fit(x_train, y_train, batch_size=64, epochs=1)
+```
+
+Note that when you pass losses via `add_loss()`, it becomes possible to call
+`compile()` without a loss function, since the model already has a loss to minimize.
+
+Consider the following `LogisticEndpoint` layer: it takes as inputs
+targets & logits, and it tracks a crossentropy loss via `add_loss()`.
+
+```python
+class LogisticEndpoint(keras.layers.Layer):
+    def __init__(self, name=None):
+        super().__init__(name=name)
+        self.loss_fn = keras.losses.BinaryCrossentropy(from_logits=True)
+
+    def call(self, targets, logits, sample_weights=None):
+        # Compute the training-time loss value and add it
+        # to the layer using `self.add_loss()`.
+        loss = self.loss_fn(targets, logits, sample_weights)
+        self.add_loss(loss)
+
+        # Return the inference-time prediction tensor (for `.predict()`).
+        return ops.softmax(logits)
+```
+
+You can use it in a model with two inputs (input data & targets), compiled without a
+`loss` argument, like this:
+
+```python
+inputs = keras.Input(shape=(3,), name="inputs")
+targets = keras.Input(shape=(10,), name="targets")
+logits = keras.layers.Dense(10)(inputs)
+predictions = LogisticEndpoint(name="predictions")(targets, logits)
+
+model = keras.Model(inputs=[inputs, targets], outputs=predictions)
+model.compile(optimizer="adam")  # No loss argument!
+
+data = {
+    "inputs": np.random.random((3, 3)),
+    "targets": np.random.random((3, 10)),
+}
+model.fit(data)
+```
+
+For more information about training multi-input models, see the section **Passing data
+to multi-input, multi-output models**.
+
+### Automatically setting apart a validation holdout set
+
+In the first end-to-end example you saw, we used the `validation_data` argument to pass
+a tuple of NumPy arrays `(x_val, y_val)` to the model for evaluating a validation loss
+and validation metrics at the end of each epoch.
+
+Here's another option: the argument `validation_split` allows you to automatically
+reserve part of your training data for validation. The argument value represents the
+fraction of the data to be reserved for validation, so it should be set to a number
+higher than 0 and lower than 1. For instance, `validation_split=0.2` means "use 20% of
+the data for validation", and `validation_split=0.6` means "use 60% of the data for
+validation".
+
+The way the validation is computed is by taking the last x% samples of the arrays
+received by the `fit()` call, before any shuffling.
+
+Note that you can only use `validation_split` when training with NumPy data.
+
+```python
+model = get_compiled_model()
+model.fit(x_train, y_train, batch_size=64, validation_split=0.2, epochs=1)
+```
+
+## Training & evaluation using `tf.data` Datasets
+
+In the past few paragraphs, you've seen how to handle losses, metrics, and optimizers,
+and you've seen how to use the `validation_data` and `validation_split` arguments in
+`fit()`, when your data is passed as NumPy arrays.
+
+Another option is to use an iterator-like object, such as a `tf.data.Dataset`, a
+PyTorch `DataLoader`, or a Keras `PyDataset`. Let's take a look at the first of these.
+
+The `tf.data` API is a set of utilities in TensorFlow 2.0 for loading and preprocessing
+data in a way that's fast and scalable. For a complete guide about creating `Datasets`,
+see the [tf.data documentation](https://www.tensorflow.org/guide/data).
+
+**You can use `tf.data` to train your Keras
+models regardless of the backend you're using --
+whether it's JAX, PyTorch, or TensorFlow.**
+You can pass a `Dataset` instance directly to the methods `fit()`, `evaluate()`, and
+`predict()`:
+
+```python
+model = get_compiled_model()
+
+# First, let's create a training Dataset instance.
+# For the sake of our example, we'll use the same MNIST data as before.
+train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
+# Shuffle and slice the dataset.
+train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)
+
+# Now we get a test dataset.
+test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
+test_dataset = test_dataset.batch(64)
+
+# Since the dataset already takes care of batching,
+# we don't pass a `batch_size` argument.
+model.fit(train_dataset, epochs=3)
+
+# You can also evaluate or predict on a dataset.
+print("Evaluate")
+result = model.evaluate(test_dataset)
+dict(zip(model.metrics_names, result))
+```
+
+Note that the Dataset is reset at the end of each epoch, so it can be reused for the
+next epoch.
+
+If you want to run training only on a specific number of batches from this Dataset, you
+can pass the `steps_per_epoch` argument, which specifies how many training steps the
+model should run using this Dataset before moving on to the next epoch.
+
+```python
+model = get_compiled_model()
+
+# Prepare the training dataset
+train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
+train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)
+
+# Only use 100 batches per epoch (that's 64 * 100 samples)
+model.fit(train_dataset, epochs=3, steps_per_epoch=100)
+```
+
+You can also pass a `Dataset` instance as the `validation_data` argument in `fit()`:
+
+```python
+model = get_compiled_model()
+
+# Prepare the training dataset
+train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
+train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)
+
+# Prepare the validation dataset
+val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
+val_dataset = val_dataset.batch(64)
+
+model.fit(train_dataset, epochs=1, validation_data=val_dataset)
+```
+
+At the end of each epoch, the model will iterate over the validation dataset and
+compute the validation loss and validation metrics.
+
+If you want to run validation only on a specific number of batches from this dataset,
+you can pass the `validation_steps` argument, which specifies how many validation
+steps the model should run with the validation dataset before interrupting validation
+and moving on to the next epoch:
+
+```python
+model = get_compiled_model()
+
+# Prepare the training dataset
+train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
+train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)
+
+# Prepare the validation dataset
+val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
+val_dataset = val_dataset.batch(64)
+
+model.fit(
+    train_dataset,
+    epochs=1,
+    # Only run validation using the first 10 batches of the dataset
+    # using the `validation_steps` argument
+    validation_data=val_dataset,
+    validation_steps=10,
+)
+```
+
+Note that the validation dataset will be reset after each use (so that you will always
+be evaluating on the same samples from epoch to epoch).
+
+The argument `validation_split` (generating a holdout set from the training data) is
+not supported when training from `Dataset` objects, since this feature requires the
+ability to index the samples of the datasets, which is not possible in general with
+the `Dataset` API.
+
+## Training & evaluation using `PyDataset` instances
+
+`keras.utils.PyDataset` is a utility that you can subclass to obtain
+a Python data iterator with two important properties:
+
+- It works well with multiprocessing.
+- It can be shuffled (e.g. when passing `shuffle=True` in `fit()`).
+
+A `PyDataset` must implement two methods:
+
+- `__getitem__`
+- `__len__`
+
+The method `__getitem__` should return a complete batch.
+If you want to modify your dataset between epochs, you may implement `on_epoch_end`.
+
+Here's a quick example:
+
+```python
+class ExamplePyDataset(keras.utils.PyDataset):
+    def __init__(self, x, y, batch_size, **kwargs):
+        super().__init__(**kwargs)
+        self.x = x
+        self.y = y
+        self.batch_size = batch_size
+
+    def __len__(self):
+        return int(np.ceil(len(self.x) / float(self.batch_size)))
+
+    def __getitem__(self, idx):
+        batch_x = self.x[idx * self.batch_size : (idx + 1) * self.batch_size]
+        batch_y = self.y[idx * self.batch_size : (idx + 1) * self.batch_size]
+        return batch_x, batch_y
+
+
+train_py_dataset = ExamplePyDataset(x_train, y_train, batch_size=32)
+val_py_dataset = ExamplePyDataset(x_val, y_val, batch_size=32)
+```
+
+To fit the model, pass the dataset as the `x` argument (no need for a `y`
+argument, since the dataset includes the targets), and pass the validation dataset
+as the `validation_data` argument. There is also no need for a `batch_size` argument,
+since the dataset is already batched!
+
+```python
+model = get_compiled_model()
+model.fit(train_py_dataset, validation_data=val_py_dataset, epochs=1)
+```
+
+Evaluating the model is just as easy:
+
+```python
+model.evaluate(val_py_dataset)
+```
+
+Importantly, `PyDataset` objects support three common constructor arguments
+that handle the parallel processing configuration:
+
+- `workers`: Number of workers to use in multithreading or
+  multiprocessing. Typically, you'd set it to the number of
+  cores on your CPU.
+- `use_multiprocessing`: Whether to use Python multiprocessing for
+  parallelism. Setting this to `True` means that your
+  dataset will be replicated in multiple forked processes.
+  This is necessary to gain compute-level (rather than I/O level)
+  benefits from parallelism.
However, it can only be set to
+  `True` if your dataset can be safely pickled.
+- `max_queue_size`: Maximum number of batches to keep in the queue
+  when iterating over the dataset in a multithreaded or
+  multiprocessed setting.
+  You can reduce this value to reduce the CPU memory consumption of
+  your dataset. It defaults to 10.
+
+By default, multiprocessing is disabled (`use_multiprocessing=False`) and only
+one thread is used. You should make sure to only turn on `use_multiprocessing` if
+your code is running inside a Python `if __name__ == "__main__":` block in order
+to avoid issues.
+
+Here's a 4-thread, non-multiprocessed example:
+
+```python
+train_py_dataset = ExamplePyDataset(x_train, y_train, batch_size=32, workers=4)
+val_py_dataset = ExamplePyDataset(x_val, y_val, batch_size=32, workers=4)
+
+model = get_compiled_model()
+model.fit(train_py_dataset, validation_data=val_py_dataset, epochs=1)
+```
+
+## Training & evaluation using PyTorch `DataLoader` objects
+
+All built-in training and evaluation APIs are also compatible with `torch.utils.data.Dataset` and
+`torch.utils.data.DataLoader` objects -- regardless of whether you're using the PyTorch backend,
+or the JAX or TensorFlow backends. Let's take a look at a simple example.
+
+Unlike `PyDataset`, which is batch-centric, PyTorch `Dataset` objects are sample-centric:
+the `__len__` method returns the number of samples,
+and the `__getitem__` method returns a specific sample.
+
+```python
+class ExampleTorchDataset(torch.utils.data.Dataset):
+    def __init__(self, x, y):
+        self.x = x
+        self.y = y
+
+    def __len__(self):
+        return len(self.x)
+
+    def __getitem__(self, idx):
+        return self.x[idx], self.y[idx]
+
+
+train_torch_dataset = ExampleTorchDataset(x_train, y_train)
+val_torch_dataset = ExampleTorchDataset(x_val, y_val)
+```
+
+To use a PyTorch Dataset, you need to wrap it into a `DataLoader`, which takes care
+of batching and shuffling:
+
+```python
+train_dataloader = torch.utils.data.DataLoader(
+    train_torch_dataset, batch_size=32, shuffle=True
+)
+val_dataloader = torch.utils.data.DataLoader(
+    val_torch_dataset, batch_size=32, shuffle=True
+)
+```
+
+Now you can use them in the Keras API just like any other iterator:
+
+```python
+model = get_compiled_model()
+model.fit(train_dataloader, validation_data=val_dataloader, epochs=1)
+model.evaluate(val_dataloader)
+```
+
+## Using sample weighting and class weighting
+
+With the default settings, the weight of a sample is decided by its frequency
+in the dataset. There are two methods to weight the data, independent of
+sample frequency:
+
+* Class weights
+* Sample weights
+
+### Class weights
+
+This is set by passing a dictionary to the `class_weight` argument of
+`Model.fit()`. This dictionary maps class indices to the weight that should
+be used for samples belonging to this class.
+
+This can be used to balance classes without resampling, or to train a
+model that gives more importance to a particular class.
+
+For instance, if class "0" is half as represented as class "1" in your data,
+you could use `Model.fit(..., class_weight={0: 1., 1: 0.5})`.
+
+Here's a NumPy example where we use class weights or sample weights to
+give more importance to the correct classification of class #5 (which
+is the digit "5" in the MNIST dataset).
+
+```python
+class_weight = {
+    0: 1.0,
+    1: 1.0,
+    2: 1.0,
+    3: 1.0,
+    4: 1.0,
+    # Set weight "2" for class "5",
+    # making this class 2x more important
+    5: 2.0,
+    6: 1.0,
+    7: 1.0,
+    8: 1.0,
+    9: 1.0,
+}
+
+print("Fit with class weight")
+model = get_compiled_model()
+model.fit(x_train, y_train, class_weight=class_weight, batch_size=64, epochs=1)
+```
+
+### Sample weights
+
+For fine-grained control, or if you are not building a classifier,
+you can use "sample weights".
+
+- When training from NumPy data: Pass the `sample_weight`
+  argument to `Model.fit()`.
+- When training from `tf.data` or any other sort of iterator:
+  Yield `(input_batch, label_batch, sample_weight_batch)` tuples.
+
+A "sample weights" array is an array of numbers that specify how much weight
+each sample in a batch should have in computing the total loss. It is commonly
+used in imbalanced classification problems (the idea being to give more weight
+to rarely-seen classes).
+
+When the weights used are ones and zeros, the array can be used as a *mask* for
+the loss function (entirely discarding the contribution of certain samples to
+the total loss).
+
+```python
+sample_weight = np.ones(shape=(len(y_train),))
+sample_weight[y_train == 5] = 2.0
+
+print("Fit with sample weight")
+model = get_compiled_model()
+model.fit(x_train, y_train, sample_weight=sample_weight, batch_size=64, epochs=1)
+```
+
+Here's a matching `Dataset` example:
+
+```python
+sample_weight = np.ones(shape=(len(y_train),))
+sample_weight[y_train == 5] = 2.0
+
+# Create a Dataset that includes sample weights
+# (3rd element in the return tuple).
+train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train, sample_weight))
+
+# Shuffle and slice the dataset.
+train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)
+
+model = get_compiled_model()
+model.fit(train_dataset, epochs=1)
+```
+
+## Passing data to multi-input, multi-output models
+
+In the previous examples, we were considering a model with a single input (a tensor of
+shape `(784,)`) and a single output (a prediction tensor of shape `(10,)`). But what
+about models that have multiple inputs or outputs?
+
+Consider the following model, which has an image input of shape `(32, 32, 3)` (that's
+`(height, width, channels)`) and a time series input of shape `(None, 10)` (that's
+`(timesteps, features)`). Our model will have two outputs computed from the
+combination of these inputs: a "score" (of shape `(1,)`) and a probability
+distribution over five classes (of shape `(5,)`).
+
+```python
+image_input = keras.Input(shape=(32, 32, 3), name="img_input")
+timeseries_input = keras.Input(shape=(None, 10), name="ts_input")
+
+x1 = layers.Conv2D(3, 3)(image_input)
+x1 = layers.GlobalMaxPooling2D()(x1)
+
+x2 = layers.Conv1D(3, 3)(timeseries_input)
+x2 = layers.GlobalMaxPooling1D()(x2)
+
+x = layers.concatenate([x1, x2])
+
+score_output = layers.Dense(1, name="score_output")(x)
+class_output = layers.Dense(5, name="class_output")(x)
+
+model = keras.Model(
+    inputs=[image_input, timeseries_input], outputs=[score_output, class_output]
+)
+```
+
+Let's plot this model, so you can clearly see what we're doing here (note that the
+shapes shown in the plot are batch shapes, rather than per-sample shapes).
+
+```python
+keras.utils.plot_model(model, "multi_input_and_output_model.png", show_shapes=True)
+```
+
+At compilation time, we can specify different losses for different outputs, by passing
+the loss functions as a list:
+
+```python
+model.compile(
+    optimizer=keras.optimizers.RMSprop(1e-3),
+    loss=[
+        keras.losses.MeanSquaredError(),
+        keras.losses.CategoricalCrossentropy(),
+    ],
+)
+```
+
+If we only passed a single loss function to the model, the same loss function would be
+applied to every output (which is not appropriate here).
+
+Likewise for metrics:
+
+```python
+model.compile(
+    optimizer=keras.optimizers.RMSprop(1e-3),
+    loss=[
+        keras.losses.MeanSquaredError(),
+        keras.losses.CategoricalCrossentropy(),
+    ],
+    metrics=[
+        [
+            keras.metrics.MeanAbsolutePercentageError(),
+            keras.metrics.MeanAbsoluteError(),
+        ],
+        [keras.metrics.CategoricalAccuracy()],
+    ],
+)
+```
+
+Since we gave names to our output layers, we could also specify per-output losses and
+metrics via a dict:
+
+```python
+model.compile(
+    optimizer=keras.optimizers.RMSprop(1e-3),
+    loss={
+        "score_output": keras.losses.MeanSquaredError(),
+        "class_output": keras.losses.CategoricalCrossentropy(),
+    },
+    metrics={
+        "score_output": [
+            keras.metrics.MeanAbsolutePercentageError(),
+            keras.metrics.MeanAbsoluteError(),
+        ],
+        "class_output": [keras.metrics.CategoricalAccuracy()],
+    },
+)
+```
+
+We recommend the use of explicit names and dicts if you have more than 2 outputs.
+
+It's possible to give different weights to different output-specific losses (for
+instance, one might wish to privilege the "score" loss in our example, by giving it 2x
+the importance of the class loss), using the `loss_weights` argument:
+
+```python
+model.compile(
+    optimizer=keras.optimizers.RMSprop(1e-3),
+    loss={
+        "score_output": keras.losses.MeanSquaredError(),
+        "class_output": keras.losses.CategoricalCrossentropy(),
+    },
+    metrics={
+        "score_output": [
+            keras.metrics.MeanAbsolutePercentageError(),
+            keras.metrics.MeanAbsoluteError(),
+        ],
+        "class_output": [keras.metrics.CategoricalAccuracy()],
+    },
+    loss_weights={"score_output": 2.0, "class_output": 1.0},
+)
+```
+
+You could also choose not to compute a loss for certain outputs, if these outputs are
+meant for prediction but not for training:
+
+```python
+# List loss version
+model.compile(
+    optimizer=keras.optimizers.RMSprop(1e-3),
+    loss=[None, keras.losses.CategoricalCrossentropy()],
+)
+
+# Or dict loss version
+model.compile(
+    optimizer=keras.optimizers.RMSprop(1e-3),
+    loss={"class_output": keras.losses.CategoricalCrossentropy()},
+)
+```
+
+Passing data to a multi-input or multi-output model in `fit()` works in a similar way to
+specifying a loss function in compile: you can pass **lists of NumPy arrays** (with
+1:1 mapping to the outputs that received a loss function) or **dicts mapping output
+names to NumPy arrays**.
+
+```python
+model.compile(
+    optimizer=keras.optimizers.RMSprop(1e-3),
+    loss=[
+        keras.losses.MeanSquaredError(),
+        keras.losses.CategoricalCrossentropy(),
+    ],
+)
+
+# Generate dummy NumPy data
+img_data = np.random.random_sample(size=(100, 32, 32, 3))
+ts_data = np.random.random_sample(size=(100, 20, 10))
+score_targets = np.random.random_sample(size=(100, 1))
+class_targets = np.random.random_sample(size=(100, 5))
+
+# Fit on lists
+model.fit([img_data, ts_data], [score_targets, class_targets], batch_size=32, epochs=1)
+
+# Alternatively, fit on dicts
+model.fit(
+    {"img_input": img_data, "ts_input": ts_data},
+    {"score_output": score_targets, "class_output": class_targets},
+    batch_size=32,
+    epochs=1,
+)
+```
+
+Here's the `Dataset` use case: similarly to what we did for NumPy arrays, the `Dataset`
+should return a tuple of dicts.
+
+```python
+train_dataset = tf.data.Dataset.from_tensor_slices(
+    (
+        {"img_input": img_data, "ts_input": ts_data},
+        {"score_output": score_targets, "class_output": class_targets},
+    )
+)
+train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)
+
+model.fit(train_dataset, epochs=1)
+```
+
+## Using callbacks
+
+Callbacks in Keras are objects that are called at different points during training (at
+the start of an epoch, at the end of a batch, at the end of an epoch, etc.). They
+can be used to implement certain behaviors, such as:
+
+- Doing validation at different points during training (beyond the built-in per-epoch
+validation)
+- Checkpointing the model at regular intervals or when it exceeds a certain accuracy
+threshold
+- Changing the learning rate of the model when training seems to be plateauing
+- Doing fine-tuning of the top layers when training seems to be plateauing
+- Sending email or instant message notifications when training ends or when a certain
+performance threshold is exceeded
+- Etc.
+
+Callbacks can be passed as a list to your call to `fit()`:
+
+```python
+model = get_compiled_model()
+
+callbacks = [
+    keras.callbacks.EarlyStopping(
+        # Stop training when `val_loss` is no longer improving
+        monitor="val_loss",
+        # "no longer improving" being defined as "no better than 1e-2 less"
+        min_delta=1e-2,
+        # "no longer improving" being further defined as "for at least 2 epochs"
+        patience=2,
+        verbose=1,
+    )
+]
+model.fit(
+    x_train,
+    y_train,
+    epochs=20,
+    batch_size=64,
+    callbacks=callbacks,
+    validation_split=0.2,
+)
+```
+
+### Many built-in callbacks are available
+
+There are many built-in callbacks already available in Keras, such as:
+
+- `ModelCheckpoint`: Periodically save the model.
+- `EarlyStopping`: Stop training when validation metrics are no longer improving.
+- `TensorBoard`: Periodically write model logs that can be visualized in
+[TensorBoard](https://www.tensorflow.org/tensorboard) (more details in the section
+"Visualization").
+- `CSVLogger`: Stream loss and metrics data to a CSV file.
+- etc.
+
+See the [callbacks documentation](/api/callbacks/) for the complete list.
+
+### Writing your own callback
+
+You can create a custom callback by extending the base class
+`keras.callbacks.Callback`. A callback has access to its associated model through the
+class property `self.model`.
+
+Make sure to read the
+[complete guide to writing custom callbacks](/guides/writing_your_own_callbacks/).
+
+Here's a simple example saving a list of per-batch loss values during training:
+
+```python
+class LossHistory(keras.callbacks.Callback):
+    def on_train_begin(self, logs):
+        self.per_batch_losses = []
+
+    def on_batch_end(self, batch, logs):
+        self.per_batch_losses.append(logs.get("loss"))
+```
+
+## Checkpointing models
+
+When you're training a model on relatively large datasets, it's crucial to save
+checkpoints of your model at frequent intervals.
+
+The easiest way to achieve this is with the `ModelCheckpoint` callback:
+
+```python
+model = get_compiled_model()
+
+callbacks = [
+    keras.callbacks.ModelCheckpoint(
+        # Path where to save the model
+        # The two parameters below mean that we will overwrite
+        # the current checkpoint if and only if
+        # the `val_loss` score has improved.
+        # The saved model name will include the current epoch.
+        filepath="mymodel_{epoch}.keras",
+        save_best_only=True,  # Only save a model if `val_loss` has improved.
+        monitor="val_loss",
+        verbose=1,
+    )
+]
+model.fit(
+    x_train,
+    y_train,
+    epochs=2,
+    batch_size=64,
+    callbacks=callbacks,
+    validation_split=0.2,
+)
+```
+
+The `ModelCheckpoint` callback can be used to implement fault-tolerance:
+the ability to restart training from the last saved state of the model in case training
+gets randomly interrupted. Here's a basic example:
+
+```python
+# Prepare a directory to store all the checkpoints.
+checkpoint_dir = "./ckpt"
+if not os.path.exists(checkpoint_dir):
+    os.makedirs(checkpoint_dir)
+
+
+def make_or_restore_model():
+    # Either restore the latest model, or create a fresh one
+    # if there is no checkpoint available.
+    checkpoints = [checkpoint_dir + "/" + name for name in os.listdir(checkpoint_dir)]
+    if checkpoints:
+        latest_checkpoint = max(checkpoints, key=os.path.getctime)
+        print("Restoring from", latest_checkpoint)
+        return keras.models.load_model(latest_checkpoint)
+    print("Creating a new model")
+    return get_compiled_model()
+
+
+model = make_or_restore_model()
+callbacks = [
+    # This callback saves the model every 100 batches.
+    # We include the training loss in the saved model name.
+    keras.callbacks.ModelCheckpoint(
+        filepath=checkpoint_dir + "/model-loss={loss:.2f}.keras", save_freq=100
+    )
+]
+model.fit(x_train, y_train, epochs=1, callbacks=callbacks)
+```
+
+You can also write your own callback for saving and restoring models.
+
+For a complete guide on serialization and saving, see the
+[guide to saving and serializing Models](/guides/serialization_and_saving/).
+
+## Using learning rate schedules
+
+A common pattern when training deep learning models is to gradually reduce the learning
+rate as training progresses. This is generally known as "learning rate decay".
+
+The learning rate decay schedule could be static (fixed in advance, as a function of the
+current epoch or the current batch index), or dynamic (responding to the current
+behavior of the model, in particular the validation loss).
+
+### Passing a schedule to an optimizer
+
+You can easily use a static learning rate decay schedule by passing a schedule object
+as the `learning_rate` argument in your optimizer:
+
+```python
+initial_learning_rate = 0.1
+lr_schedule = keras.optimizers.schedules.ExponentialDecay(
+    initial_learning_rate, decay_steps=100000, decay_rate=0.96, staircase=True
+)
+
+optimizer = keras.optimizers.RMSprop(learning_rate=lr_schedule)
+```
+
+Several built-in schedules are available: `ExponentialDecay`, `PiecewiseConstantDecay`,
+`PolynomialDecay`, and `InverseTimeDecay`.
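+
+If none of the built-ins match the decay curve you want, you can also subclass
+`keras.optimizers.schedules.LearningRateSchedule` and implement `__call__(step)`.
+Here's a minimal sketch; the class name and the warmup-then-inverse-sqrt shape are
+illustrative assumptions for this example, not a built-in schedule:
+
+```python
+class WarmupInverseSqrtDecay(keras.optimizers.schedules.LearningRateSchedule):
+    # Hypothetical schedule for illustration: linear warmup to a peak
+    # learning rate, followed by inverse-square-root decay.
+    def __init__(self, peak_learning_rate=0.01, warmup_steps=1000):
+        self.peak_learning_rate = peak_learning_rate
+        self.warmup_steps = warmup_steps
+
+    def __call__(self, step):
+        # `step` is the current optimizer iteration, starting at 0.
+        step = ops.cast(step + 1, "float32")
+        warmup = self.peak_learning_rate * step / self.warmup_steps
+        decay = self.peak_learning_rate * ops.sqrt(self.warmup_steps / step)
+        return ops.minimum(warmup, decay)
+
+    def get_config(self):
+        # Needed so a model compiled with this schedule can be saved.
+        return {
+            "peak_learning_rate": self.peak_learning_rate,
+            "warmup_steps": self.warmup_steps,
+        }
+
+
+optimizer = keras.optimizers.RMSprop(learning_rate=WarmupInverseSqrtDecay())
+```
+
+The optimizer calls the schedule with its current iteration count at every training
+step, so any curve you can express as a function of the step index works here.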
+ +### Using callbacks to implement a dynamic learning rate schedule + +A dynamic learning rate schedule (for instance, decreasing the learning rate when the +validation loss is no longer improving) cannot be achieved with these schedule objects, +since the optimizer does not have access to validation metrics. + +However, callbacks do have access to all metrics, including validation metrics! You can +thus achieve this pattern by using a callback that modifies the current learning rate +on the optimizer. In fact, this is even built-in as the `ReduceLROnPlateau` callback. + +## Visualizing loss and metrics during training with TensorBoard + +The best way to keep an eye on your model during training is to use +[TensorBoard](https://www.tensorflow.org/tensorboard) -- a browser-based application +that you can run locally that provides you with: + +- Live plots of the loss and metrics for training and evaluation +- (optionally) Visualizations of the histograms of your layer activations +- (optionally) 3D visualizations of the embedding spaces learned by your `Embedding` +layers + +If you have installed TensorFlow with pip, you should be able to launch TensorBoard +from the command line: + +``` +tensorboard --logdir=/full_path_to_your_logs +``` + +### Using the TensorBoard callback + +The easiest way to use TensorBoard with a Keras model and the `fit()` method is the +`TensorBoard` callback. + +In the simplest case, just specify where you want the callback to write logs, and +you're good to go: + +```python +keras.callbacks.TensorBoard( + log_dir="/full_path_to_your_logs", + histogram_freq=0, # How often to log histogram visualizations + embeddings_freq=0, # How often to log embedding visualizations + update_freq="epoch", +) # How often to write logs (default: once per epoch) +``` + +For more information, see the +[documentation for the `TensorBoard` callback](https://keras.io/api/callbacks/tensorboard/). + diff --git a/.tether/vignettes-src/transfer_learning.Rmd b/.tether/vignettes-src/transfer_learning.Rmd new file mode 100644 index 0000000000..9230064c82 --- /dev/null +++ b/.tether/vignettes-src/transfer_learning.Rmd @@ -0,0 +1,542 @@ +--- +title: Transfer learning & fine-tuning +author: '[fchollet](https://twitter.com/fchollet)' +date-created: 2020/04/15 +last-modified: 2023/06/25 +description: Complete guide to transfer learning & fine-tuning in Keras. +accelerator: GPU +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras-io/guides/transfer_learning.py +--- + +## Setup + +```python +import numpy as np +import keras +from keras import layers +import tensorflow_datasets as tfds +import matplotlib.pyplot as plt +``` + +## Introduction + +**Transfer learning** consists of taking features learned on one problem, and +leveraging them on a new, similar problem. For instance, features from a model that has +learned to identify racoons may be useful to kick-start a model meant to identify + tanukis. + +Transfer learning is usually done for tasks where your dataset has too little data to + train a full-scale model from scratch. + +The most common incarnation of transfer learning in the context of deep learning is the + following workflow: + +1. Take layers from a previously trained model. +2. Freeze them, so as to avoid destroying any of the information they contain during + future training rounds. +3. Add some new, trainable layers on top of the frozen layers. 
They will learn to turn + the old features into predictions on a new dataset. +4. Train the new layers on your dataset. + +A last, optional step, is **fine-tuning**, which consists of unfreezing the entire +model you obtained above (or part of it), and re-training it on the new data with a +very low learning rate. This can potentially achieve meaningful improvements, by + incrementally adapting the pretrained features to the new data. + +First, we will go over the Keras `trainable` API in detail, which underlies most + transfer learning & fine-tuning workflows. + +Then, we'll demonstrate the typical workflow by taking a model pretrained on the +ImageNet dataset, and retraining it on the Kaggle "cats vs dogs" classification + dataset. + +This is adapted from +[Deep Learning with Python](https://www.manning.com/books/deep-learning-with-python) +and the 2016 blog post +["building powerful image classification models using very little data"](https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html). + +## Freezing layers: understanding the `trainable` attribute + +Layers & models have three weight attributes: + +- `weights` is the list of all weights variables of the layer. +- `trainable_weights` is the list of those that are meant to be updated (via gradient + descent) to minimize the loss during training. +- `non_trainable_weights` is the list of those that aren't meant to be trained. + Typically they are updated by the model during the forward pass. + +**Example: the `Dense` layer has 2 trainable weights (kernel & bias)** + +```python +layer = keras.layers.Dense(3) +layer.build((None, 4)) # Create the weights + +print("weights:", len(layer.weights)) +print("trainable_weights:", len(layer.trainable_weights)) +print("non_trainable_weights:", len(layer.non_trainable_weights)) +``` + +In general, all weights are trainable weights. The only built-in layer that has +non-trainable weights is the `BatchNormalization` layer. It uses non-trainable weights + to keep track of the mean and variance of its inputs during training. +To learn how to use non-trainable weights in your own custom layers, see the +[guide to writing new layers from scratch](/guides/making_new_layers_and_models_via_subclassing/). + +**Example: the `BatchNormalization` layer has 2 trainable weights and 2 non-trainable + weights** + +```python +layer = keras.layers.BatchNormalization() +layer.build((None, 4)) # Create the weights + +print("weights:", len(layer.weights)) +print("trainable_weights:", len(layer.trainable_weights)) +print("non_trainable_weights:", len(layer.non_trainable_weights)) +``` + +Layers & models also feature a boolean attribute `trainable`. Its value can be changed. +Setting `layer.trainable` to `False` moves all the layer's weights from trainable to +non-trainable. This is called "freezing" the layer: the state of a frozen layer won't +be updated during training (either when training with `fit()` or when training with + any custom loop that relies on `trainable_weights` to apply gradient updates). + +**Example: setting `trainable` to `False`** + +```python +layer = keras.layers.Dense(3) +layer.build((None, 4)) # Create the weights +layer.trainable = False # Freeze the layer + +print("weights:", len(layer.weights)) +print("trainable_weights:", len(layer.trainable_weights)) +print("non_trainable_weights:", len(layer.non_trainable_weights)) +``` + +When a trainable weight becomes non-trainable, its value is no longer updated during + training. 
+ +```python +# Make a model with 2 layers +layer1 = keras.layers.Dense(3, activation="relu") +layer2 = keras.layers.Dense(3, activation="sigmoid") +model = keras.Sequential([keras.Input(shape=(3,)), layer1, layer2]) + +# Freeze the first layer +layer1.trainable = False + +# Keep a copy of the weights of layer1 for later reference +initial_layer1_weights_values = layer1.get_weights() + +# Train the model +model.compile(optimizer="adam", loss="mse") +model.fit(np.random.random((2, 3)), np.random.random((2, 3))) + +# Check that the weights of layer1 have not changed during training +final_layer1_weights_values = layer1.get_weights() +np.testing.assert_allclose( + initial_layer1_weights_values[0], final_layer1_weights_values[0] +) +np.testing.assert_allclose( + initial_layer1_weights_values[1], final_layer1_weights_values[1] +) +``` + +Do not confuse the `layer.trainable` attribute with the argument `training` in +`layer.__call__()` (which controls whether the layer should run its forward pass in + inference mode or training mode). For more information, see the +[Keras FAQ]( + https://keras.io/getting_started/faq/#whats-the-difference-between-the-training-argument-in-call-and-the-trainable-attribute). + +## Recursive setting of the `trainable` attribute + +If you set `trainable = False` on a model or on any layer that has sublayers, +all children layers become non-trainable as well. + +**Example:** + +```python +inner_model = keras.Sequential( + [ + keras.Input(shape=(3,)), + keras.layers.Dense(3, activation="relu"), + keras.layers.Dense(3, activation="relu"), + ] +) + +model = keras.Sequential( + [ + keras.Input(shape=(3,)), + inner_model, + keras.layers.Dense(3, activation="sigmoid"), + ] +) + +model.trainable = False # Freeze the outer model + +assert inner_model.trainable == False # All layers in `model` are now frozen +assert inner_model.layers[0].trainable == False # `trainable` is propagated recursively +``` + +## The typical transfer-learning workflow + +This leads us to how a typical transfer learning workflow can be implemented in Keras: + +1. Instantiate a base model and load pre-trained weights into it. +2. Freeze all layers in the base model by setting `trainable = False`. +3. Create a new model on top of the output of one (or several) layers from the base + model. +4. Train your new model on your new dataset. + +Note that an alternative, more lightweight workflow could also be: + +1. Instantiate a base model and load pre-trained weights into it. +2. Run your new dataset through it and record the output of one (or several) layers + from the base model. This is called **feature extraction**. +3. Use that output as input data for a new, smaller model. + +A key advantage of that second workflow is that you only run the base model once on + your data, rather than once per epoch of training. So it's a lot faster & cheaper. + +An issue with that second workflow, though, is that it doesn't allow you to dynamically +modify the input data of your new model during training, which is required when doing +data augmentation, for instance. Transfer learning is typically used for tasks when +your new dataset has too little data to train a full-scale model from scratch, and in +such scenarios data augmentation is very important. So in what follows, we will focus + on the first workflow. + +Here's what the first workflow looks like in Keras: + +First, instantiate a base model with pre-trained weights. 
+
+```python
+base_model = keras.applications.Xception(
+    weights='imagenet',  # Load weights pre-trained on ImageNet.
+    input_shape=(150, 150, 3),
+    include_top=False)  # Do not include the ImageNet classifier at the top.
+```
+
+Then, freeze the base model.
+
+```python
+base_model.trainable = False
+```
+
+Create a new model on top.
+
+```python
+inputs = keras.Input(shape=(150, 150, 3))
+# We make sure that the base_model is running in inference mode here,
+# by passing `training=False`. This is important for fine-tuning, as you will
+# learn in a few paragraphs.
+x = base_model(inputs, training=False)
+# Convert features of shape `base_model.output_shape[1:]` to vectors
+x = keras.layers.GlobalAveragePooling2D()(x)
+# A Dense classifier with a single unit (binary classification)
+outputs = keras.layers.Dense(1)(x)
+model = keras.Model(inputs, outputs)
+```
+
+Train the model on new data.
+
+```python
+model.compile(optimizer=keras.optimizers.Adam(),
+              loss=keras.losses.BinaryCrossentropy(from_logits=True),
+              metrics=[keras.metrics.BinaryAccuracy()])
+model.fit(new_dataset, epochs=20, callbacks=..., validation_data=...)
+```
+
+## Fine-tuning
+
+Once your model has converged on the new data, you can try to unfreeze all or part of
+ the base model and retrain the whole model end-to-end with a very low learning rate.
+
+This is an optional last step that can potentially give you incremental improvements.
+ It could also lead to quick overfitting -- keep that in mind.
+
+It is critical to only do this step *after* the model with frozen layers has been
+trained to convergence. If you mix randomly-initialized trainable layers with
+trainable layers that hold pre-trained features, the randomly-initialized layers will
+cause very large gradient updates during training, which will destroy your pre-trained
+ features.
+
+It's also critical to use a very low learning rate at this stage, because
+you are training a much larger model than in the first round of training, on a dataset
+ that is typically very small.
+As a result, you are at risk of overfitting very quickly if you apply large weight
+ updates. Here, you only want to readapt the pretrained weights in an incremental way.
+
+This is how to implement fine-tuning of the whole base model:
+
+```python
+# Unfreeze the base model
+base_model.trainable = True
+
+# It's important to recompile your model after you make any changes
+# to the `trainable` attribute of any inner layer, so that your changes
+# are taken into account
+model.compile(optimizer=keras.optimizers.Adam(1e-5),  # Very low learning rate
+              loss=keras.losses.BinaryCrossentropy(from_logits=True),
+              metrics=[keras.metrics.BinaryAccuracy()])
+
+# Train end-to-end. Be careful to stop before you overfit!
+model.fit(new_dataset, epochs=10, callbacks=..., validation_data=...)
+```
+
+**Important note about `compile()` and `trainable`**
+
+Calling `compile()` on a model is meant to "freeze" the behavior of that model. This
+ implies that the `trainable`
+attribute values at the time the model is compiled should be preserved throughout the
+ lifetime of that model,
+until `compile` is called again. Hence, if you change any `trainable` value, make sure
+ to call `compile()` again on your
+model for your changes to be taken into account.
+
+**Important notes about `BatchNormalization` layer**
+
+Many image models contain `BatchNormalization` layers. That layer is a special case on
+ every imaginable count. Here are a few things to keep in mind.
+
+- `BatchNormalization` contains 2 non-trainable weights that get updated during
+training. These are the variables tracking the mean and variance of the inputs.
+- When you set `bn_layer.trainable = False`, the `BatchNormalization` layer will
+run in inference mode, and will not update its mean & variance statistics. This is not
+the case for other layers in general, as
+[weight trainability & inference/training modes are two orthogonal concepts](
+  https://keras.io/getting_started/faq/#whats-the-difference-between-the-training-argument-in-call-and-the-trainable-attribute).
+But the two are tied in the case of the `BatchNormalization` layer.
+- When you unfreeze a model that contains `BatchNormalization` layers in order to do
+fine-tuning, you should keep the `BatchNormalization` layers in inference mode by
+ passing `training=False` when calling the base model.
+Otherwise the updates applied to the non-trainable weights will suddenly destroy
+what the model has learned.
+
+You'll see this pattern in action in the end-to-end example at the end of this guide.
+
+## An end-to-end example: fine-tuning an image classification model on a cats vs. dogs dataset
+
+To solidify these concepts, let's walk you through a concrete end-to-end transfer
+learning & fine-tuning example. We will load the Xception model, pre-trained on
+ ImageNet, and use it on the Kaggle "cats vs. dogs" classification dataset.
+
+### Getting the data
+
+First, let's fetch the cats vs. dogs dataset using TFDS. If you have your own dataset,
+you'll probably want to use the utility
+`keras.utils.image_dataset_from_directory` to generate similar labeled
+ dataset objects from a set of images on disk filed into class-specific folders.
+
+Transfer learning is most useful when working with very small datasets. To keep our
+dataset small, we will use 40% of the original training data (25,000 images) for
+ training, 10% for validation, and 10% for testing.
+
+```python
+tfds.disable_progress_bar()
+
+train_ds, validation_ds, test_ds = tfds.load(
+    "cats_vs_dogs",
+    # Reserve 10% for validation and 10% for test
+    split=["train[:40%]", "train[40%:50%]", "train[50%:60%]"],
+    as_supervised=True,  # Include labels
+)
+
+print(f"Number of training samples: {train_ds.cardinality()}")
+print(f"Number of validation samples: {validation_ds.cardinality()}")
+print(f"Number of test samples: {test_ds.cardinality()}")
+```
+
+These are the first 9 images in the training dataset -- as you can see, they're all
+different sizes.
+
+```python
+plt.figure(figsize=(10, 10))
+for i, (image, label) in enumerate(train_ds.take(9)):
+    ax = plt.subplot(3, 3, i + 1)
+    plt.imshow(image)
+    plt.title(int(label))
+    plt.axis("off")
+```
+
+We can also see that label 1 is "dog" and label 0 is "cat".
+
+### Standardizing the data
+
+Our raw images have a variety of sizes. In addition, each pixel consists of 3 integer
+values between 0 and 255 (RGB level values). This isn't a great fit for feeding a
+neural network. We need to do 2 things:
+
+- Standardize to a fixed image size. We pick 150x150.
+- Normalize pixel values between -1 and 1. We'll do this using a `Rescaling` layer as
+part of the model itself.
+
+In general, it's a good practice to develop models that take raw data as input, as
+opposed to models that take already-preprocessed data.
The reason being that, if your
+model expects preprocessed data, any time you export your model to use it elsewhere
+(in a web browser, in a mobile app), you'll need to reimplement the exact same
+preprocessing pipeline. This gets very tricky very quickly. So we should do the least
+ possible amount of preprocessing before hitting the model.
+
+Here, we'll do image resizing in the data pipeline (because a deep neural network can
+only process contiguous batches of data), and we'll do the input value scaling as part
+of the model, when we create it.
+
+Let's resize images to 150x150:
+
+```python
+resize_fn = keras.layers.Resizing(150, 150)
+
+train_ds = train_ds.map(lambda x, y: (resize_fn(x), y))
+validation_ds = validation_ds.map(lambda x, y: (resize_fn(x), y))
+test_ds = test_ds.map(lambda x, y: (resize_fn(x), y))
+```
+
+### Using random data augmentation
+
+When you don't have a large image dataset, it's a good practice to artificially
+introduce sample diversity by applying random yet realistic transformations to
+the training images, such as random horizontal flipping or small random rotations. This
+helps expose the model to different aspects of the training data while slowing down
+overfitting.
+
+```python
+augmentation_layers = [
+    layers.RandomFlip("horizontal"),
+    layers.RandomRotation(0.1),
+]
+
+
+def data_augmentation(x):
+    for layer in augmentation_layers:
+        x = layer(x)
+    return x
+
+
+train_ds = train_ds.map(lambda x, y: (data_augmentation(x), y))
+```
+
+Let's batch the data and use prefetching to optimize loading speed.
+
+```python
+from tensorflow import data as tf_data
+
+batch_size = 64
+
+train_ds = train_ds.batch(batch_size).prefetch(tf_data.AUTOTUNE).cache()
+validation_ds = validation_ds.batch(batch_size).prefetch(tf_data.AUTOTUNE).cache()
+test_ds = test_ds.batch(batch_size).prefetch(tf_data.AUTOTUNE).cache()
+```
+
+Let's visualize what the first image of the first batch looks like after various random
+ transformations:
+
+```python
+for images, labels in train_ds.take(1):
+    plt.figure(figsize=(10, 10))
+    first_image = images[0]
+    for i in range(9):
+        ax = plt.subplot(3, 3, i + 1)
+        augmented_image = data_augmentation(np.expand_dims(first_image, 0))
+        plt.imshow(np.array(augmented_image[0]).astype("int32"))
+        plt.title(int(labels[0]))
+        plt.axis("off")
+```
+
+## Build a model
+
+Now let's build a model that follows the blueprint we've explained earlier.
+
+Note that:
+
+- We add a `Rescaling` layer to scale input values (initially in the `[0, 255]`
+ range) to the `[-1, 1]` range.
+- We add a `Dropout` layer before the classification layer, for regularization.
+- We make sure to pass `training=False` when calling the base model, so that
+it runs in inference mode and batchnorm statistics don't get updated
+even after we unfreeze the base model for fine-tuning.
+
+```python
+base_model = keras.applications.Xception(
+    weights="imagenet",  # Load weights pre-trained on ImageNet.
+    input_shape=(150, 150, 3),
+    include_top=False,
+)  # Do not include the ImageNet classifier at the top.
+
+# Freeze the base_model
+base_model.trainable = False
+
+# Create new model on top
+inputs = keras.Input(shape=(150, 150, 3))
+
+# Pre-trained Xception weights require that input be scaled
+# from (0, 255) to a range of (-1., +1.), the rescaling layer
+# outputs: `(inputs * scale) + offset`
+scale_layer = keras.layers.Rescaling(scale=1 / 127.5, offset=-1)
+x = scale_layer(inputs)
+
+# The base model contains batchnorm layers.
+# We want to keep them in inference mode
+# when we unfreeze the base model for fine-tuning, so we make sure that the
+# base_model is running in inference mode here.
+x = base_model(x, training=False)
+x = keras.layers.GlobalAveragePooling2D()(x)
+x = keras.layers.Dropout(0.2)(x)  # Regularize with dropout
+outputs = keras.layers.Dense(1)(x)
+model = keras.Model(inputs, outputs)
+
+model.summary(show_trainable=True)
+```
+
+## Train the top layer
+
+```python
+model.compile(
+    optimizer=keras.optimizers.Adam(),
+    loss=keras.losses.BinaryCrossentropy(from_logits=True),
+    metrics=[keras.metrics.BinaryAccuracy()],
+)
+
+epochs = 2
+print("Fitting the top layer of the model")
+model.fit(train_ds, epochs=epochs, validation_data=validation_ds)
+```
+
+## Do a round of fine-tuning of the entire model
+
+Finally, let's unfreeze the base model and train the entire model end-to-end with a low
+ learning rate.
+
+Importantly, although the base model becomes trainable, it is still running in
+inference mode since we passed `training=False` when calling it when we built the
+model. This means that the batch normalization layers inside won't update their batch
+statistics. If they did, they would wreak havoc on the representations learned by the
+ model so far.
+
+```python
+# Unfreeze the base_model. Note that it keeps running in inference mode
+# since we passed `training=False` when calling it. This means that
+# the batchnorm layers will not update their batch statistics.
+# This prevents the batchnorm layers from undoing all the training
+# we've done so far.
+base_model.trainable = True
+model.summary(show_trainable=True)
+
+model.compile(
+    optimizer=keras.optimizers.Adam(1e-5),  # Low learning rate
+    loss=keras.losses.BinaryCrossentropy(from_logits=True),
+    metrics=[keras.metrics.BinaryAccuracy()],
+)
+
+epochs = 1
+print("Fitting the end-to-end model")
+model.fit(train_ds, epochs=epochs, validation_data=validation_ds)
+```
+
+After this round of fine-tuning, we gain a nice improvement here.
+Let's evaluate the model on the test dataset:
+
+```python
+print("Test dataset evaluation")
+model.evaluate(test_ds)
+```
+
diff --git a/.tether/vignettes-src/understanding_masking_and_padding.Rmd b/.tether/vignettes-src/understanding_masking_and_padding.Rmd
new file mode 100644
index 0000000000..fd3469a9b5
--- /dev/null
+++ b/.tether/vignettes-src/understanding_masking_and_padding.Rmd
@@ -0,0 +1,362 @@
+---
+title: Understanding masking & padding
+authors: Scott Zhu, Francois Chollet
+date-created: 2019/07/16
+last-modified: 2023/06/25
+description: Complete guide to using mask-aware sequence layers in Keras.
+accelerator: None
+output: rmarkdown::html_vignette
+knit: ({source(here::here("tools/knit.R")); knit_vignette})
+tether: ~/github/keras-team/keras/guides/understanding_masking_and_padding.py
+---
+
+## Setup
+
+```python
+import numpy as np
+import keras
+from keras import ops
+from keras import layers
+```
+
+## Introduction
+
+**Masking** is a way to tell sequence-processing layers that certain timesteps
+in an input are missing, and thus should be skipped when processing the data.
+
+**Padding** is a special form of masking where the masked steps are at the start or
+the end of a sequence. Padding comes from the need to encode sequence data into
+contiguous batches: in order to make all sequences in a batch fit a given standard
+length, it is necessary to pad or truncate some sequences.
+
+Let's take a close look.
+ +## Padding sequence data + +When processing sequence data, it is very common for individual samples to have +different lengths. Consider the following example (text tokenized as words): + +``` +[ + ["Hello", "world", "!"], + ["How", "are", "you", "doing", "today"], + ["The", "weather", "will", "be", "nice", "tomorrow"], +] +``` + +After vocabulary lookup, the data might be vectorized as integers, e.g.: + +``` +[ + [71, 1331, 4231] + [73, 8, 3215, 55, 927], + [83, 91, 1, 645, 1253, 927], +] +``` + +The data is a nested list where individual samples have length 3, 5, and 6, +respectively. Since the input data for a deep learning model must be a single tensor +(of shape e.g. `(batch_size, 6, vocab_size)` in this case), samples that are shorter +than the longest item need to be padded with some placeholder value (alternatively, +one might also truncate long samples before padding short samples). + +Keras provides a utility function to truncate and pad Python lists to a common length: +`keras.utils.pad_sequences`. + +```python +raw_inputs = [ + [711, 632, 71], + [73, 8, 3215, 55, 927], + [83, 91, 1, 645, 1253, 927], +] + +# By default, this will pad using 0s; it is configurable via the +# "value" parameter. +# Note that you could use "pre" padding (at the beginning) or +# "post" padding (at the end). +# We recommend using "post" padding when working with RNN layers +# (in order to be able to use the +# CuDNN implementation of the layers). +padded_inputs = keras.utils.pad_sequences(raw_inputs, padding="post") +print(padded_inputs) +``` + +## Masking + +Now that all samples have a uniform length, the model must be informed that some part +of the data is actually padding and should be ignored. That mechanism is **masking**. + +There are three ways to introduce input masks in Keras models: + +- Add a `keras.layers.Masking` layer. +- Configure a `keras.layers.Embedding` layer with `mask_zero=True`. +- Pass a `mask` argument manually when calling layers that support this argument (e.g. +RNN layers). + +## Mask-generating layers: `Embedding` and `Masking` + +Under the hood, these layers will create a mask tensor (2D tensor with shape `(batch, +sequence_length)`), and attach it to the tensor output returned by the `Masking` or +`Embedding` layer. + +```python +embedding = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True) +masked_output = embedding(padded_inputs) + +print(masked_output._keras_mask) + +masking_layer = layers.Masking() +# Simulate the embedding lookup by expanding the 2D input to 3D, +# with embedding dimension of 10. +unmasked_embedding = ops.cast( + ops.tile(ops.expand_dims(padded_inputs, axis=-1), [1, 1, 10]), + dtype="float32", +) + +masked_embedding = masking_layer(unmasked_embedding) +print(masked_embedding._keras_mask) +``` + +As you can see from the printed result, the mask is a 2D boolean tensor with shape +`(batch_size, sequence_length)`, where each individual `False` entry indicates that +the corresponding timestep should be ignored during processing. + +## Mask propagation in the Functional API and Sequential API + +When using the Functional API or the Sequential API, a mask generated by an `Embedding` +or `Masking` layer will be propagated through the network for any layer that is +capable of using them (for example, RNN layers). Keras will automatically fetch the +mask corresponding to an input and pass it to any layer that knows how to use it. 
+
+For instance, in the following Sequential model, the `LSTM` layer will automatically
+receive a mask, which means it will ignore padded values:
+
+```python
+model = keras.Sequential(
+    [
+        layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True),
+        layers.LSTM(32),
+    ]
+)
+```
+
+This is also the case for the following Functional API model:
+
+```python
+inputs = keras.Input(shape=(None,), dtype="int32")
+x = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True)(inputs)
+outputs = layers.LSTM(32)(x)
+
+model = keras.Model(inputs, outputs)
+```
+
+## Passing mask tensors directly to layers
+
+Layers that can handle masks (such as the `LSTM` layer) have a `mask` argument in their
+`__call__` method.
+
+Meanwhile, layers that produce a mask (e.g. `Embedding`) expose a `compute_mask(input,
+previous_mask)` method which you can call.
+
+Thus, you can pass the output of the `compute_mask()` method of a mask-producing layer
+to the `__call__` method of a mask-consuming layer, like this:
+
+```python
+class MyLayer(layers.Layer):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.embedding = layers.Embedding(
+            input_dim=5000, output_dim=16, mask_zero=True
+        )
+        self.lstm = layers.LSTM(32)
+
+    def call(self, inputs):
+        x = self.embedding(inputs)
+        # Note that you could also prepare a `mask` tensor manually.
+        # It only needs to be a boolean tensor
+        # with the right shape, i.e. (batch_size, timesteps).
+        mask = self.embedding.compute_mask(inputs)
+        output = self.lstm(
+            x, mask=mask
+        )  # The layer will ignore the masked values
+        return output
+
+
+layer = MyLayer()
+x = np.random.random((32, 10)) * 100
+x = x.astype("int32")
+layer(x)
+```
+
+## Supporting masking in your custom layers
+
+Sometimes, you may need to write layers that generate a mask (like `Embedding`), or
+layers that need to modify the current mask.
+
+For instance, any layer that produces a tensor with a different time dimension than its
+input, such as a `Concatenate` layer that concatenates on the time dimension, will
+need to modify the current mask so that downstream layers will be able to properly
+take masked timesteps into account.
+
+To do this, your layer should implement the `layer.compute_mask()` method, which
+produces a new mask given the input and the current mask.
+
+Here is an example of a `TemporalSplit` layer that needs to modify the current mask.
+
+```python
+class TemporalSplit(keras.layers.Layer):
+    """Split the input tensor into 2 tensors along the time dimension."""
+
+    def call(self, inputs):
+        # Expect the input to be 3D and mask to be 2D, split the input tensor into 2
+        # subtensors along the time axis (axis 1).
+        return ops.split(inputs, 2, axis=1)
+
+    def compute_mask(self, inputs, mask=None):
+        # Also split the mask into 2, if one is present.
+ if mask is None: + return None + return ops.split(mask, 2, axis=1) + + +first_half, second_half = TemporalSplit()(masked_embedding) +print(first_half._keras_mask) +print(second_half._keras_mask) +``` + +Here is another example of a `CustomEmbedding` layer that is capable of generating a +mask from input values: + +```python +class CustomEmbedding(keras.layers.Layer): + def __init__(self, input_dim, output_dim, mask_zero=False, **kwargs): + super().__init__(**kwargs) + self.input_dim = input_dim + self.output_dim = output_dim + self.mask_zero = mask_zero + + def build(self, input_shape): + self.embeddings = self.add_weight( + shape=(self.input_dim, self.output_dim), + initializer="random_normal", + dtype="float32", + ) + + def call(self, inputs): + inputs = ops.cast(inputs, "int32") + return ops.take(self.embeddings, inputs) + + def compute_mask(self, inputs, mask=None): + if not self.mask_zero: + return None + return ops.not_equal(inputs, 0) + + +layer = CustomEmbedding(10, 32, mask_zero=True) +x = np.random.random((3, 10)) * 9 +x = x.astype("int32") + +y = layer(x) +mask = layer.compute_mask(x) + +print(mask) +``` + +Note: For more details about format limitations related to masking, see the +[serialization guide](/guides/serialization_and_saving). + +## Opting-in to mask propagation on compatible layers + +Most layers don't modify the time dimension, so don't need to modify the current mask. +However, they may still want to be able to **propagate** the current mask, unchanged, +to the next layer. **This is an opt-in behavior.** By default, a custom layer will +destroy the current mask (since the framework has no way to tell whether propagating +the mask is safe to do). + +If you have a custom layer that does not modify the time dimension, and if you want it +to be able to propagate the current input mask, you should set `self.supports_masking += True` in the layer constructor. In this case, the default behavior of +`compute_mask()` is to just pass the current mask through. + +Here's an example of a layer that is whitelisted for mask propagation: + +```python +class MyActivation(keras.layers.Layer): + def __init__(self, **kwargs): + super().__init__(**kwargs) + # Signal that the layer is safe for mask propagation + self.supports_masking = True + + def call(self, inputs): + return ops.relu(inputs) +``` + +You can now use this custom layer in-between a mask-generating layer (like `Embedding`) +and a mask-consuming layer (like `LSTM`), and it will pass the mask along so that it +reaches the mask-consuming layer. + +```python +inputs = keras.Input(shape=(None,), dtype="int32") +x = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True)(inputs) +x = MyActivation()(x) # Will pass the mask along +print("Mask found:", x._keras_mask) +outputs = layers.LSTM(32)(x) # Will receive the mask + +model = keras.Model(inputs, outputs) +y = model(np.random.randint(0, 5000, size=(32, 100))) +``` + +## Writing layers that need mask information + +Some layers are mask *consumers*: they accept a `mask` argument in `call` and use it to +determine whether to skip certain time steps. + +To write such a layer, you can simply add a `mask=None` argument in your `call` +signature. The mask associated with the inputs will be passed to your layer whenever +it is available. + +Here's a simple example below: a layer that computes a softmax over the time dimension +(axis 1) of an input sequence, while discarding masked timesteps. 
+
+```python
+class TemporalSoftmax(keras.layers.Layer):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.supports_masking = True
+
+    def call(self, inputs, mask=None):
+        assert mask is not None
+        broadcast_float_mask = ops.expand_dims(ops.cast(mask, "float32"), -1)
+        # Zero out masked timesteps, then normalize over the time axis (axis 1).
+        inputs_exp = ops.exp(inputs) * broadcast_float_mask
+        inputs_sum = ops.sum(inputs_exp, axis=1, keepdims=True)
+        return inputs_exp / inputs_sum
+
+
+inputs = keras.Input(shape=(None,), dtype="int32")
+x = layers.Embedding(input_dim=10, output_dim=32, mask_zero=True)(inputs)
+x = layers.Dense(1)(x)
+outputs = TemporalSoftmax()(x)
+
+model = keras.Model(inputs, outputs)
+y = model(np.random.randint(0, 10, size=(32, 100)))
+```
+
+## Summary
+
+That is all you need to know about padding & masking in Keras. To recap:
+
+- "Masking" is how layers are able to know when to skip / ignore certain timesteps in
+sequence inputs.
+- Some layers are mask-generators: `Embedding` can generate a mask from input values
+(if `mask_zero=True`), and so can the `Masking` layer.
+- Some layers are mask-consumers: they expose a `mask` argument in their `__call__`
+method. This is the case for RNN layers.
+- In the Functional API and Sequential API, mask information is propagated
+automatically.
+- When using layers in a standalone way, you can pass the `mask` argument to layers
+manually.
+- You can easily write layers that modify the current mask, that generate a new mask,
+or that consume the mask associated with the inputs.
+
diff --git a/.tether/vignettes-src/working_with_rnns.Rmd b/.tether/vignettes-src/working_with_rnns.Rmd
new file mode 100644
index 0000000000..24ae146594
--- /dev/null
+++ b/.tether/vignettes-src/working_with_rnns.Rmd
@@ -0,0 +1,560 @@
+---
+title: Working with RNNs
+authors: Scott Zhu, Francois Chollet
+date-created: 2019/07/08
+last-modified: 2023/07/10
+description: Complete guide to using & customizing RNN layers.
+accelerator: GPU
+output: rmarkdown::html_vignette
+knit: ({source(here::here("tools/knit.R")); knit_vignette})
+---
+
+## Introduction
+
+Recurrent neural networks (RNN) are a class of neural networks that is powerful for
+modeling sequence data such as time series or natural language.
+
+Schematically, a RNN layer uses a `for` loop to iterate over the timesteps of a
+sequence, while maintaining an internal state that encodes information about the
+timesteps it has seen so far.
+
+The Keras RNN API is designed with a focus on:
+
+- **Ease of use**: the built-in `keras.layers.RNN`, `keras.layers.LSTM`,
+`keras.layers.GRU` layers enable you to quickly build recurrent models without
+having to make difficult configuration choices.
+
+- **Ease of customization**: You can also define your own RNN cell layer (the inner
+part of the `for` loop) with custom behavior, and use it with the generic
+`keras.layers.RNN` layer (the `for` loop itself). This allows you to quickly
+prototype different research ideas in a flexible way with minimal code.
+
+## Setup
+
+```python
+import numpy as np
+import tensorflow as tf
+import keras
+from keras import layers
+```
+
+## Built-in RNN layers: a simple example
+
+There are three built-in RNN layers in Keras:
+
+1. `keras.layers.SimpleRNN`, a fully-connected RNN where the output of the previous
+timestep is fed to the next timestep.
+
+2. `keras.layers.GRU`, first proposed in
+[Cho et al., 2014](https://arxiv.org/abs/1406.1078).
+
+3. `keras.layers.LSTM`, first proposed in
+[Hochreiter & Schmidhuber, 1997](https://www.bioinf.jku.at/publications/older/2604.pdf).
+
+In early 2015, Keras had the first reusable open-source Python implementations of LSTM
+and GRU.
+
+Here is a simple example of a `Sequential` model that processes sequences of integers,
+embeds each integer into a 64-dimensional vector, then processes the sequence of
+vectors using a `LSTM` layer.
+
+```python
+model = keras.Sequential()
+# Add an Embedding layer expecting input vocab of size 1000, and
+# output embedding dimension of size 64.
+model.add(layers.Embedding(input_dim=1000, output_dim=64))
+
+# Add a LSTM layer with 128 internal units.
+model.add(layers.LSTM(128))
+
+# Add a Dense layer with 10 units.
+model.add(layers.Dense(10))
+
+model.summary()
+```
+
+Built-in RNNs support a number of useful features:
+
+- Recurrent dropout, via the `dropout` and `recurrent_dropout` arguments
+- Ability to process an input sequence in reverse, via the `go_backwards` argument
+- Loop unrolling (which can lead to a large speedup when processing short sequences on
+CPU), via the `unroll` argument
+- ...and more.
+
+For more information, see the
+[RNN API documentation](https://keras.io/api/layers/recurrent_layers/).
+
+## Outputs and states
+
+By default, the output of a RNN layer contains a single vector per sample. This vector
+is the RNN cell output corresponding to the last timestep, containing information
+about the entire input sequence. The shape of this output is `(batch_size, units)`
+where `units` corresponds to the `units` argument passed to the layer's constructor.
+
+A RNN layer can also return the entire sequence of outputs for each sample (one vector
+per timestep per sample), if you set `return_sequences=True`. The shape of this output
+is `(batch_size, timesteps, units)`.
+
+```python
+model = keras.Sequential()
+model.add(layers.Embedding(input_dim=1000, output_dim=64))
+
+# The output of GRU will be a 3D tensor of shape (batch_size, timesteps, 256)
+model.add(layers.GRU(256, return_sequences=True))
+
+# The output of SimpleRNN will be a 2D tensor of shape (batch_size, 128)
+model.add(layers.SimpleRNN(128))
+
+model.add(layers.Dense(10))
+
+model.summary()
+```
+
+In addition, a RNN layer can return its final internal state(s). The returned states
+can be used to resume the RNN execution later, or
+[to initialize another RNN](https://arxiv.org/abs/1409.3215).
+This setting is commonly used in the
+encoder-decoder sequence-to-sequence model, where the encoder final state is used as
+the initial state of the decoder.
+
+To configure a RNN layer to return its internal state, set the `return_state` parameter
+to `True` when creating the layer. Note that `LSTM` has 2 state tensors, but `GRU`
+only has one.
+
+To configure the initial state of the layer, just call the layer with the additional
+keyword argument `initial_state`.
+Note that the shape of the state needs to match the unit size of the layer, like in the
+example below.
+
+```python
+encoder_vocab = 1000
+decoder_vocab = 2000
+
+encoder_input = layers.Input(shape=(None,))
+encoder_embedded = layers.Embedding(input_dim=encoder_vocab, output_dim=64)(
+    encoder_input
+)
+
+# Return states in addition to output
+output, state_h, state_c = layers.LSTM(64, return_state=True, name="encoder")(
+    encoder_embedded
+)
+encoder_state = [state_h, state_c]
+
+decoder_input = layers.Input(shape=(None,))
+decoder_embedded = layers.Embedding(input_dim=decoder_vocab, output_dim=64)(
+    decoder_input
+)
+
+# Pass the 2 states to a new LSTM layer, as initial state
+decoder_output = layers.LSTM(64, name="decoder")(
+    decoder_embedded, initial_state=encoder_state
+)
+output = layers.Dense(10)(decoder_output)
+
+model = keras.Model([encoder_input, decoder_input], output)
+model.summary()
+```
+
+## RNN layers and RNN cells
+
+In addition to the built-in RNN layers, the RNN API also provides cell-level APIs.
+Unlike RNN layers, which process whole batches of input sequences, the RNN cell only
+processes a single timestep.
+
+The cell is the inside of the `for` loop of a RNN layer. Wrapping a cell inside a
+`keras.layers.RNN` layer gives you a layer capable of processing batches of
+sequences, e.g. `RNN(LSTMCell(10))`.
+
+Mathematically, `RNN(LSTMCell(10))` produces the same result as `LSTM(10)`. In fact,
+the implementation of this layer in TF v1.x was just creating the corresponding RNN
+cell and wrapping it in a RNN layer. However, using the built-in `GRU` and `LSTM`
+layers enables the use of CuDNN, and you may see better performance.
+
+There are three built-in RNN cells, each of them corresponding to the matching RNN
+layer.
+
+- `keras.layers.SimpleRNNCell` corresponds to the `SimpleRNN` layer.
+
+- `keras.layers.GRUCell` corresponds to the `GRU` layer.
+
+- `keras.layers.LSTMCell` corresponds to the `LSTM` layer.
+
+The cell abstraction, together with the generic `keras.layers.RNN` class, makes it
+very easy to implement custom RNN architectures for your research.
+
+## Cross-batch statefulness
+
+When processing very long sequences (possibly infinite), you may want to use the
+pattern of **cross-batch statefulness**.
+
+Normally, the internal state of a RNN layer is reset every time it sees a new batch
+(i.e. every sample seen by the layer is assumed to be independent of the past). The
+layer will only maintain a state while processing a given sample.
+
+If you have very long sequences though, it is useful to break them into shorter
+sequences, and to feed these shorter sequences sequentially into a RNN layer without
+resetting the layer's state. That way, the layer can retain information about the
+entirety of the sequence, even though it's only seeing one sub-sequence at a time.
+
+You can do this by setting `stateful=True` in the constructor.
+
+If you have a sequence `s = [t0, t1, ... t1546, t1547]`, you would split it into e.g.
+
+```
+s1 = [t0, t1, ... t100]
+s2 = [t101, ... t201]
+...
+s16 = [t1501, ... t1547]
+```
+
+Then you would process it via:
+
+```python
+lstm_layer = layers.LSTM(64, stateful=True)
+for s in sub_sequences:
+    output = lstm_layer(s)
+```
+
+When you want to clear the state, you can use `layer.reset_states()`.
+
+
+> Note: In this setup, sample `i` in a given batch is assumed to be the continuation of
+sample `i` in the previous batch. This means that all batches should contain the same
+number of samples (batch size). E.g.
+if a batch contains `[sequence_A_from_t0_to_t100, sequence_B_from_t0_to_t100]`,
+the next batch should contain
+`[sequence_A_from_t101_to_t200, sequence_B_from_t101_to_t200]`.
+
+
+Here is a complete example:
+
+```python
+paragraph1 = np.random.random((20, 10, 50)).astype(np.float32)
+paragraph2 = np.random.random((20, 10, 50)).astype(np.float32)
+paragraph3 = np.random.random((20, 10, 50)).astype(np.float32)
+
+lstm_layer = layers.LSTM(64, stateful=True)
+output = lstm_layer(paragraph1)
+output = lstm_layer(paragraph2)
+output = lstm_layer(paragraph3)
+
+# reset_states() will reset the cached state to the original initial_state.
+# If no initial_state was provided, zero-states will be used by default.
+lstm_layer.reset_states()
+```
+
+### RNN State Reuse
+
+
+The recorded states of the RNN layer are not included in `layer.weights`. If you
+would like to reuse the state from a RNN layer, you can retrieve the states value via
+`layer.states` and use it as the
+initial state for a new layer via the Keras functional API, like `new_layer(inputs,
+initial_state=layer.states)`, or via model subclassing.
+
+Please also note that a Sequential model cannot be used in this case, since it only
+supports layers with a single input and output; the extra input of the initial state
+makes it impossible to use here.
+
+```python
+paragraph1 = np.random.random((20, 10, 50)).astype(np.float32)
+paragraph2 = np.random.random((20, 10, 50)).astype(np.float32)
+paragraph3 = np.random.random((20, 10, 50)).astype(np.float32)
+
+lstm_layer = layers.LSTM(64, stateful=True)
+output = lstm_layer(paragraph1)
+output = lstm_layer(paragraph2)
+
+existing_state = lstm_layer.states
+
+new_lstm_layer = layers.LSTM(64)
+new_output = new_lstm_layer(paragraph3, initial_state=existing_state)
+```
+
+## Bidirectional RNNs
+
+For sequences other than time series (e.g. text), it is often the case that a RNN model
+can perform better if it not only processes the sequence from start to end, but also
+backwards. For example, to predict the next word in a sentence, it is often useful to
+have the context around the word, not just the words that come before it.
+
+Keras provides an easy API for you to build such bidirectional RNNs: the
+`keras.layers.Bidirectional` wrapper.
+
+```python
+model = keras.Sequential()
+
+model.add(
+    layers.Bidirectional(layers.LSTM(64, return_sequences=True), input_shape=(5, 10))
+)
+model.add(layers.Bidirectional(layers.LSTM(32)))
+model.add(layers.Dense(10))
+
+model.summary()
+```
+
+Under the hood, `Bidirectional` will copy the RNN layer passed in, and flip the
+`go_backwards` field of the newly copied layer, so that it will process the inputs in
+reverse order.
+
+The output of the `Bidirectional` RNN will be, by default, the concatenation of the forward layer
+output and the backward layer output. If you need a different merging behavior, e.g.
+summation, change the `merge_mode` parameter in the `Bidirectional` wrapper
+constructor. For more details about `Bidirectional`, please check
+[the API docs](https://keras.io/api/layers/recurrent_layers/bidirectional/).
+
+## Performance optimization and CuDNN kernels
+
+In TensorFlow 2.0, the built-in LSTM and GRU layers have been updated to leverage CuDNN
+kernels by default when a GPU is available. With this change, the prior
+`keras.layers.CuDNNLSTM/CuDNNGRU` layers have been deprecated, and you can build your
+model without worrying about the hardware it will run on.
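+
+As a brief aside on the `merge_mode` parameter mentioned in the previous section,
+here is a minimal sketch using the documented `"sum"` mode (the random input and
+printed shape are illustrative assumptions, not part of the original guide):
+
+```python
+# Sum, rather than concatenate, the forward and backward outputs.
+bidir = layers.Bidirectional(layers.LSTM(32), merge_mode="sum")
+y = bidir(np.random.random((4, 5, 10)).astype("float32"))
+print(y.shape)  # (4, 32): same width as a single direction, since outputs are summed
+```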
+
+Since the CuDNN kernel is built with certain assumptions, the layer **will
+not be able to use the CuDNN kernel if you change the defaults of the built-in LSTM or
+GRU layers**. E.g.:
+
+- Changing the `activation` function from `tanh` to something else.
+- Changing the `recurrent_activation` function from `sigmoid` to something else.
+- Using `recurrent_dropout` > 0.
+- Setting `unroll` to True, which forces LSTM/GRU to decompose the inner
+`tf.while_loop` into an unrolled `for` loop.
+- Setting `use_bias` to False.
+- Using masking when the input data is not strictly right padded (if the mask
+corresponds to strictly right padded data, CuDNN can still be used. This is the most
+common case).
+
+For the detailed list of constraints, please see the documentation for the
+[LSTM](https://keras.io/api/layers/recurrent_layers/lstm/) and
+[GRU](https://keras.io/api/layers/recurrent_layers/gru/) layers.
+
+### Using CuDNN kernels when available
+
+Let's build a simple LSTM model to demonstrate the performance difference.
+
+We'll use as input sequences the sequence of rows of MNIST digits (treating each row of
+pixels as a timestep), and we'll predict the digit's label.
+
+```python
+batch_size = 64
+# Each MNIST image batch is a tensor of shape (batch_size, 28, 28).
+# Each input sequence will be of size (28, 28) (height is treated like time).
+input_dim = 28
+
+units = 64
+output_size = 10  # labels are from 0 to 9
+
+
+# Build the RNN model
+def build_model(allow_cudnn_kernel=True):
+    # CuDNN is only available at the layer level, and not at the cell level.
+    # This means `LSTM(units)` will use the CuDNN kernel,
+    # while RNN(LSTMCell(units)) will run on the non-CuDNN kernel.
+    if allow_cudnn_kernel:
+        # The LSTM layer with default options uses CuDNN.
+        lstm_layer = keras.layers.LSTM(units, input_shape=(None, input_dim))
+    else:
+        # Wrapping a LSTMCell in a RNN layer will not use CuDNN.
+        lstm_layer = keras.layers.RNN(
+            keras.layers.LSTMCell(units), input_shape=(None, input_dim)
+        )
+    model = keras.models.Sequential(
+        [
+            lstm_layer,
+            keras.layers.BatchNormalization(),
+            keras.layers.Dense(output_size),
+        ]
+    )
+    return model
+```
+
+Let's load the MNIST dataset:
+
+```python
+mnist = keras.datasets.mnist
+
+(x_train, y_train), (x_test, y_test) = mnist.load_data()
+x_train, x_test = x_train / 255.0, x_test / 255.0
+sample, sample_label = x_train[0], y_train[0]
+```
+
+Let's create a model instance and train it.
+
+We choose `sparse_categorical_crossentropy` as the loss function for the model. The
+output of the model has shape `[batch_size, 10]`. The target for the model is an
+integer vector, with each integer in the range 0 to 9.
+
+```python
+model = build_model(allow_cudnn_kernel=True)
+
+model.compile(
+    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+    optimizer="sgd",
+    metrics=["accuracy"],
+)
+
+
+model.fit(
+    x_train, y_train, validation_data=(x_test, y_test), batch_size=batch_size, epochs=1
+)
+```
+
+Now, let's compare to a model that does not use the CuDNN kernel:
+
+```python
+noncudnn_model = build_model(allow_cudnn_kernel=False)
+noncudnn_model.set_weights(model.get_weights())
+noncudnn_model.compile(
+    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+    optimizer="sgd",
+    metrics=["accuracy"],
+)
+noncudnn_model.fit(
+    x_train, y_train, validation_data=(x_test, y_test), batch_size=batch_size, epochs=1
+)
+```
+
+When running on a machine with an NVIDIA GPU and CuDNN installed,
+the model built with CuDNN is much faster to train compared to the
+model that uses the regular TensorFlow kernel.
+
+The same CuDNN-enabled model can also be used to run inference in a CPU-only
+environment. The `tf.device` annotation below is just forcing the device placement.
+The model will run on CPU by default if no GPU is available.
+
+You simply don't have to worry about the hardware you're running on anymore. Isn't that
+pretty cool?
+
+```python
+import matplotlib.pyplot as plt
+
+with tf.device("CPU:0"):
+    cpu_model = build_model(allow_cudnn_kernel=True)
+    cpu_model.set_weights(model.get_weights())
+    result = tf.argmax(cpu_model.predict_on_batch(tf.expand_dims(sample, 0)), axis=1)
+    print(
+        "Predicted result is: %s, target result is: %s" % (result.numpy(), sample_label)
+    )
+    plt.imshow(sample, cmap=plt.get_cmap("gray"))
+```
+
+## RNNs with list/dict inputs, or nested inputs
+
+Nested structures allow implementers to include more information within a single
+timestep. For example, a video frame could have audio and video input at the same
+time. The data shape in this case could be:
+
+`[batch, timestep, {"video": [height, width, channel], "audio": [frequency]}]`
+
+In another example, handwriting data could have both coordinates x and y for the
+current position of the pen, as well as pressure information. So the data
+representation could be:
+
+`[batch, timestep, {"location": [x, y], "pressure": [force]}]`
+
+The following code provides an example of how to build a custom RNN cell that accepts
+such structured inputs.
+
+### Define a custom cell that supports nested input/output
+
+See [Making new Layers & Models via subclassing](making_new_layers_and_models_via_subclassing.html)
+for details on writing your own layers.
+
+```python
+@keras.saving.register_keras_serializable()
+class NestedCell(keras.layers.Layer):
+    def __init__(self, unit_1, unit_2, unit_3, **kwargs):
+        self.unit_1 = unit_1
+        self.unit_2 = unit_2
+        self.unit_3 = unit_3
+        self.state_size = [tf.TensorShape([unit_1]), tf.TensorShape([unit_2, unit_3])]
+        self.output_size = [tf.TensorShape([unit_1]), tf.TensorShape([unit_2, unit_3])]
+        super().__init__(**kwargs)
+
+    def build(self, input_shapes):
+        # expect input_shape to contain 2 items, [(batch, i1), (batch, i2, i3)]
+        i1 = input_shapes[0][1]
+        i2 = input_shapes[1][1]
+        i3 = input_shapes[1][2]
+
+        self.kernel_1 = self.add_weight(
+            shape=(i1, self.unit_1), initializer="uniform", name="kernel_1"
+        )
+        self.kernel_2_3 = self.add_weight(
+            shape=(i2, i3, self.unit_2, self.unit_3),
+            initializer="uniform",
+            name="kernel_2_3",
+        )
+
+    def call(self, inputs, states):
+        # inputs should be in [(batch, input_1), (batch, input_2, input_3)]
+        # state should be in shape [(batch, unit_1), (batch, unit_2, unit_3)]
+        input_1, input_2 = tf.nest.flatten(inputs)
+        s1, s2 = states
+
+        output_1 = tf.matmul(input_1, self.kernel_1)
+        output_2_3 = tf.einsum("bij,ijkl->bkl", input_2, self.kernel_2_3)
+        state_1 = s1 + output_1
+        state_2_3 = s2 + output_2_3
+
+        output = (output_1, output_2_3)
+        new_states = (state_1, state_2_3)
+
+        return output, new_states
+
+    def get_config(self):
+        return {"unit_1": self.unit_1, "unit_2": self.unit_2, "unit_3": self.unit_3}
+```
+
+### Build a RNN model with nested input/output
+
+Let's build a Keras model that uses a `keras.layers.RNN` layer and the custom cell
+we just defined.
+
+```python
+unit_1 = 10
+unit_2 = 20
+unit_3 = 30
+
+i1 = 32
+i2 = 64
+i3 = 32
+batch_size = 64
+num_batches = 10
+timestep = 50
+
+cell = NestedCell(unit_1, unit_2, unit_3)
+rnn = keras.layers.RNN(cell)
+
+input_1 = keras.Input((None, i1))
+input_2 = keras.Input((None, i2, i3))
+
+outputs = rnn((input_1, input_2))
+
+model = keras.models.Model([input_1, input_2], outputs)
+
+model.compile(optimizer="adam", loss="mse", metrics=["accuracy"])
+```
+
+### Train the model with randomly generated data
+
+Since there isn't a good candidate dataset for this model, we use random NumPy data for
+demonstration.
+
+```python
+input_1_data = np.random.random((batch_size * num_batches, timestep, i1))
+input_2_data = np.random.random((batch_size * num_batches, timestep, i2, i3))
+target_1_data = np.random.random((batch_size * num_batches, unit_1))
+target_2_data = np.random.random((batch_size * num_batches, unit_2, unit_3))
+input_data = [input_1_data, input_2_data]
+target_data = [target_1_data, target_2_data]
+
+model.fit(input_data, target_data, batch_size=batch_size)
+```
+
+With the Keras `keras.layers.RNN` layer, you are only expected to define the math
+logic for an individual step within the sequence, and the `keras.layers.RNN` layer
+will handle the sequence iteration for you. It's an incredibly powerful way to quickly
+prototype new kinds of RNNs (e.g. a LSTM variant).
+
+For more details, please visit the [API docs](https://keras.io/api/layers/recurrent_layers/rnn/).
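+
+To make the cell contract concrete in the single-tensor case as well, here is a
+minimal sketch of a custom cell (a hypothetical `MinimalRNNCell`, written here for
+illustration only) that follows the same `call(inputs, states)` / `state_size`
+conventions as `NestedCell` above:
+
+```python
+from keras import ops
+
+
+class MinimalRNNCell(keras.layers.Layer):
+    def __init__(self, units, **kwargs):
+        super().__init__(**kwargs)
+        self.units = units
+        # A single state tensor of shape (batch, units).
+        self.state_size = units
+
+    def build(self, input_shape):
+        self.kernel = self.add_weight(
+            shape=(input_shape[-1], self.units), initializer="uniform", name="kernel"
+        )
+        self.recurrent_kernel = self.add_weight(
+            shape=(self.units, self.units),
+            initializer="uniform",
+            name="recurrent_kernel",
+        )
+
+    def call(self, inputs, states):
+        prev_output = states[0]
+        h = ops.matmul(inputs, self.kernel)
+        output = h + ops.matmul(prev_output, self.recurrent_kernel)
+        # Return (output, new_states).
+        return output, [output]
+
+
+# Wrap the cell in a RNN layer and run it on a random batch.
+layer = keras.layers.RNN(MinimalRNNCell(32))
+y = layer(np.random.random((4, 10, 8)).astype("float32"))
+```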
diff --git a/.tether/vignettes-src/writing_a_custom_training_loop_in_jax.Rmd b/.tether/vignettes-src/writing_a_custom_training_loop_in_jax.Rmd new file mode 100644 index 0000000000..5e3e872487 --- /dev/null +++ b/.tether/vignettes-src/writing_a_custom_training_loop_in_jax.Rmd @@ -0,0 +1,514 @@ +--- +title: Writing a training loop from scratch in JAX +author: '[fchollet](https://twitter.com/fchollet)' +date-created: 2023/06/25 +last-modified: 2023/06/25 +description: Writing low-level training & evaluation loops in JAX. +accelerator: None +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras/guides/writing_a_custom_training_loop_in_jax.py +--- + +## Setup + +```python +import os + +# This guide can only be run with the jax backend. +os.environ["KERAS_BACKEND"] = "jax" + +import jax + +# We import TF so we can use tf.data. +import tensorflow as tf +import keras +import numpy as np +``` + +## Introduction + +Keras provides default training and evaluation loops, `fit()` and `evaluate()`. +Their usage is covered in the guide +[Training & evaluation with the built-in methods](https://keras.io/guides/training_with_built_in_methods/). + +If you want to customize the learning algorithm of your model while still leveraging +the convenience of `fit()` +(for instance, to train a GAN using `fit()`), you can subclass the `Model` class and +implement your own `train_step()` method, which +is called repeatedly during `fit()`. + +Now, if you want very low-level control over training & evaluation, you should write +your own training & evaluation loops from scratch. This is what this guide is about. + +## A first end-to-end example + +To write a custom training loop, we need the following ingredients: + +- A model to train, of course. +- An optimizer. You could either use an optimizer from `keras.optimizers`, or +one from the `optax` package. +- A loss function. +- A dataset. The standard in the JAX ecosystem is to load data via `tf.data`, +so that's what we'll use. + +Let's line them up. + +First, let's get the model and the MNIST dataset: + +```python +def get_model(): + inputs = keras.Input(shape=(784,), name="digits") + x1 = keras.layers.Dense(64, activation="relu")(inputs) + x2 = keras.layers.Dense(64, activation="relu")(x1) + outputs = keras.layers.Dense(10, name="predictions")(x2) + model = keras.Model(inputs=inputs, outputs=outputs) + return model + + +model = get_model() + +# Prepare the training dataset. +batch_size = 32 +(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() +x_train = np.reshape(x_train, (-1, 784)).astype("float32") +x_test = np.reshape(x_test, (-1, 784)).astype("float32") +y_train = keras.utils.to_categorical(y_train) +y_test = keras.utils.to_categorical(y_test) + +# Reserve 10,000 samples for validation. +x_val = x_train[-10000:] +y_val = y_train[-10000:] +x_train = x_train[:-10000] +y_train = y_train[:-10000] + +# Prepare the training dataset. +train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) +train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size) + +# Prepare the validation dataset. +val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val)) +val_dataset = val_dataset.batch(batch_size) +``` + +Next, here's the loss function and the optimizer. +We'll use a Keras optimizer in this case. + +```python +# Instantiate a loss function. +loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True) + +# Instantiate an optimizer. 
+optimizer = keras.optimizers.Adam(learning_rate=1e-3)
+```
+
+### Getting gradients in JAX
+
+Let's train our model using mini-batch gradient with a custom training loop.
+
+In JAX, gradients are computed via *metaprogramming*: you call `jax.grad` (or
+`jax.value_and_grad`) on a function in order to create a gradient-computing function
+for that first function.
+
+So the first thing we need is a function that returns the loss value.
+That's the function we'll use to generate the gradient function. Something like this:
+
+```python
+def compute_loss(x, y):
+    ...
+    return loss
+```
+
+Once you have such a function, you can compute gradients via metaprogramming as follows:
+
+```python
+grad_fn = jax.grad(compute_loss)
+grads = grad_fn(x, y)
+```
+
+Typically, you don't just want to get the gradient values, you also want to get
+the loss value. You can do this by using `jax.value_and_grad` instead of `jax.grad`:
+
+```python
+grad_fn = jax.value_and_grad(compute_loss)
+loss, grads = grad_fn(x, y)
+```
+
+### JAX computation is purely stateless
+
+In JAX, everything must be a stateless function -- so our loss computation function
+must be stateless as well. That means that all Keras variables (e.g. weight tensors)
+must be passed as function inputs, and any variable that has been updated during the
+forward pass must be returned as function output. The function must have no side effects.
+
+During the forward pass, the non-trainable variables of a Keras model might get
+updated. These variables could be, for instance, RNG seed state variables or
+BatchNormalization statistics. We're going to need to return those. So we need
+something like this:
+
+```python
+def compute_loss_and_updates(trainable_variables, non_trainable_variables, x, y):
+    ...
+    return loss, non_trainable_variables
+```
+
+Once you have such a function, you can get the gradient function by
+specifying `has_aux` in `value_and_grad`: it tells JAX that the loss
+computation function returns more outputs than just the loss. Note that the loss
+should always be the first output.
+
+```python
+grad_fn = jax.value_and_grad(compute_loss_and_updates, has_aux=True)
+(loss, non_trainable_variables), grads = grad_fn(
+    trainable_variables, non_trainable_variables, x, y
+)
+```
+
+Now that we have established the basics,
+let's implement this `compute_loss_and_updates` function.
+Keras models have a `stateless_call` method which will come in handy here.
+It works just like `model.__call__`, but it requires you to explicitly
+pass the value of all the variables in the model, and it returns not just
+the `__call__` outputs but also the (potentially updated) non-trainable
+variables.
+
+```python
+def compute_loss_and_updates(
+    trainable_variables, non_trainable_variables, x, y
+):
+    y_pred, non_trainable_variables = model.stateless_call(
+        trainable_variables, non_trainable_variables, x
+    )
+    loss = loss_fn(y, y_pred)
+    return loss, non_trainable_variables
+```
+
+Let's get the gradient function:
+
+```python
+grad_fn = jax.value_and_grad(compute_loss_and_updates, has_aux=True)
+```
+
+### The training step function
+
+Next, let's implement the end-to-end training step: the function that runs the
+forward pass, computes the loss, computes the gradients, and uses the optimizer
+to update the trainable variables. This function
+also needs to be stateless, so it will get as input a `state` tuple that
+includes every state element we're going to use:
+
+- `trainable_variables` and `non_trainable_variables`: the model's variables.
+- `optimizer_variables`: the optimizer's state variables,
+such as momentum accumulators.
+
+To update the trainable variables, we use the optimizer's stateless method
+`stateless_apply`. It's equivalent to `optimizer.apply()`, but it requires
+always passing `trainable_variables` and `optimizer_variables`. It returns
+both the updated trainable variables and the updated optimizer variables.
+
+```python
+def train_step(state, data):
+    trainable_variables, non_trainable_variables, optimizer_variables = state
+    x, y = data
+    (loss, non_trainable_variables), grads = grad_fn(
+        trainable_variables, non_trainable_variables, x, y
+    )
+    trainable_variables, optimizer_variables = optimizer.stateless_apply(
+        optimizer_variables, grads, trainable_variables
+    )
+    # Return updated state
+    return loss, (
+        trainable_variables,
+        non_trainable_variables,
+        optimizer_variables,
+    )
+```
+
+### Make it fast with `jax.jit`
+
+By default, JAX operations run eagerly,
+just like in TensorFlow eager mode and PyTorch eager mode.
+And just like TensorFlow eager mode and PyTorch eager mode, it's pretty slow
+-- eager mode is better used as a debugging environment, not as a way to do
+any actual work. So let's make our `train_step` fast by compiling it.
+
+When you have a stateless JAX function, you can compile it to XLA via the
+`@jax.jit` decorator. It will get traced during its first execution, and in
+subsequent executions you will be executing the traced graph (this is just
+like `@tf.function(jit_compile=True)`). Let's try it:
+
+```python
+@jax.jit
+def train_step(state, data):
+    trainable_variables, non_trainable_variables, optimizer_variables = state
+    x, y = data
+    (loss, non_trainable_variables), grads = grad_fn(
+        trainable_variables, non_trainable_variables, x, y
+    )
+    trainable_variables, optimizer_variables = optimizer.stateless_apply(
+        optimizer_variables, grads, trainable_variables
+    )
+    # Return updated state
+    return loss, (
+        trainable_variables,
+        non_trainable_variables,
+        optimizer_variables,
+    )
+```
+
+We're now ready to train our model. The training loop itself
+is trivial: we just repeatedly call `loss, state = train_step(state, data)`.
+
+Note:
+
+- We convert the TF tensors yielded by the `tf.data.Dataset` to NumPy
+before passing them to our JAX function.
+- All variables must be built beforehand:
+the model must be built and the optimizer must be built. Since we're using a
+Functional API model, it's already built, but if it were a subclassed model
+you'd need to call it on a batch of data to build it.
+
+```python
+# Build optimizer variables.
+optimizer.build(model.trainable_variables)
+
+trainable_variables = model.trainable_variables
+non_trainable_variables = model.non_trainable_variables
+optimizer_variables = optimizer.variables
+state = trainable_variables, non_trainable_variables, optimizer_variables
+
+# Training loop
+for step, data in enumerate(train_dataset):
+    data = (data[0].numpy(), data[1].numpy())
+    loss, state = train_step(state, data)
+    # Log every 100 batches.
+    if step % 100 == 0:
+        print(f"Training loss (for 1 batch) at step {step}: {float(loss):.4f}")
+        print(f"Seen so far: {(step + 1) * batch_size} samples")
+```
+
+A key thing to notice here is that the loop is entirely stateless -- the variables
+attached to the model (`model.weights`) are never getting updated during the loop.
+Their new values are only stored in the `state` tuple. That means that at some point,
+before saving the model, you should be attaching the new variable values back to the model.
+
+Just call `variable.assign(new_value)` on each model variable you want to update:
+
+```python
+trainable_variables, non_trainable_variables, optimizer_variables = state
+for variable, value in zip(model.trainable_variables, trainable_variables):
+    variable.assign(value)
+for variable, value in zip(
+    model.non_trainable_variables, non_trainable_variables
+):
+    variable.assign(value)
+```
+
+## Low-level handling of metrics
+
+Let's add metrics monitoring to this basic training loop.
+
+You can readily reuse built-in Keras metrics (or custom ones you wrote) in such training
+loops written from scratch. Here's the flow:
+
+- Instantiate the metric at the start of the loop
+- Include `metric_variables` in the `train_step` arguments
+and `compute_loss_and_updates` arguments.
+- Call `metric.stateless_update_state()` in the `compute_loss_and_updates` function.
+It's equivalent to `update_state()` -- only stateless.
+- When you need to display the current value of the metric, outside the `train_step`
+(in the eager scope), attach the new metric variable values to the metric object
+and call `metric.result()`.
+- Call `metric.reset_state()` when you need to clear the state of the metric
+(typically at the end of an epoch)
+
+Let's use this knowledge to compute `CategoricalAccuracy` on training and
+validation data at the end of training:
+
+```python
+# Get a fresh model
+model = get_model()
+
+# Instantiate an optimizer to train the model.
+optimizer = keras.optimizers.Adam(learning_rate=1e-3)
+# Instantiate a loss function.
+loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True)
+
+# Prepare the metrics.
+train_acc_metric = keras.metrics.CategoricalAccuracy()
+val_acc_metric = keras.metrics.CategoricalAccuracy()
+
+
+def compute_loss_and_updates(
+    trainable_variables, non_trainable_variables, metric_variables, x, y
+):
+    y_pred, non_trainable_variables = model.stateless_call(
+        trainable_variables, non_trainable_variables, x
+    )
+    loss = loss_fn(y, y_pred)
+    metric_variables = train_acc_metric.stateless_update_state(
+        metric_variables, y, y_pred
+    )
+    return loss, (non_trainable_variables, metric_variables)
+
+
+grad_fn = jax.value_and_grad(compute_loss_and_updates, has_aux=True)
+
+
+@jax.jit
+def train_step(state, data):
+    (
+        trainable_variables,
+        non_trainable_variables,
+        optimizer_variables,
+        metric_variables,
+    ) = state
+    x, y = data
+    (loss, (non_trainable_variables, metric_variables)), grads = grad_fn(
+        trainable_variables, non_trainable_variables, metric_variables, x, y
+    )
+    trainable_variables, optimizer_variables = optimizer.stateless_apply(
+        optimizer_variables, grads, trainable_variables
+    )
+    # Return updated state
+    return loss, (
+        trainable_variables,
+        non_trainable_variables,
+        optimizer_variables,
+        metric_variables,
+    )
+```
+
+We'll also prepare an evaluation step function:
+
+```python
+@jax.jit
+def eval_step(state, data):
+    trainable_variables, non_trainable_variables, metric_variables = state
+    x, y = data
+    y_pred, non_trainable_variables = model.stateless_call(
+        trainable_variables, non_trainable_variables, x
+    )
+    loss = loss_fn(y, y_pred)
+    metric_variables = val_acc_metric.stateless_update_state(
+        metric_variables, y, y_pred
+    )
+    return loss, (
+        trainable_variables,
+        non_trainable_variables,
+        metric_variables,
+    )
+```
+
+Here are our loops:
+
+```python
+# Build optimizer variables.
+optimizer.build(model.trainable_variables)
+
+trainable_variables = model.trainable_variables
+non_trainable_variables = model.non_trainable_variables
+optimizer_variables = optimizer.variables
+metric_variables = train_acc_metric.variables
+state = (
+    trainable_variables,
+    non_trainable_variables,
+    optimizer_variables,
+    metric_variables,
+)
+
+# Training loop
+for step, data in enumerate(train_dataset):
+    data = (data[0].numpy(), data[1].numpy())
+    loss, state = train_step(state, data)
+    # Log every 100 batches.
+    if step % 100 == 0:
+        print(f"Training loss (for 1 batch) at step {step}: {float(loss):.4f}")
+        _, _, _, metric_variables = state
+        for variable, value in zip(
+            train_acc_metric.variables, metric_variables
+        ):
+            variable.assign(value)
+        print(f"Training accuracy: {train_acc_metric.result()}")
+        print(f"Seen so far: {(step + 1) * batch_size} samples")
+
+# Unpack the final training state, then swap in fresh variables for the
+# validation metric to build the evaluation state.
+(
+    trainable_variables,
+    non_trainable_variables,
+    optimizer_variables,
+    metric_variables,
+) = state
+metric_variables = val_acc_metric.variables
+state = trainable_variables, non_trainable_variables, metric_variables
+
+# Eval loop
+for step, data in enumerate(val_dataset):
+    data = (data[0].numpy(), data[1].numpy())
+    loss, state = eval_step(state, data)
+    # Log every 100 batches.
+    if step % 100 == 0:
+        print(
+            f"Validation loss (for 1 batch) at step {step}: {float(loss):.4f}"
+        )
+        _, _, metric_variables = state
+        for variable, value in zip(val_acc_metric.variables, metric_variables):
+            variable.assign(value)
+        print(f"Validation accuracy: {val_acc_metric.result()}")
+        print(f"Seen so far: {(step + 1) * batch_size} samples")
+```
+
+## Low-level handling of losses tracked by the model
+
+Layers & models recursively track any losses created during the forward pass
+by layers that call `self.add_loss(value)`. The resulting list of scalar loss
+values is available via the property `model.losses`
+at the end of the forward pass.
+
+If you want to use these loss components, you should sum them
+and add them to the main loss in your training step.
+
+Consider this layer, which creates an activity regularization loss:
+
+```python
+class ActivityRegularizationLayer(keras.layers.Layer):
+    def call(self, inputs):
+        self.add_loss(1e-2 * jax.numpy.sum(inputs))
+        return inputs
+```
+
+Let's build a really simple model that uses it:
+
+```python
+inputs = keras.Input(shape=(784,), name="digits")
+x = keras.layers.Dense(64, activation="relu")(inputs)
+# Insert activity regularization as a layer
+x = ActivityRegularizationLayer()(x)
+x = keras.layers.Dense(64, activation="relu")(x)
+outputs = keras.layers.Dense(10, name="predictions")(x)
+
+model = keras.Model(inputs=inputs, outputs=outputs)
+```
+
+Here's what our `compute_loss_and_updates` function should look like now:
+
+- Pass `return_losses=True` to `model.stateless_call()`.
+- Sum the resulting `losses` and add them to the main loss.
+
+```python
+def compute_loss_and_updates(
+    trainable_variables, non_trainable_variables, metric_variables, x, y
+):
+    y_pred, non_trainable_variables, losses = model.stateless_call(
+        trainable_variables, non_trainable_variables, x, return_losses=True
+    )
+    loss = loss_fn(y, y_pred)
+    if losses:
+        loss += jax.numpy.sum(losses)
+    metric_variables = train_acc_metric.stateless_update_state(
+        metric_variables, y, y_pred
+    )
+    # Keep the auxiliary outputs packed as a single tuple for `has_aux=True`.
+    return loss, (non_trainable_variables, metric_variables)
+```
+
+That's it!
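+
+As a quick closing sketch (assuming all the definitions above), you can rebuild the
+gradient function and check the regularized loss on one batch; the auxiliary outputs
+are still packed as a single tuple, as `has_aux=True` expects:
+
+```python
+grad_fn = jax.value_and_grad(compute_loss_and_updates, has_aux=True)
+
+# One direct (non-jitted) evaluation on the first 32 training samples.
+(loss, (non_trainable_variables, metric_variables)), grads = grad_fn(
+    model.trainable_variables,
+    model.non_trainable_variables,
+    train_acc_metric.variables,
+    x_train[:32],
+    y_train[:32],
+)
+print(float(loss))
+```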
+ diff --git a/.tether/vignettes-src/writing_a_custom_training_loop_in_tensorflow.Rmd b/.tether/vignettes-src/writing_a_custom_training_loop_in_tensorflow.Rmd new file mode 100644 index 0000000000..bfc75688bd --- /dev/null +++ b/.tether/vignettes-src/writing_a_custom_training_loop_in_tensorflow.Rmd @@ -0,0 +1,509 @@ +--- +title: Writing a training loop from scratch in TensorFlow +author: '[fchollet](https://twitter.com/fchollet)' +date-created: 2019/03/01 +last-modified: 2023/06/25 +description: Writing low-level training & evaluation loops in TensorFlow. +accelerator: None +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras-io/guides/writing_a_custom_training_loop_in_tensorflow.py +--- + +## Setup + +```python +import time +import os + +# This guide can only be run with the TensorFlow backend. +os.environ["KERAS_BACKEND"] = "tensorflow" + +import tensorflow as tf +import keras +import numpy as np +``` + +## Introduction + +Keras provides default training and evaluation loops, `fit()` and `evaluate()`. +Their usage is covered in the guide +[Training & evaluation with the built-in methods](/guides/training_with_built_in_methods/). + +If you want to customize the learning algorithm of your model while still leveraging +the convenience of `fit()` +(for instance, to train a GAN using `fit()`), you can subclass the `Model` class and +implement your own `train_step()` method, which +is called repeatedly during `fit()`. + +Now, if you want very low-level control over training & evaluation, you should write +your own training & evaluation loops from scratch. This is what this guide is about. + +## A first end-to-end example + +Let's consider a simple MNIST model: + +```python +def get_model(): + inputs = keras.Input(shape=(784,), name="digits") + x1 = keras.layers.Dense(64, activation="relu")(inputs) + x2 = keras.layers.Dense(64, activation="relu")(x1) + outputs = keras.layers.Dense(10, name="predictions")(x2) + model = keras.Model(inputs=inputs, outputs=outputs) + return model + + +model = get_model() +``` + +Let's train it using mini-batch gradient with a custom training loop. + +First, we're going to need an optimizer, a loss function, and a dataset: + +```python +# Instantiate an optimizer. +optimizer = keras.optimizers.Adam(learning_rate=1e-3) +# Instantiate a loss function. +loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True) + +# Prepare the training dataset. +batch_size = 32 +(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() +x_train = np.reshape(x_train, (-1, 784)) +x_test = np.reshape(x_test, (-1, 784)) + +# Reserve 10,000 samples for validation. +x_val = x_train[-10000:] +y_val = y_train[-10000:] +x_train = x_train[:-10000] +y_train = y_train[:-10000] + +# Prepare the training dataset. +train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)) +train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size) + +# Prepare the validation dataset. +val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val)) +val_dataset = val_dataset.batch(batch_size) +``` + +Calling a model inside a `GradientTape` scope enables you to retrieve the gradients of +the trainable weights of the layer with respect to a loss value. Using an optimizer +instance, you can use these gradients to update these variables (which you can +retrieve using `model.trainable_weights`). 
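+
+As a quick standalone illustration of the tape mechanics (a minimal sketch,
+independent of the model above):
+
+```python
+x = tf.Variable(3.0)
+with tf.GradientTape() as tape:
+    y = x * x
+# dy/dx = 2 * x = 6.0
+print(tape.gradient(y, x))
+```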
+ +Here's our training loop, step by step: + +- We open a `for` loop that iterates over epochs +- For each epoch, we open a `for` loop that iterates over the dataset, in batches +- For each batch, we open a `GradientTape()` scope +- Inside this scope, we call the model (forward pass) and compute the loss +- Outside the scope, we retrieve the gradients of the weights +of the model with regard to the loss +- Finally, we use the optimizer to update the weights of the model based on the +gradients + +```python +epochs = 3 +for epoch in range(epochs): + print(f"\nStart of epoch {epoch}") + + # Iterate over the batches of the dataset. + for step, (x_batch_train, y_batch_train) in enumerate(train_dataset): + # Open a GradientTape to record the operations run + # during the forward pass, which enables auto-differentiation. + with tf.GradientTape() as tape: + # Run the forward pass of the layer. + # The operations that the layer applies + # to its inputs are going to be recorded + # on the GradientTape. + logits = model(x_batch_train, training=True) # Logits for this minibatch + + # Compute the loss value for this minibatch. + loss_value = loss_fn(y_batch_train, logits) + + # Use the gradient tape to automatically retrieve + # the gradients of the trainable variables with respect to the loss. + grads = tape.gradient(loss_value, model.trainable_weights) + + # Run one step of gradient descent by updating + # the value of the variables to minimize the loss. + optimizer.apply(grads, model.trainable_weights) + + # Log every 100 batches. + if step % 100 == 0: + print( + f"Training loss (for 1 batch) at step {step}: {float(loss_value):.4f}" + ) + print(f"Seen so far: {(step + 1) * batch_size} samples") +``` + +## Low-level handling of metrics + +Let's add metrics monitoring to this basic loop. + +You can readily reuse the built-in metrics (or custom ones you wrote) in such training +loops written from scratch. Here's the flow: + +- Instantiate the metric at the start of the loop +- Call `metric.update_state()` after each batch +- Call `metric.result()` when you need to display the current value of the metric +- Call `metric.reset_state()` when you need to clear the state of the metric +(typically at the end of an epoch) + +Let's use this knowledge to compute `SparseCategoricalAccuracy` on training and +validation data at the end of each epoch: + +```python +# Get a fresh model +model = get_model() + +# Instantiate an optimizer to train the model. +optimizer = keras.optimizers.Adam(learning_rate=1e-3) +# Instantiate a loss function. +loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True) + +# Prepare the metrics. +train_acc_metric = keras.metrics.SparseCategoricalAccuracy() +val_acc_metric = keras.metrics.SparseCategoricalAccuracy() +``` + +Here's our training & evaluation loop: + +```python +epochs = 2 +for epoch in range(epochs): + print(f"\nStart of epoch {epoch}") + start_time = time.time() + + # Iterate over the batches of the dataset. + for step, (x_batch_train, y_batch_train) in enumerate(train_dataset): + with tf.GradientTape() as tape: + logits = model(x_batch_train, training=True) + loss_value = loss_fn(y_batch_train, logits) + grads = tape.gradient(loss_value, model.trainable_weights) + optimizer.apply(grads, model.trainable_weights) + + # Update training metric. + train_acc_metric.update_state(y_batch_train, logits) + + # Log every 100 batches. 
+        if step % 100 == 0:
+            print(
+                f"Training loss (for 1 batch) at step {step}: {float(loss_value):.4f}"
+            )
+            print(f"Seen so far: {(step + 1) * batch_size} samples")
+
+    # Display metrics at the end of each epoch.
+    train_acc = train_acc_metric.result()
+    print(f"Training acc over epoch: {float(train_acc):.4f}")
+
+    # Reset training metrics at the end of each epoch
+    train_acc_metric.reset_state()
+
+    # Run a validation loop at the end of each epoch.
+    for x_batch_val, y_batch_val in val_dataset:
+        val_logits = model(x_batch_val, training=False)
+        # Update val metrics
+        val_acc_metric.update_state(y_batch_val, val_logits)
+    val_acc = val_acc_metric.result()
+    val_acc_metric.reset_state()
+    print(f"Validation acc: {float(val_acc):.4f}")
+    print(f"Time taken: {time.time() - start_time:.2f}s")
+```
+
+## Speeding up your training step with `tf.function`
+
+The default runtime in TensorFlow is eager execution.
+As such, our training loop above executes eagerly.
+
+This is great for debugging, but graph compilation has a definite performance
+advantage. Describing your computation as a static graph enables the framework
+to apply global performance optimizations. This is impossible when
+the framework is constrained to greedily execute one operation after another,
+with no knowledge of what comes next.
+
+You can compile into a static graph any function that takes tensors as input.
+Just add a `@tf.function` decorator on it, like this:
+
+```python
+@tf.function
+def train_step(x, y):
+    with tf.GradientTape() as tape:
+        logits = model(x, training=True)
+        loss_value = loss_fn(y, logits)
+    grads = tape.gradient(loss_value, model.trainable_weights)
+    optimizer.apply(grads, model.trainable_weights)
+    train_acc_metric.update_state(y, logits)
+    return loss_value
+```
+
+Let's do the same with the evaluation step:
+
+```python
+@tf.function
+def test_step(x, y):
+    val_logits = model(x, training=False)
+    val_acc_metric.update_state(y, val_logits)
+```
+
+Now, let's re-run our training loop with this compiled training step:
+
+```python
+epochs = 2
+for epoch in range(epochs):
+    print(f"\nStart of epoch {epoch}")
+    start_time = time.time()
+
+    # Iterate over the batches of the dataset.
+    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
+        loss_value = train_step(x_batch_train, y_batch_train)
+
+        # Log every 100 batches.
+        if step % 100 == 0:
+            print(
+                f"Training loss (for 1 batch) at step {step}: {float(loss_value):.4f}"
+            )
+            print(f"Seen so far: {(step + 1) * batch_size} samples")
+
+    # Display metrics at the end of each epoch.
+    train_acc = train_acc_metric.result()
+    print(f"Training acc over epoch: {float(train_acc):.4f}")
+
+    # Reset training metrics at the end of each epoch
+    train_acc_metric.reset_state()
+
+    # Run a validation loop at the end of each epoch.
+    for x_batch_val, y_batch_val in val_dataset:
+        test_step(x_batch_val, y_batch_val)
+
+    val_acc = val_acc_metric.result()
+    val_acc_metric.reset_state()
+    print(f"Validation acc: {float(val_acc):.4f}")
+    print(f"Time taken: {time.time() - start_time:.2f}s")
+```
+
+Much faster, isn't it?
+
+## Low-level handling of losses tracked by the model
+
+Layers & models recursively track any losses created during the forward pass
+by layers that call `self.add_loss(value)`. The resulting list of scalar loss
+values is available via the property `model.losses`
+at the end of the forward pass.
+
+If you want to use these loss components, you should sum them
+and add them to the main loss in your training step.
+
+Consider this layer, which creates an activity regularization loss:
+
+```python
+class ActivityRegularizationLayer(keras.layers.Layer):
+    def call(self, inputs):
+        self.add_loss(1e-2 * tf.reduce_sum(inputs))
+        return inputs
+```
+
+Let's build a really simple model that uses it:
+
+```python
+inputs = keras.Input(shape=(784,), name="digits")
+x = keras.layers.Dense(64, activation="relu")(inputs)
+# Insert activity regularization as a layer
+x = ActivityRegularizationLayer()(x)
+x = keras.layers.Dense(64, activation="relu")(x)
+outputs = keras.layers.Dense(10, name="predictions")(x)
+
+model = keras.Model(inputs=inputs, outputs=outputs)
+```
+
+Here's what our training step should look like now:
+
+```python
+@tf.function
+def train_step(x, y):
+    with tf.GradientTape() as tape:
+        logits = model(x, training=True)
+        loss_value = loss_fn(y, logits)
+        # Add any extra losses created during the forward pass.
+        loss_value += sum(model.losses)
+    grads = tape.gradient(loss_value, model.trainable_weights)
+    optimizer.apply(grads, model.trainable_weights)
+    train_acc_metric.update_state(y, logits)
+    return loss_value
+```
+
+## Summary
+
+Now you know everything there is to know about using built-in training loops and
+writing your own from scratch.
+
+To conclude, here's a simple end-to-end example that ties together everything
+you've learned in this guide: a DCGAN trained on MNIST digits.
+
+## End-to-end example: a GAN training loop from scratch
+
+You may be familiar with Generative Adversarial Networks (GANs). GANs can generate new
+images that look almost real, by learning the latent distribution of a training
+dataset of images (the "latent space" of the images).
+
+A GAN is made of two parts: a "generator" model that maps points in the latent
+space to points in image space, and a "discriminator" model, a classifier
+that can tell the difference between real images (from the training dataset)
+and fake images (the output of the generator network).
+
+A GAN training loop looks like this:
+
+1) Train the discriminator.
+- Sample a batch of random points in the latent space.
+- Turn the points into fake images via the "generator" model.
+- Get a batch of real images and combine them with the generated images.
+- Train the "discriminator" model to classify generated vs. real images.
+
+2) Train the generator.
+- Sample random points in the latent space.
+- Turn the points into fake images via the "generator" network.
+- Train the "generator" model to "fool" the discriminator into classifying the fake
+images as real.
+
+For a much more detailed overview of how GANs work, see
+[Deep Learning with Python](https://www.manning.com/books/deep-learning-with-python).
+
+Let's implement this training loop.
+First, create the discriminator meant to classify fake vs. real digits:
+
+```python
+discriminator = keras.Sequential(
+    [
+        keras.Input(shape=(28, 28, 1)),
+        keras.layers.Conv2D(64, (3, 3), strides=(2, 2), padding="same"),
+        keras.layers.LeakyReLU(negative_slope=0.2),
+        keras.layers.Conv2D(128, (3, 3), strides=(2, 2), padding="same"),
+        keras.layers.LeakyReLU(negative_slope=0.2),
+        keras.layers.GlobalMaxPooling2D(),
+        keras.layers.Dense(1),
+    ],
+    name="discriminator",
+)
+discriminator.summary()
+```
+
+Then let's create a generator network that turns latent vectors into outputs of shape
+`(28, 28, 1)` (representing MNIST digits):
+
+```python
+latent_dim = 128
+
+generator = keras.Sequential(
+    [
+        keras.Input(shape=(latent_dim,)),
+        # We want to generate 128 coefficients to reshape into a 7x7x128 map
+        keras.layers.Dense(7 * 7 * 128),
+        keras.layers.LeakyReLU(negative_slope=0.2),
+        keras.layers.Reshape((7, 7, 128)),
+        keras.layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding="same"),
+        keras.layers.LeakyReLU(negative_slope=0.2),
+        keras.layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding="same"),
+        keras.layers.LeakyReLU(negative_slope=0.2),
+        keras.layers.Conv2D(1, (7, 7), padding="same", activation="sigmoid"),
+    ],
+    name="generator",
+)
+```
+
+Here's the key bit: the training loop. As you can see, it is quite straightforward. The
+training step function only takes 17 lines.
+
+```python
+# Instantiate one optimizer for the discriminator and another for the generator.
+d_optimizer = keras.optimizers.Adam(learning_rate=0.0003)
+g_optimizer = keras.optimizers.Adam(learning_rate=0.0004)
+
+# Instantiate a loss function.
+loss_fn = keras.losses.BinaryCrossentropy(from_logits=True)
+
+
+@tf.function
+def train_step(real_images):
+    # Sample random points in the latent space
+    random_latent_vectors = tf.random.normal(shape=(batch_size, latent_dim))
+    # Decode them to fake images
+    generated_images = generator(random_latent_vectors)
+    # Combine them with real images
+    combined_images = tf.concat([generated_images, real_images], axis=0)
+
+    # Assemble labels discriminating real from fake images
+    labels = tf.concat(
+        [tf.ones((batch_size, 1)), tf.zeros((real_images.shape[0], 1))], axis=0
+    )
+    # Add random noise to the labels - important trick!
+    labels += 0.05 * tf.random.uniform(labels.shape)
+
+    # Train the discriminator
+    with tf.GradientTape() as tape:
+        predictions = discriminator(combined_images)
+        d_loss = loss_fn(labels, predictions)
+    grads = tape.gradient(d_loss, discriminator.trainable_weights)
+    d_optimizer.apply(grads, discriminator.trainable_weights)
+
+    # Sample random points in the latent space
+    random_latent_vectors = tf.random.normal(shape=(batch_size, latent_dim))
+    # Assemble labels that say "all real images"
+    misleading_labels = tf.zeros((batch_size, 1))
+
+    # Train the generator (note that we should *not* update the weights
+    # of the discriminator!)
+    with tf.GradientTape() as tape:
+        predictions = discriminator(generator(random_latent_vectors))
+        g_loss = loss_fn(misleading_labels, predictions)
+    grads = tape.gradient(g_loss, generator.trainable_weights)
+    g_optimizer.apply(grads, generator.trainable_weights)
+    return d_loss, g_loss, generated_images
+```
+
+Let's train our GAN, by repeatedly calling `train_step` on batches of images.
+
+Since our discriminator and generator are convnets, you're going to want to
+run this code on a GPU.
+
+```python
+# Prepare the dataset. We use both the training & test MNIST digits.
+batch_size = 64 +(x_train, _), (x_test, _) = keras.datasets.mnist.load_data() +all_digits = np.concatenate([x_train, x_test]) +all_digits = all_digits.astype("float32") / 255.0 +all_digits = np.reshape(all_digits, (-1, 28, 28, 1)) +dataset = tf.data.Dataset.from_tensor_slices(all_digits) +dataset = dataset.shuffle(buffer_size=1024).batch(batch_size) + +epochs = 1 # In practice you need at least 20 epochs to generate nice digits. +save_dir = "./" + +for epoch in range(epochs): + print(f"\nStart epoch {epoch}") + + for step, real_images in enumerate(dataset): + # Train the discriminator & generator on one batch of real images. + d_loss, g_loss, generated_images = train_step(real_images) + + # Logging. + if step % 100 == 0: + # Print metrics + print(f"discriminator loss at step {step}: {d_loss:.2f}") + print(f"adversarial loss at step {step}: {g_loss:.2f}") + + # Save one generated image + img = keras.utils.array_to_img(generated_images[0] * 255.0, scale=False) + img.save(os.path.join(save_dir, f"generated_img_{step}.png")) + + # To limit execution time we stop after 10 steps. + # Remove the lines below to actually train the model! + if step > 10: + break +``` + +That's it! You'll get nice-looking fake MNIST digits after just ~30s of training on the +Colab GPU. + diff --git a/.tether/vignettes-src/writing_a_custom_training_loop_in_torch.Rmd b/.tether/vignettes-src/writing_a_custom_training_loop_in_torch.Rmd new file mode 100644 index 0000000000..8f62b63852 --- /dev/null +++ b/.tether/vignettes-src/writing_a_custom_training_loop_in_torch.Rmd @@ -0,0 +1,382 @@ +--- +title: Writing a training loop from scratch in PyTorch +author: '[fchollet](https://twitter.com/fchollet)' +date-created: 2023/06/25 +last-modified: 2023/06/25 +description: Writing low-level training & evaluation loops in PyTorch. +accelerator: None +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras/guides/writing_a_custom_training_loop_in_torch.py +--- + +## Setup + +```python +import os + +# This guide can only be run with the torch backend. +os.environ["KERAS_BACKEND"] = "torch" + +import torch +import keras +import numpy as np +``` + +## Introduction + +Keras provides default training and evaluation loops, `fit()` and `evaluate()`. +Their usage is covered in the guide +[Training & evaluation with the built-in methods](https://keras.io/guides/training_with_built_in_methods/). + +If you want to customize the learning algorithm of your model while still leveraging +the convenience of `fit()` +(for instance, to train a GAN using `fit()`), you can subclass the `Model` class and +implement your own `train_step()` method, which +is called repeatedly during `fit()`. + +Now, if you want very low-level control over training & evaluation, you should write +your own training & evaluation loops from scratch. This is what this guide is about. + +## A first end-to-end example + +To write a custom training loop, we need the following ingredients: + +- A model to train, of course. +- An optimizer. You could either use a `keras.optimizers` optimizer, +or a native PyTorch optimizer from `torch.optim`. +- A loss function. You could either use a `keras.losses` loss, +or a native PyTorch loss from `torch.nn`. +- A dataset. You could use any format: a `tf.data.Dataset`, +a PyTorch `DataLoader`, a Python generator, etc. + +Let's line them up. We'll use torch-native objects in each case -- +except, of course, for the Keras model. 
+
+First, let's get the model and the MNIST dataset:
+
+```python
+# Let's consider a simple MNIST model
+def get_model():
+    inputs = keras.Input(shape=(784,), name="digits")
+    x1 = keras.layers.Dense(64, activation="relu")(inputs)
+    x2 = keras.layers.Dense(64, activation="relu")(x1)
+    outputs = keras.layers.Dense(10, name="predictions")(x2)
+    model = keras.Model(inputs=inputs, outputs=outputs)
+    return model
+
+
+# Load up the MNIST dataset and put it in a torch DataLoader
+# Prepare the training dataset.
+batch_size = 32
+(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
+x_train = np.reshape(x_train, (-1, 784)).astype("float32")
+x_test = np.reshape(x_test, (-1, 784)).astype("float32")
+y_train = keras.utils.to_categorical(y_train)
+y_test = keras.utils.to_categorical(y_test)
+
+# Reserve 10,000 samples for validation.
+x_val = x_train[-10000:]
+y_val = y_train[-10000:]
+x_train = x_train[:-10000]
+y_train = y_train[:-10000]
+
+# Create torch Datasets
+train_dataset = torch.utils.data.TensorDataset(
+    torch.from_numpy(x_train), torch.from_numpy(y_train)
+)
+val_dataset = torch.utils.data.TensorDataset(
+    torch.from_numpy(x_val), torch.from_numpy(y_val)
+)
+
+# Create DataLoaders for the Datasets
+train_dataloader = torch.utils.data.DataLoader(
+    train_dataset, batch_size=batch_size, shuffle=True
+)
+val_dataloader = torch.utils.data.DataLoader(
+    val_dataset, batch_size=batch_size, shuffle=False
+)
+```
+
+Next, here's our PyTorch optimizer and our PyTorch loss function:
+
+```python
+# Instantiate a torch optimizer
+model = get_model()
+optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
+
+# Instantiate a torch loss function
+loss_fn = torch.nn.CrossEntropyLoss()
+```
+
+Let's train our model using mini-batch gradient descent with a custom training loop.
+
+Calling `loss.backward()` on a loss tensor triggers backpropagation.
+Once that's done, your optimizer is magically aware of the gradients for each variable
+and can update its variables, which is done via `optimizer.step()`.
+Tensors, variables, and optimizers are all interconnected to one another via hidden global state.
+Also, don't forget to call `model.zero_grad()` before `loss.backward()`, or you won't
+get the right gradients for your variables.
+
+Here's our training loop, step by step:
+
+- We open a `for` loop that iterates over epochs
+- For each epoch, we open a `for` loop that iterates over the dataset, in batches
+- For each batch, we call the model on the input data to retrieve the predictions,
+then we use them to compute a loss value
+- We call `model.zero_grad()` to reset the gradients, then `loss.backward()` to
+compute the gradients of the loss with respect to the weights of the model
+- Finally, we use the optimizer to update the weights of the model based on the
+gradients
+
+```python
+epochs = 3
+for epoch in range(epochs):
+    for step, (inputs, targets) in enumerate(train_dataloader):
+        # Forward pass
+        logits = model(inputs)
+        loss = loss_fn(logits, targets)
+
+        # Backward pass
+        model.zero_grad()
+        loss.backward()
+
+        # Optimizer variable updates
+        optimizer.step()
+
+        # Log every 100 batches.
+        if step % 100 == 0:
+            print(
+                f"Training loss (for 1 batch) at step {step}: {loss.detach().numpy():.4f}"
+            )
+            print(f"Seen so far: {(step + 1) * batch_size} samples")
+```
+
+As an alternative, let's look at what the loop looks like when using a Keras optimizer
+and a Keras loss function.
+ +Important differences: + +- You retrieve the gradients for the variables via `v.value.grad`, +called on each trainable variable. +- You update your variables via `optimizer.apply()`, which must be +called in a `torch.no_grad()` scope. + +**Also, a big gotcha:** while all NumPy/TensorFlow/JAX/Keras APIs +as well as Python `unittest` APIs use the argument order convention +`fn(y_true, y_pred)` (reference values first, predicted values second), +PyTorch actually uses `fn(y_pred, y_true)` for its losses. +So make sure to invert the order of `logits` and `targets`. + +```python +model = get_model() +optimizer = keras.optimizers.Adam(learning_rate=1e-3) +loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True) + +for epoch in range(epochs): + print(f"\nStart of epoch {epoch}") + for step, (inputs, targets) in enumerate(train_dataloader): + # Forward pass + logits = model(inputs) + loss = loss_fn(targets, logits) + + # Backward pass + model.zero_grad() + trainable_weights = [v for v in model.trainable_weights] + + # Call torch.Tensor.backward() on the loss to compute gradients + # for the weights. + loss.backward() + gradients = [v.value.grad for v in trainable_weights] + + # Update weights + with torch.no_grad(): + optimizer.apply(gradients, trainable_weights) + + # Log every 100 batches. + if step % 100 == 0: + print( + f"Training loss (for 1 batch) at step {step}: {loss.detach().numpy():.4f}" + ) + print(f"Seen so far: {(step + 1) * batch_size} samples") +``` + +## Low-level handling of metrics + +Let's add metrics monitoring to this basic training loop. + +You can readily reuse built-in Keras metrics (or custom ones you wrote) in such training +loops written from scratch. Here's the flow: + +- Instantiate the metric at the start of the loop +- Call `metric.update_state()` after each batch +- Call `metric.result()` when you need to display the current value of the metric +- Call `metric.reset_state()` when you need to clear the state of the metric +(typically at the end of an epoch) + +Let's use this knowledge to compute `CategoricalAccuracy` on training and +validation data at the end of each epoch: + +```python +# Get a fresh model +model = get_model() + +# Instantiate an optimizer to train the model. +optimizer = keras.optimizers.Adam(learning_rate=1e-3) +# Instantiate a loss function. +loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True) + +# Prepare the metrics. +train_acc_metric = keras.metrics.CategoricalAccuracy() +val_acc_metric = keras.metrics.CategoricalAccuracy() +``` + +Here's our training & evaluation loop: + +```python +for epoch in range(epochs): + print(f"\nStart of epoch {epoch}") + for step, (inputs, targets) in enumerate(train_dataloader): + # Forward pass + logits = model(inputs) + loss = loss_fn(targets, logits) + + # Backward pass + model.zero_grad() + trainable_weights = [v for v in model.trainable_weights] + + # Call torch.Tensor.backward() on the loss to compute gradients + # for the weights. + loss.backward() + gradients = [v.value.grad for v in trainable_weights] + + # Update weights + with torch.no_grad(): + optimizer.apply(gradients, trainable_weights) + + # Update training metric. + train_acc_metric.update_state(targets, logits) + + # Log every 100 batches. + if step % 100 == 0: + print( + f"Training loss (for 1 batch) at step {step}: {loss.detach().numpy():.4f}" + ) + print(f"Seen so far: {(step + 1) * batch_size} samples") + + # Display metrics at the end of each epoch. 
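+    # result() aggregates every update_state() call since the last
+    # reset_state(), so this is the accuracy over the whole epoch.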
+    train_acc = train_acc_metric.result()
+    print(f"Training acc over epoch: {float(train_acc):.4f}")
+
+    # Reset training metrics at the end of each epoch
+    train_acc_metric.reset_state()
+
+    # Run a validation loop at the end of each epoch.
+    for x_batch_val, y_batch_val in val_dataloader:
+        val_logits = model(x_batch_val, training=False)
+        # Update val metrics
+        val_acc_metric.update_state(y_batch_val, val_logits)
+    val_acc = val_acc_metric.result()
+    val_acc_metric.reset_state()
+    print(f"Validation acc: {float(val_acc):.4f}")
+```
+
+## Low-level handling of losses tracked by the model
+
+Layers & models recursively track any losses created during the forward pass
+by layers that call `self.add_loss(value)`. The resulting list of scalar loss
+values is available via the property `model.losses`
+at the end of the forward pass.
+
+If you want to use these loss components, you should sum them
+and add them to the main loss in your training step.
+
+Consider this layer, which creates an activity regularization loss:
+
+```python
+class ActivityRegularizationLayer(keras.layers.Layer):
+    def call(self, inputs):
+        self.add_loss(1e-2 * torch.sum(inputs))
+        return inputs
+```
+
+Let's build a really simple model that uses it:
+
+```python
+inputs = keras.Input(shape=(784,), name="digits")
+x = keras.layers.Dense(64, activation="relu")(inputs)
+# Insert activity regularization as a layer
+x = ActivityRegularizationLayer()(x)
+x = keras.layers.Dense(64, activation="relu")(x)
+outputs = keras.layers.Dense(10, name="predictions")(x)
+
+model = keras.Model(inputs=inputs, outputs=outputs)
+```
+
+Here's what our training loop should look like now:
+
+```python
+# Keep the model defined above, which includes the activity regularization
+# layer. (Re-instantiating a fresh model via get_model() would discard that
+# layer and leave model.losses empty.)
+
+# Instantiate an optimizer to train the model.
+optimizer = keras.optimizers.Adam(learning_rate=1e-3)
+# Instantiate a loss function.
+loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True)
+
+# Prepare the metrics.
+train_acc_metric = keras.metrics.CategoricalAccuracy()
+val_acc_metric = keras.metrics.CategoricalAccuracy()
+
+for epoch in range(epochs):
+    print(f"\nStart of epoch {epoch}")
+    for step, (inputs, targets) in enumerate(train_dataloader):
+        # Forward pass
+        logits = model(inputs)
+        loss = loss_fn(targets, logits)
+        if model.losses:
+            # Add any extra losses created during the forward pass.
+            # (Python's sum() safely adds up multiple tracked loss tensors.)
+            loss = loss + sum(model.losses)
+
+        # Backward pass
+        model.zero_grad()
+        trainable_weights = [v for v in model.trainable_weights]
+
+        # Call torch.Tensor.backward() on the loss to compute gradients
+        # for the weights.
+        loss.backward()
+        gradients = [v.value.grad for v in trainable_weights]
+
+        # Update weights
+        with torch.no_grad():
+            optimizer.apply(gradients, trainable_weights)
+
+        # Update training metric.
+        train_acc_metric.update_state(targets, logits)
+
+        # Log every 100 batches.
+        if step % 100 == 0:
+            print(
+                f"Training loss (for 1 batch) at step {step}: {loss.detach().numpy():.4f}"
+            )
+            print(f"Seen so far: {(step + 1) * batch_size} samples")
+
+    # Display metrics at the end of each epoch.
+    train_acc = train_acc_metric.result()
+    print(f"Training acc over epoch: {float(train_acc):.4f}")
+
+    # Reset training metrics at the end of each epoch
+    train_acc_metric.reset_state()
+
+    # Run a validation loop at the end of each epoch.
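+    # training=False ensures layers like dropout and batch normalization
+    # run in inference mode during validation.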
+    for x_batch_val, y_batch_val in val_dataloader:
+        val_logits = model(x_batch_val, training=False)
+        # Update val metrics
+        val_acc_metric.update_state(y_batch_val, val_logits)
+    val_acc = val_acc_metric.result()
+    val_acc_metric.reset_state()
+    print(f"Validation acc: {float(val_acc):.4f}")
+```
+
+That's it!
+
diff --git a/.tether/vignettes-src/writing_a_training_loop_from_scratch.Rmd b/.tether/vignettes-src/writing_a_training_loop_from_scratch.Rmd
new file mode 100644
index 0000000000..7453c3ae22
--- /dev/null
+++ b/.tether/vignettes-src/writing_a_training_loop_from_scratch.Rmd
@@ -0,0 +1,508 @@
+---
+title: Writing a training loop from scratch in TensorFlow
+author: '[fchollet](https://twitter.com/fchollet)'
+date-created: 2019/03/01
+last-modified: 2023/06/25
+description: Writing low-level training & evaluation loops in TensorFlow.
+accelerator: None
+output: rmarkdown::html_vignette
+knit: ({source(here::here("tools/knit.R")); knit_vignette})
+---
+
+## Setup
+
+```python
+import time
+import os
+
+# This guide can only be run with the TensorFlow backend.
+os.environ["KERAS_BACKEND"] = "tensorflow"
+
+import tensorflow as tf
+import keras
+import numpy as np
+```
+
+## Introduction
+
+Keras provides default training and evaluation loops, `fit()` and `evaluate()`.
+Their usage is covered in the guide
+[Training & evaluation with the built-in methods](/guides/training_with_built_in_methods/).
+
+If you want to customize the learning algorithm of your model while still leveraging
+the convenience of `fit()`
+(for instance, to train a GAN using `fit()`), you can subclass the `Model` class and
+implement your own `train_step()` method, which
+is called repeatedly during `fit()`.
+
+Now, if you want very low-level control over training & evaluation, you should write
+your own training & evaluation loops from scratch. This is what this guide is about.
+
+## A first end-to-end example
+
+Let's consider a simple MNIST model:
+
+```python
+def get_model():
+    inputs = keras.Input(shape=(784,), name="digits")
+    x1 = keras.layers.Dense(64, activation="relu")(inputs)
+    x2 = keras.layers.Dense(64, activation="relu")(x1)
+    outputs = keras.layers.Dense(10, name="predictions")(x2)
+    model = keras.Model(inputs=inputs, outputs=outputs)
+    return model
+
+
+model = get_model()
+```
+
+Let's train it using mini-batch gradient descent with a custom training loop.
+
+First, we're going to need an optimizer, a loss function, and a dataset:
+
+```python
+# Instantiate an optimizer.
+optimizer = keras.optimizers.Adam(learning_rate=1e-3)
+# Instantiate a loss function.
+loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
+
+# Prepare the training dataset.
+batch_size = 32
+(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
+x_train = np.reshape(x_train, (-1, 784))
+x_test = np.reshape(x_test, (-1, 784))
+
+# Reserve 10,000 samples for validation.
+x_val = x_train[-10000:]
+y_val = y_train[-10000:]
+x_train = x_train[:-10000]
+y_train = y_train[:-10000]
+
+# Prepare the training dataset.
+train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
+train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)
+
+# Prepare the validation dataset.
+val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
+val_dataset = val_dataset.batch(batch_size)
+```
+
+Calling a model inside a `GradientTape` scope enables you to retrieve the gradients of
+the trainable weights of the model with respect to a loss value.
Using an optimizer +instance, you can use these gradients to update these variables (which you can +retrieve using `model.trainable_weights`). + +Here's our training loop, step by step: + +- We open a `for` loop that iterates over epochs +- For each epoch, we open a `for` loop that iterates over the dataset, in batches +- For each batch, we open a `GradientTape()` scope +- Inside this scope, we call the model (forward pass) and compute the loss +- Outside the scope, we retrieve the gradients of the weights +of the model with regard to the loss +- Finally, we use the optimizer to update the weights of the model based on the +gradients + +```python +epochs = 3 +for epoch in range(epochs): + print(f"\nStart of epoch {epoch}") + + # Iterate over the batches of the dataset. + for step, (x_batch_train, y_batch_train) in enumerate(train_dataset): + # Open a GradientTape to record the operations run + # during the forward pass, which enables auto-differentiation. + with tf.GradientTape() as tape: + # Run the forward pass of the layer. + # The operations that the layer applies + # to its inputs are going to be recorded + # on the GradientTape. + logits = model(x_batch_train, training=True) # Logits for this minibatch + + # Compute the loss value for this minibatch. + loss_value = loss_fn(y_batch_train, logits) + + # Use the gradient tape to automatically retrieve + # the gradients of the trainable variables with respect to the loss. + grads = tape.gradient(loss_value, model.trainable_weights) + + # Run one step of gradient descent by updating + # the value of the variables to minimize the loss. + optimizer.apply(grads, model.trainable_weights) + + # Log every 100 batches. + if step % 100 == 0: + print( + f"Training loss (for 1 batch) at step {step}: {float(loss_value):.4f}" + ) + print(f"Seen so far: {(step + 1) * batch_size} samples") +``` + +## Low-level handling of metrics + +Let's add metrics monitoring to this basic loop. + +You can readily reuse the built-in metrics (or custom ones you wrote) in such training +loops written from scratch. Here's the flow: + +- Instantiate the metric at the start of the loop +- Call `metric.update_state()` after each batch +- Call `metric.result()` when you need to display the current value of the metric +- Call `metric.reset_state()` when you need to clear the state of the metric +(typically at the end of an epoch) + +Let's use this knowledge to compute `SparseCategoricalAccuracy` on training and +validation data at the end of each epoch: + +```python +# Get a fresh model +model = get_model() + +# Instantiate an optimizer to train the model. +optimizer = keras.optimizers.Adam(learning_rate=1e-3) +# Instantiate a loss function. +loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True) + +# Prepare the metrics. +train_acc_metric = keras.metrics.SparseCategoricalAccuracy() +val_acc_metric = keras.metrics.SparseCategoricalAccuracy() +``` + +Here's our training & evaluation loop: + +```python +epochs = 2 +for epoch in range(epochs): + print(f"\nStart of epoch {epoch}") + start_time = time.time() + + # Iterate over the batches of the dataset. + for step, (x_batch_train, y_batch_train) in enumerate(train_dataset): + with tf.GradientTape() as tape: + logits = model(x_batch_train, training=True) + loss_value = loss_fn(y_batch_train, logits) + grads = tape.gradient(loss_value, model.trainable_weights) + optimizer.apply(grads, model.trainable_weights) + + # Update training metric. 
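+        # update_state() accumulates statistics for this batch; the running
+        # aggregate is read later via result().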
+        train_acc_metric.update_state(y_batch_train, logits)
+
+        # Log every 100 batches.
+        if step % 100 == 0:
+            print(
+                f"Training loss (for 1 batch) at step {step}: {float(loss_value):.4f}"
+            )
+            print(f"Seen so far: {(step + 1) * batch_size} samples")
+
+    # Display metrics at the end of each epoch.
+    train_acc = train_acc_metric.result()
+    print(f"Training acc over epoch: {float(train_acc):.4f}")
+
+    # Reset training metrics at the end of each epoch
+    train_acc_metric.reset_state()
+
+    # Run a validation loop at the end of each epoch.
+    for x_batch_val, y_batch_val in val_dataset:
+        val_logits = model(x_batch_val, training=False)
+        # Update val metrics
+        val_acc_metric.update_state(y_batch_val, val_logits)
+    val_acc = val_acc_metric.result()
+    val_acc_metric.reset_state()
+    print(f"Validation acc: {float(val_acc):.4f}")
+    print(f"Time taken: {time.time() - start_time:.2f}s")
+```
+
+## Speeding up your training step with `tf.function`
+
+The default runtime in TensorFlow is eager execution.
+As such, our training loop above executes eagerly.
+
+This is great for debugging, but graph compilation has a definite performance
+advantage. Describing your computation as a static graph enables the framework
+to apply global performance optimizations. This is impossible when
+the framework is constrained to greedily execute one operation after another,
+with no knowledge of what comes next.
+
+You can compile into a static graph any function that takes tensors as input.
+Just add a `@tf.function` decorator on it, like this:
+
+```python
+@tf.function
+def train_step(x, y):
+    with tf.GradientTape() as tape:
+        logits = model(x, training=True)
+        loss_value = loss_fn(y, logits)
+    grads = tape.gradient(loss_value, model.trainable_weights)
+    optimizer.apply(grads, model.trainable_weights)
+    train_acc_metric.update_state(y, logits)
+    return loss_value
+```
+
+Let's do the same with the evaluation step:
+
+```python
+@tf.function
+def test_step(x, y):
+    val_logits = model(x, training=False)
+    val_acc_metric.update_state(y, val_logits)
+```
+
+Now, let's re-run our training loop with this compiled training step:
+
+```python
+epochs = 2
+for epoch in range(epochs):
+    print(f"\nStart of epoch {epoch}")
+    start_time = time.time()
+
+    # Iterate over the batches of the dataset.
+    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
+        loss_value = train_step(x_batch_train, y_batch_train)
+
+        # Log every 100 batches.
+        if step % 100 == 0:
+            print(
+                f"Training loss (for 1 batch) at step {step}: {float(loss_value):.4f}"
+            )
+            print(f"Seen so far: {(step + 1) * batch_size} samples")
+
+    # Display metrics at the end of each epoch.
+    train_acc = train_acc_metric.result()
+    print(f"Training acc over epoch: {float(train_acc):.4f}")
+
+    # Reset training metrics at the end of each epoch
+    train_acc_metric.reset_state()
+
+    # Run a validation loop at the end of each epoch.
+    for x_batch_val, y_batch_val in val_dataset:
+        test_step(x_batch_val, y_batch_val)
+
+    val_acc = val_acc_metric.result()
+    val_acc_metric.reset_state()
+    print(f"Validation acc: {float(val_acc):.4f}")
+    print(f"Time taken: {time.time() - start_time:.2f}s")
+```
+
+Much faster, isn't it?
+
+## Low-level handling of losses tracked by the model
+
+Layers & models recursively track any losses created during the forward pass
+by layers that call `self.add_loss(value)`. The resulting list of scalar loss
+values is available via the property `model.losses`
+at the end of the forward pass.
+
+If you want to use these loss components, you should sum them
+and add them to the main loss in your training step.
+
+Consider this layer, which creates an activity regularization loss:
+
+```python
+class ActivityRegularizationLayer(keras.layers.Layer):
+    def call(self, inputs):
+        self.add_loss(1e-2 * tf.reduce_sum(inputs))
+        return inputs
+```
+
+Let's build a really simple model that uses it:
+
+```python
+inputs = keras.Input(shape=(784,), name="digits")
+x = keras.layers.Dense(64, activation="relu")(inputs)
+# Insert activity regularization as a layer
+x = ActivityRegularizationLayer()(x)
+x = keras.layers.Dense(64, activation="relu")(x)
+outputs = keras.layers.Dense(10, name="predictions")(x)
+
+model = keras.Model(inputs=inputs, outputs=outputs)
+```
+
+Here's what our training step should look like now:
+
+```python
+@tf.function
+def train_step(x, y):
+    with tf.GradientTape() as tape:
+        logits = model(x, training=True)
+        loss_value = loss_fn(y, logits)
+        # Add any extra losses created during the forward pass.
+        loss_value += sum(model.losses)
+    grads = tape.gradient(loss_value, model.trainable_weights)
+    optimizer.apply(grads, model.trainable_weights)
+    train_acc_metric.update_state(y, logits)
+    return loss_value
+```
+
+## Summary
+
+Now you know everything there is to know about using built-in training loops and
+writing your own from scratch.
+
+To conclude, here's a simple end-to-end example that ties together everything
+you've learned in this guide: a DCGAN trained on MNIST digits.
+
+## End-to-end example: a GAN training loop from scratch
+
+You may be familiar with Generative Adversarial Networks (GANs). GANs can generate new
+images that look almost real by learning the latent distribution of a training
+dataset of images (the "latent space" of the images).
+
+A GAN is made of two parts: a "generator" model that maps points in the latent
+space to points in image space, and a "discriminator" model, a classifier
+that can tell the difference between real images (from the training dataset)
+and fake images (the output of the generator network).
+
+A GAN training loop looks like this:
+
+1) Train the discriminator.
+- Sample a batch of random points in the latent space.
+- Turn the points into fake images via the "generator" model.
+- Get a batch of real images and combine them with the generated images.
+- Train the "discriminator" model to classify generated vs. real images.
+
+2) Train the generator.
+- Sample random points in the latent space.
+- Turn the points into fake images via the "generator" network.
+- Train the "generator" model to "fool" the discriminator and classify the fake images
+as real.
+
+For a much more detailed overview of how GANs work, see
+[Deep Learning with Python](https://www.manning.com/books/deep-learning-with-python).
+
+Let's implement this training loop.
First, create the discriminator meant to classify
+fake vs. real digits:
+
+```python
+discriminator = keras.Sequential(
+    [
+        keras.Input(shape=(28, 28, 1)),
+        keras.layers.Conv2D(64, (3, 3), strides=(2, 2), padding="same"),
+        keras.layers.LeakyReLU(negative_slope=0.2),
+        keras.layers.Conv2D(128, (3, 3), strides=(2, 2), padding="same"),
+        keras.layers.LeakyReLU(negative_slope=0.2),
+        keras.layers.GlobalMaxPooling2D(),
+        keras.layers.Dense(1),
+    ],
+    name="discriminator",
+)
+discriminator.summary()
+```
+
+Then let's create a generator network
+that turns latent vectors into outputs of shape `(28, 28, 1)` (representing
+MNIST digits):
+
+```python
+latent_dim = 128
+
+generator = keras.Sequential(
+    [
+        keras.Input(shape=(latent_dim,)),
+        # We want to generate 128 coefficients to reshape into a 7x7x128 map
+        keras.layers.Dense(7 * 7 * 128),
+        keras.layers.LeakyReLU(negative_slope=0.2),
+        keras.layers.Reshape((7, 7, 128)),
+        keras.layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding="same"),
+        keras.layers.LeakyReLU(negative_slope=0.2),
+        keras.layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding="same"),
+        keras.layers.LeakyReLU(negative_slope=0.2),
+        keras.layers.Conv2D(1, (7, 7), padding="same", activation="sigmoid"),
+    ],
+    name="generator",
+)
+```
+
+Here's the key bit: the training loop. As you can see, it is quite straightforward. The
+training step function only takes 17 lines.
+
+```python
+# Instantiate one optimizer for the discriminator and another for the generator.
+d_optimizer = keras.optimizers.Adam(learning_rate=0.0003)
+g_optimizer = keras.optimizers.Adam(learning_rate=0.0004)
+
+# Instantiate a loss function.
+loss_fn = keras.losses.BinaryCrossentropy(from_logits=True)
+
+
+@tf.function
+def train_step(real_images):
+    # Sample random points in the latent space
+    random_latent_vectors = tf.random.normal(shape=(batch_size, latent_dim))
+    # Decode them to fake images
+    generated_images = generator(random_latent_vectors)
+    # Combine them with real images
+    combined_images = tf.concat([generated_images, real_images], axis=0)
+
+    # Assemble labels discriminating real from fake images
+    labels = tf.concat(
+        [tf.ones((batch_size, 1)), tf.zeros((real_images.shape[0], 1))], axis=0
+    )
+    # Add random noise to the labels - important trick!
+    labels += 0.05 * tf.random.uniform(labels.shape)
+
+    # Train the discriminator
+    with tf.GradientTape() as tape:
+        predictions = discriminator(combined_images)
+        d_loss = loss_fn(labels, predictions)
+    grads = tape.gradient(d_loss, discriminator.trainable_weights)
+    d_optimizer.apply(grads, discriminator.trainable_weights)
+
+    # Sample random points in the latent space
+    random_latent_vectors = tf.random.normal(shape=(batch_size, latent_dim))
+    # Assemble labels that say "all real images"
+    misleading_labels = tf.zeros((batch_size, 1))
+
+    # Train the generator (note that we should *not* update the weights
+    # of the discriminator)!
+    with tf.GradientTape() as tape:
+        predictions = discriminator(generator(random_latent_vectors))
+        g_loss = loss_fn(misleading_labels, predictions)
+    grads = tape.gradient(g_loss, generator.trainable_weights)
+    g_optimizer.apply(grads, generator.trainable_weights)
+    return d_loss, g_loss, generated_images
+```
+
+Let's train our GAN by repeatedly calling `train_step` on batches of images.
+
+Since our discriminator and generator are convnets, you're going to want to
+run this code on a GPU.
+
+```python
+# Prepare the dataset. We use both the training & test MNIST digits.
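+# (Pixel values are scaled to [0, 1] to match the generator's sigmoid output.)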
+batch_size = 64 +(x_train, _), (x_test, _) = keras.datasets.mnist.load_data() +all_digits = np.concatenate([x_train, x_test]) +all_digits = all_digits.astype("float32") / 255.0 +all_digits = np.reshape(all_digits, (-1, 28, 28, 1)) +dataset = tf.data.Dataset.from_tensor_slices(all_digits) +dataset = dataset.shuffle(buffer_size=1024).batch(batch_size) + +epochs = 1 # In practice you need at least 20 epochs to generate nice digits. +save_dir = "./" + +for epoch in range(epochs): + print(f"\nStart epoch {epoch}") + + for step, real_images in enumerate(dataset): + # Train the discriminator & generator on one batch of real images. + d_loss, g_loss, generated_images = train_step(real_images) + + # Logging. + if step % 100 == 0: + # Print metrics + print(f"discriminator loss at step {step}: {d_loss:.2f}") + print(f"adversarial loss at step {step}: {g_loss:.2f}") + + # Save one generated image + img = keras.utils.array_to_img(generated_images[0] * 255.0, scale=False) + img.save(os.path.join(save_dir, f"generated_img_{step}.png")) + + # To limit execution time we stop after 10 steps. + # Remove the lines below to actually train the model! + if step > 10: + break +``` + +That's it! You'll get nice-looking fake MNIST digits after just ~30s of training on the +Colab GPU. + diff --git a/.tether/vignettes-src/writing_your_own_callbacks.Rmd b/.tether/vignettes-src/writing_your_own_callbacks.Rmd new file mode 100644 index 0000000000..cee15b9c39 --- /dev/null +++ b/.tether/vignettes-src/writing_your_own_callbacks.Rmd @@ -0,0 +1,401 @@ +--- +title: Writing your own callbacks +authors: Rick Chao, Francois Chollet +date-created: 2019/03/20 +last-modified: 2023/06/25 +description: Complete guide to writing new Keras callbacks. +accelerator: GPU +output: rmarkdown::html_vignette +knit: ({source(here::here("tools/knit.R")); knit_vignette}) +tether: ~/github/keras-team/keras-io/guides/writing_your_own_callbacks.py +--- + +## Introduction + +A callback is a powerful tool to customize the behavior of a Keras model during +training, evaluation, or inference. Examples include `keras.callbacks.TensorBoard` +to visualize training progress and results with TensorBoard, or +`keras.callbacks.ModelCheckpoint` to periodically save your model during training. + +In this guide, you will learn what a Keras callback is, what it can do, and how you can +build your own. We provide a few demos of simple callback applications to get you +started. + +## Setup + +```python +import numpy as np +import keras +``` + +## Keras callbacks overview + +All callbacks subclass the `keras.callbacks.Callback` class, and +override a set of methods called at various stages of training, testing, and +predicting. Callbacks are useful to get a view on internal states and statistics of +the model during training. + +You can pass a list of callbacks (as the keyword argument `callbacks`) to the following +model methods: + +- `keras.Model.fit()` +- `keras.Model.evaluate()` +- `keras.Model.predict()` + +## An overview of callback methods + +### Global methods + +#### `on_(train|test|predict)_begin(self, logs=None)` + +Called at the beginning of `fit`/`evaluate`/`predict`. + +#### `on_(train|test|predict)_end(self, logs=None)` + +Called at the end of `fit`/`evaluate`/`predict`. + +### Batch-level methods for training/testing/predicting + +#### `on_(train|test|predict)_batch_begin(self, batch, logs=None)` + +Called right before processing a batch during training/testing/predicting. 
+
+#### `on_(train|test|predict)_batch_end(self, batch, logs=None)`
+
+Called at the end of training/testing/predicting a batch. Within this method, `logs` is
+a dict containing the metrics results.
+
+### Epoch-level methods (training only)
+
+#### `on_epoch_begin(self, epoch, logs=None)`
+
+Called at the beginning of an epoch during training.
+
+#### `on_epoch_end(self, epoch, logs=None)`
+
+Called at the end of an epoch during training.
+
+## A basic example
+
+Let's take a look at a concrete example. To get started, let's define a simple
+Sequential Keras model:
+
+```python
+# Define the Keras model to add callbacks to
+def get_model():
+    model = keras.Sequential()
+    model.add(keras.layers.Dense(1))
+    model.compile(
+        optimizer=keras.optimizers.RMSprop(learning_rate=0.1),
+        loss="mean_squared_error",
+        metrics=["mean_absolute_error"],
+    )
+    return model
+```
+
+Then, load the MNIST data for training and testing from the Keras datasets API:
+
+```python
+# Load example MNIST data and pre-process it
+(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
+x_train = x_train.reshape(-1, 784).astype("float32") / 255.0
+x_test = x_test.reshape(-1, 784).astype("float32") / 255.0
+
+# Limit the data to 1000 samples
+x_train = x_train[:1000]
+y_train = y_train[:1000]
+x_test = x_test[:1000]
+y_test = y_test[:1000]
+```
+
+Now, define a simple custom callback that logs:
+
+- When `fit`/`evaluate`/`predict` starts & ends
+- When each epoch starts & ends
+- When each training batch starts & ends
+- When each evaluation (test) batch starts & ends
+- When each inference (prediction) batch starts & ends
+
+```python
+class CustomCallback(keras.callbacks.Callback):
+    def on_train_begin(self, logs=None):
+        keys = list(logs.keys())
+        print("Starting training; got log keys: {}".format(keys))
+
+    def on_train_end(self, logs=None):
+        keys = list(logs.keys())
+        print("Stop training; got log keys: {}".format(keys))
+
+    def on_epoch_begin(self, epoch, logs=None):
+        keys = list(logs.keys())
+        print("Start epoch {} of training; got log keys: {}".format(epoch, keys))
+
+    def on_epoch_end(self, epoch, logs=None):
+        keys = list(logs.keys())
+        print("End epoch {} of training; got log keys: {}".format(epoch, keys))
+
+    def on_test_begin(self, logs=None):
+        keys = list(logs.keys())
+        print("Start testing; got log keys: {}".format(keys))
+
+    def on_test_end(self, logs=None):
+        keys = list(logs.keys())
+        print("Stop testing; got log keys: {}".format(keys))
+
+    def on_predict_begin(self, logs=None):
+        keys = list(logs.keys())
+        print("Start predicting; got log keys: {}".format(keys))
+
+    def on_predict_end(self, logs=None):
+        keys = list(logs.keys())
+        print("Stop predicting; got log keys: {}".format(keys))
+
+    def on_train_batch_begin(self, batch, logs=None):
+        keys = list(logs.keys())
+        print("...Training: start of batch {}; got log keys: {}".format(batch, keys))
+
+    def on_train_batch_end(self, batch, logs=None):
+        keys = list(logs.keys())
+        print("...Training: end of batch {}; got log keys: {}".format(batch, keys))
+
+    def on_test_batch_begin(self, batch, logs=None):
+        keys = list(logs.keys())
+        print("...Evaluating: start of batch {}; got log keys: {}".format(batch, keys))
+
+    def on_test_batch_end(self, batch, logs=None):
+        keys = list(logs.keys())
+        print("...Evaluating: end of batch {}; got log keys: {}".format(batch, keys))
+
+    def on_predict_batch_begin(self, batch, logs=None):
+        keys = list(logs.keys())
+        print("...Predicting: start of batch {}; got log keys: {}".format(batch, keys))
+
+    def on_predict_batch_end(self, batch, logs=None):
+        keys = list(logs.keys())
+        print("...Predicting: end of batch {}; got log keys: {}".format(batch, keys))
+```
+
+Let's try it out:
+
+```python
+model = get_model()
+model.fit(
+    x_train,
+    y_train,
+    batch_size=128,
+    epochs=1,
+    verbose=0,
+    validation_split=0.5,
+    callbacks=[CustomCallback()],
+)
+
+res = model.evaluate(
+    x_test, y_test, batch_size=128, verbose=0, callbacks=[CustomCallback()]
+)
+
+res = model.predict(x_test, batch_size=128, callbacks=[CustomCallback()])
+```
+
+### Usage of `logs` dict
+
+The `logs` dict contains the loss value, and all the metrics at the end of a batch or
+epoch. Examples include the loss and mean absolute error.
+
+```python
+class LossAndErrorPrintingCallback(keras.callbacks.Callback):
+    def on_train_batch_end(self, batch, logs=None):
+        print(
+            "Up to batch {}, the average loss is {:7.2f}.".format(batch, logs["loss"])
+        )
+
+    def on_test_batch_end(self, batch, logs=None):
+        print(
+            "Up to batch {}, the average loss is {:7.2f}.".format(batch, logs["loss"])
+        )
+
+    def on_epoch_end(self, epoch, logs=None):
+        print(
+            "The average loss for epoch {} is {:7.2f} "
+            "and mean absolute error is {:7.2f}.".format(
+                epoch, logs["loss"], logs["mean_absolute_error"]
+            )
+        )
+
+
+model = get_model()
+model.fit(
+    x_train,
+    y_train,
+    batch_size=128,
+    epochs=2,
+    verbose=0,
+    callbacks=[LossAndErrorPrintingCallback()],
+)
+
+res = model.evaluate(
+    x_test,
+    y_test,
+    batch_size=128,
+    verbose=0,
+    callbacks=[LossAndErrorPrintingCallback()],
+)
+```
+
+## Usage of `self.model` attribute
+
+In addition to receiving log information when one of their methods is called,
+callbacks have access to the model associated with the current round of
+training/evaluation/inference: `self.model`.
+
+Here are a few of the things you can do with `self.model` in a callback:
+
+- Set `self.model.stop_training = True` to immediately interrupt training.
+- Mutate hyperparameters of the optimizer (available as `self.model.optimizer`),
+such as `self.model.optimizer.learning_rate`.
+- Save the model at periodic intervals.
+- Record the output of `model.predict()` on a few test samples at the end of each
+epoch, to use as a sanity check during training.
+- Extract visualizations of intermediate features at the end of each epoch, to monitor
+what the model is learning over time.
+- etc.
+
+Let's see this in action in a couple of examples.
+
+## Examples of Keras callback applications
+
+### Early stopping at minimum loss
+
+This first example shows the creation of a `Callback` that stops training when the
+loss has reached its minimum, by setting the attribute `self.model.stop_training`
+(boolean). Optionally, you can provide an argument `patience` to specify how many
+epochs we should wait before stopping after having reached a local minimum.
+
+`keras.callbacks.EarlyStopping` provides a more complete and general implementation.
+
+```python
+class EarlyStoppingAtMinLoss(keras.callbacks.Callback):
+    """Stop training when the loss is at its min, i.e. the loss stops decreasing.
+
+    Arguments:
+        patience: Number of epochs to wait after min has been hit. After this
+        many epochs without improvement, training stops.
+    """
+
+    def __init__(self, patience=0):
+        super().__init__()
+        self.patience = patience
+        # best_weights to store the weights at which the minimum loss occurs.
+        self.best_weights = None
+
+    def on_train_begin(self, logs=None):
+        # The number of epochs waited since the loss was last at a minimum.
+        self.wait = 0
+        # The epoch the training stops at.
+        self.stopped_epoch = 0
+        # Initialize the best as infinity.
+        self.best = np.inf
+
+    def on_epoch_end(self, epoch, logs=None):
+        current = logs.get("loss")
+        if np.less(current, self.best):
+            self.best = current
+            self.wait = 0
+            # Record the best weights if the current result is better (less).
+            self.best_weights = self.model.get_weights()
+        else:
+            self.wait += 1
+            if self.wait >= self.patience:
+                self.stopped_epoch = epoch
+                self.model.stop_training = True
+                print("Restoring model weights from the end of the best epoch.")
+                self.model.set_weights(self.best_weights)
+
+    def on_train_end(self, logs=None):
+        if self.stopped_epoch > 0:
+            print(f"Epoch {self.stopped_epoch + 1}: early stopping")
+
+
+model = get_model()
+model.fit(
+    x_train,
+    y_train,
+    batch_size=64,
+    epochs=30,
+    verbose=0,
+    callbacks=[LossAndErrorPrintingCallback(), EarlyStoppingAtMinLoss()],
+)
+```
+
+### Learning rate scheduling
+
+In this example, we show how a custom Callback can be used to dynamically change the
+learning rate of the optimizer during the course of training.
+
+See `keras.callbacks.LearningRateScheduler` for a more general implementation.
+
+```python
+class CustomLearningRateScheduler(keras.callbacks.Callback):
+    """Learning rate scheduler which sets the learning rate according to schedule.
+
+    Arguments:
+        schedule: a function that takes an epoch index
+            (integer, indexed from 0) and current learning rate
+            as inputs and returns a new learning rate as output (float).
+    """
+
+    def __init__(self, schedule):
+        super().__init__()
+        self.schedule = schedule
+
+    def on_epoch_begin(self, epoch, logs=None):
+        if not hasattr(self.model.optimizer, "learning_rate"):
+            raise ValueError('Optimizer must have a "learning_rate" attribute.')
+        # Get the current learning rate from model's optimizer.
+        lr = self.model.optimizer.learning_rate
+        # Call schedule function to get the scheduled learning rate.
+        scheduled_lr = self.schedule(epoch, lr)
+        # Set the value back to the optimizer before this epoch starts
+        self.model.optimizer.learning_rate = scheduled_lr
+        print(f"\nEpoch {epoch}: Learning rate is {float(np.array(scheduled_lr))}.")
+
+
+LR_SCHEDULE = [
+    # (epoch to start, learning rate) tuples
+    (3, 0.05),
+    (6, 0.01),
+    (9, 0.005),
+    (12, 0.001),
+]
+
+
+def lr_schedule(epoch, lr):
+    """Helper function to retrieve the scheduled learning rate based on epoch."""
+    if epoch < LR_SCHEDULE[0][0] or epoch > LR_SCHEDULE[-1][0]:
+        return lr
+    for i in range(len(LR_SCHEDULE)):
+        if epoch == LR_SCHEDULE[i][0]:
+            return LR_SCHEDULE[i][1]
+    return lr
+
+
+model = get_model()
+model.fit(
+    x_train,
+    y_train,
+    batch_size=64,
+    epochs=15,
+    verbose=0,
+    callbacks=[
+        LossAndErrorPrintingCallback(),
+        CustomLearningRateScheduler(lr_schedule),
+    ],
+)
+```
+
+### Built-in Keras callbacks
+
+Be sure to check out the existing Keras callbacks by
+reading the [API docs](https://keras.io/api/callbacks/).
+Applications include logging to CSV, saving
+the model, visualizing metrics in TensorBoard, and a lot more!
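+
+As a quick illustrative sketch (not taken from the API docs; the file paths and
+hyperparameters here are placeholders), a few of these built-in callbacks could
+be wired into `fit()` like this:
+
+```python
+callbacks = [
+    # Stream per-epoch metrics to a CSV file.
+    keras.callbacks.CSVLogger("training_log.csv"),
+    # Save the full model after every epoch (Keras 3 uses the `.keras` format).
+    keras.callbacks.ModelCheckpoint(filepath="checkpoint.model.keras"),
+    # Stop once the loss has not improved for 3 consecutive epochs.
+    keras.callbacks.EarlyStopping(monitor="loss", patience=3),
+]
+
+model = get_model()
+model.fit(
+    x_train,
+    y_train,
+    batch_size=64,
+    epochs=10,
+    verbose=0,
+    callbacks=callbacks,
+)
+```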
+
diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION
deleted file mode 100644
index b452a200f9..0000000000
--- a/CRAN-SUBMISSION
+++ /dev/null
@@ -1,3 +0,0 @@
-Version: 2.9.0
-Date: 2022-05-23 14:48:58 UTC
-SHA: eee28b4d5401ecdabfed1919a9cc696c623e27cf
diff --git a/DESCRIPTION b/DESCRIPTION
index 1533185fe5..23dc30827a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,15 +1,15 @@
-Package: keras
+Package: keras3
 Type: Package
 Title: R Interface to 'Keras'
-Version: 2.9.0.9000
+Version: 0.2.0.9000
 Authors@R: c(
-    person("Tomasz", "Kalinowski", role = c("ctb", "cph", "cre"),
-           email = "tomasz.kalinowski@rstudio.com"),
-    person("Daniel", "Falbel", role = c("ctb", "cph"), email = "daniel@rstudio.com"),
+    person("Tomasz", "Kalinowski", role = c("aut", "cph", "cre"),
+           email = "tomasz@posit.co"),
+    person("Daniel", "Falbel", role = c("ctb", "cph"), email = "daniel@posit.co"),
     person("JJ", "Allaire", role = c("aut", "cph")),
     person("François", "Chollet", role = c("aut", "cph")),
-    person("RStudio", role = c("ctb", "cph", "fnd")),
-    person("Google", role = c("ctb", "cph", "fnd")),
+    person("Posit Software, PBC", role = c("cph", "fnd")),
+    person("Google", role = c("cph", "fnd")),
     person("Yuan", "Tang", role = c("ctb", "cph"),
            email = "terrytangyuan@gmail.com",
            comment = c(ORCID = "0000-0001-5243-233X")),
@@ -18,26 +18,25 @@ Authors@R: c(
     person("Sigrid", "Keydana", role = c("ctb"))
     )
 Description: Interface to 'Keras' <https://keras.io>, a high-level neural
-    networks 'API'. 'Keras' was developed with a focus on enabling fast experimentation,
+    networks API. 'Keras' was developed with a focus on enabling fast experimentation,
     supports both convolution based networks and recurrent networks (as well as
-    combinations of the two), and runs seamlessly on both 'CPU' and 'GPU' devices.
+    combinations of the two), and runs seamlessly on both CPU and GPU devices.
Encoding: UTF-8 License: MIT + file LICENSE -URL: https://keras.rstudio.com +URL: https://keras.posit.co/, https://github.com/rstudio/keras BugReports: https://github.com/rstudio/keras/issues Depends: - R (>= 3.4) + R (>= 4.0) Imports: generics (>= 0.0.1), - reticulate (> 1.22), - tensorflow (>= 2.8.0), - tfruns (>= 1.0), + reticulate (>= 1.35.0.9000), + tensorflow (>= 2.15.0.9000), + tfruns (>= 1.5.2), magrittr, zeallot, + fastmap, glue, - methods, - R6, - ellipsis, + cli, rlang Suggests: ggplot2, @@ -48,7 +47,10 @@ Suggests: tfdatasets, withr, png, + jsonlite, + purrr, + rstudioapi, + R6, jpeg -Roxygen: list(markdown = TRUE, r6 = FALSE) -RoxygenNote: 7.2.0 +RoxygenNote: 7.3.1 VignetteBuilder: knitr diff --git a/Jenkinsfile b/Jenkinsfile deleted file mode 100644 index 33048b48f5..0000000000 --- a/Jenkinsfile +++ /dev/null @@ -1,21 +0,0 @@ -#!groovy - -properties([pipelineTriggers([githubPush()]), - disableConcurrentBuilds(), - buildDiscarder(logRotator(artifactDaysToKeepStr: '', artifactNumToKeepStr: '', daysToKeepStr: '', numToKeepStr: '10')) -]) - -node('docker') { - timestamps { - ansiColor('xterm') { - stage('Checkout source') { - checkout scm - } - - stage('Publish to S3') { - milestone label: 'deploy' - sh 'aws s3 sync website s3://keras.rstudio.com/ --acl public-read --cache-control "public,max-age=900"' - } - } - } -} diff --git a/LICENSE b/LICENSE index 87be06ff4e..956161c7b3 100644 --- a/LICENSE +++ b/LICENSE @@ -1,2 +1,2 @@ -YEAR: 2017 -COPYRIGHT HOLDER: RStudio, Inc; Google, Inc; François Chollet; Yuan Tang +YEAR: 2024 +COPYRIGHT HOLDER: Posit Software, PBC; Google, Inc; François Chollet; Yuan Tang diff --git a/NAMESPACE b/NAMESPACE index 3efa887ba2..d0d69af6d2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,51 +1,84 @@ # Generated by roxygen2: do not edit by hand +S3method("$",python_builtin_super_getter) +S3method("$<-",keras.src.callbacks.callback.Callback) +S3method("+",keras.src.backend.common.keras_tensor.KerasTensor) +S3method("==",keras.src.backend.common.keras_tensor.KerasTensor) +S3method("[",keras_shape) +S3method("[[",python_builtin_super_getter) +S3method(as.array,jaxlib.xla_extension.ArrayImpl) +S3method(as.array,keras.src.backend.common.variables.KerasVariable) S3method(as.data.frame,keras_training_history) -S3method(compile,keras.engine.training.Model) -S3method(evaluate,keras.engine.training.Model) -S3method(export_savedmodel,keras.engine.training.Model) -S3method(fit,keras.engine.training.Model) -S3method(format,keras.engine.training.Model) -S3method(plot,keras.engine.training.Model) +S3method(as.double,jaxlib.xla_extension.ArrayImpl) +S3method(as.double,keras.src.backend.common.variables.KerasVariable) +S3method(as.integer,keras.src.backend.common.variables.KerasVariable) +S3method(as.integer,keras_shape) +S3method(as.list,keras_shape) +S3method(as.numeric,keras.src.backend.common.variables.KerasVariable) +S3method(base::all.equal,keras.src.backend.common.keras_tensor.KerasTensor) +S3method(base::all.equal,keras.src.backend.common.variables.KerasVariable) +S3method(compile,keras.src.models.model.Model) +S3method(destructure,keras_shape) +S3method(evaluate,keras.src.models.model.Model) +S3method(fit,keras.src.models.model.Model) +S3method(format,keras.src.models.model.Model) +S3method(format,keras_shape) +S3method(plot,keras.src.models.model.Model) S3method(plot,keras_training_history) -S3method(predict,keras.engine.training.Model) -S3method(print,keras.engine.training.Model) +S3method(predict,keras.src.models.model.Model) 
+S3method(print,keras.src.models.model.Model) +S3method(print,keras_shape) S3method(print,keras_training_history) -S3method(print,kerastools.model.RModel) -S3method(print,py_R6ClassGenerator) -S3method(py_str,keras.engine.training.Model) -S3method(py_to_r_wrapper,keras.engine.base_layer.Layer) -S3method(py_to_r_wrapper,keras.engine.training.Model) -S3method(py_to_r_wrapper,kerastools.model.RModel) +S3method(py_str,keras.src.models.model.Model) +S3method(py_to_r,tensorflow.python.ops.gen_linalg_ops.Qr) +S3method(py_to_r,tensorflow.python.ops.gen_nn_ops.TopKV2) +S3method(py_to_r_wrapper,keras.src.initializers.initializer.Initializer) +S3method(py_to_r_wrapper,keras.src.layers.layer.Layer) +S3method(py_to_r_wrapper,keras.src.losses.loss.Loss) +S3method(py_to_r_wrapper,keras.src.metrics.metric.Metric) S3method(r_to_py,R6ClassGenerator) -S3method(r_to_py,keras_layer_wrapper) -S3method(summary,keras.engine.training.Model) -S3method(summary,kerastools.model.RModel) +S3method(r_to_py,keras_shape) +S3method(summary,keras.src.models.model.Model) +S3method(tensorflow::export_savedmodel,keras.src.models.model.Model) export("%<-%") export("%<-active%") export("%<>%") export("%>%") export("%py_class%") -export(KerasCallback) -export(KerasConstraint) -export(KerasLayer) -export(KerasWrapper) +export(Callback) +export(Constraint) export(Layer) +export(Loss) +export(Metric) +export(Model) export(activation_elu) export(activation_exponential) export(activation_gelu) export(activation_hard_sigmoid) +export(activation_hard_silu) +export(activation_hard_swish) +export(activation_leaky_relu) export(activation_linear) +export(activation_log_softmax) +export(activation_mish) export(activation_relu) +export(activation_relu6) export(activation_selu) export(activation_sigmoid) +export(activation_silu) export(activation_softmax) export(activation_softplus) export(activation_softsign) -export(activation_swish) export(activation_tanh) +export(active_property) export(adapt) -export(application_densenet) +export(all_dims) +export(application_convnext_base) +export(application_convnext_large) +export(application_convnext_small) +export(application_convnext_tiny) +export(application_convnext_xlarge) +export(application_decode_predictions) export(application_densenet121) export(application_densenet169) export(application_densenet201) @@ -57,15 +90,22 @@ export(application_efficientnet_b4) export(application_efficientnet_b5) export(application_efficientnet_b6) export(application_efficientnet_b7) +export(application_efficientnet_v2b0) +export(application_efficientnet_v2b1) +export(application_efficientnet_v2b2) +export(application_efficientnet_v2b3) +export(application_efficientnet_v2l) +export(application_efficientnet_v2m) +export(application_efficientnet_v2s) export(application_inception_resnet_v2) export(application_inception_v3) export(application_mobilenet) export(application_mobilenet_v2) export(application_mobilenet_v3_large) export(application_mobilenet_v3_small) -export(application_nasnet) export(application_nasnetlarge) export(application_nasnetmobile) +export(application_preprocess_inputs) export(application_resnet101) export(application_resnet101_v2) export(application_resnet152) @@ -76,8 +116,9 @@ export(application_vgg16) export(application_vgg19) export(application_xception) export(array_reshape) +export(as_iterator) export(as_tensor) -export(backend) +export(audio_dataset_from_directory) export(bidirectional) export(callback_backup_and_restore) export(callback_csv_logger) @@ -85,21 +126,36 @@ 
export(callback_early_stopping) export(callback_lambda) export(callback_learning_rate_scheduler) export(callback_model_checkpoint) -export(callback_progbar_logger) export(callback_reduce_lr_on_plateau) export(callback_remote_monitor) +export(callback_swap_ema_weights) export(callback_tensorboard) -export(callback_terminate_on_naan) +export(callback_terminate_on_nan) +export(clear_session) export(clone_model) export(compile) +export(config_backend) +export(config_disable_interactive_logging) +export(config_disable_traceback_filtering) +export(config_dtype_policy) +export(config_enable_interactive_logging) +export(config_enable_traceback_filtering) +export(config_enable_unsafe_deserialization) +export(config_epsilon) +export(config_floatx) +export(config_image_data_format) +export(config_is_interactive_logging_enabled) +export(config_is_traceback_filtering_enabled) +export(config_set_backend) +export(config_set_dtype_policy) +export(config_set_epsilon) +export(config_set_floatx) +export(config_set_image_data_format) export(constraint_maxnorm) export(constraint_minmaxnorm) export(constraint_nonneg) export(constraint_unitnorm) export(count_params) -export(create_layer) -export(create_layer_wrapper) -export(create_wrapper) export(custom_metric) export(dataset_boston_housing) export(dataset_cifar10) @@ -110,48 +166,44 @@ export(dataset_imdb_word_index) export(dataset_mnist) export(dataset_reuters) export(dataset_reuters_word_index) -export(densenet_preprocess_input) +export(deserialize_keras_object) export(evaluate) -export(evaluate_generator) export(export_savedmodel) +export(feature_cross) +export(feature_custom) +export(feature_float) +export(feature_float_discretized) +export(feature_float_normalized) +export(feature_float_rescaled) +export(feature_integer_categorical) +export(feature_integer_hashed) +export(feature_string_categorical) +export(feature_string_hashed) export(fit) -export(fit_generator) -export(fit_image_data_generator) -export(fit_text_tokenizer) export(flag_boolean) export(flag_integer) export(flag_numeric) export(flag_string) export(flags) -export(flow_images_from_data) -export(flow_images_from_dataframe) -export(flow_images_from_directory) export(freeze_weights) export(from_config) -export(generator_next) export(get_config) +export(get_custom_objects) export(get_file) -export(get_input_at) -export(get_input_mask_at) -export(get_input_shape_at) export(get_layer) -export(get_output_at) -export(get_output_mask_at) -export(get_output_shape_at) +export(get_registered_name) +export(get_registered_object) +export(get_source_inputs) export(get_vocabulary) export(get_weights) -export(hdf5_matrix) -export(image_array_resize) export(image_array_save) -export(image_data_generator) export(image_dataset_from_directory) +export(image_from_array) export(image_load) +export(image_smart_resize) export(image_to_array) export(imagenet_decode_predictions) export(imagenet_preprocess_input) -export(implementation) -export(inception_resnet_v2_preprocess_input) -export(inception_v3_preprocess_input) export(initializer_constant) export(initializer_glorot_normal) export(initializer_glorot_uniform) @@ -168,172 +220,18 @@ export(initializer_truncated_normal) export(initializer_variance_scaling) export(initializer_zeros) export(install_keras) -export(is_keras_available) -export(k_abs) -export(k_all) -export(k_any) -export(k_arange) -export(k_argmax) -export(k_argmin) -export(k_backend) -export(k_batch_dot) -export(k_batch_flatten) -export(k_batch_get_value) -export(k_batch_normalization) 
-export(k_batch_set_value) -export(k_bias_add) -export(k_binary_crossentropy) -export(k_cast) -export(k_cast_to_floatx) -export(k_categorical_crossentropy) -export(k_clear_session) -export(k_clip) -export(k_concatenate) -export(k_constant) -export(k_conv1d) -export(k_conv2d) -export(k_conv2d_transpose) -export(k_conv3d) -export(k_conv3d_transpose) -export(k_cos) -export(k_count_params) -export(k_ctc_batch_cost) -export(k_ctc_decode) -export(k_ctc_label_dense_to_sparse) -export(k_cumprod) -export(k_cumsum) -export(k_depthwise_conv2d) -export(k_dot) -export(k_dropout) -export(k_dtype) -export(k_elu) -export(k_epsilon) -export(k_equal) -export(k_eval) -export(k_exp) -export(k_expand_dims) -export(k_eye) -export(k_flatten) -export(k_floatx) -export(k_foldl) -export(k_foldr) -export(k_function) -export(k_gather) -export(k_get_session) -export(k_get_uid) -export(k_get_value) -export(k_get_variable_shape) -export(k_gradients) -export(k_greater) -export(k_greater_equal) -export(k_hard_sigmoid) -export(k_identity) -export(k_image_data_format) -export(k_in_test_phase) -export(k_in_top_k) -export(k_in_train_phase) -export(k_int_shape) -export(k_is_keras_tensor) -export(k_is_placeholder) -export(k_is_sparse) -export(k_is_tensor) -export(k_l2_normalize) -export(k_learning_phase) -export(k_less) -export(k_less_equal) -export(k_local_conv1d) -export(k_local_conv2d) -export(k_log) -export(k_logsumexp) -export(k_manual_variable_initialization) -export(k_map_fn) -export(k_max) -export(k_maximum) -export(k_mean) -export(k_min) -export(k_minimum) -export(k_moving_average_update) -export(k_ndim) -export(k_normalize_batch_in_training) -export(k_not_equal) -export(k_one_hot) -export(k_ones) -export(k_ones_like) -export(k_permute_dimensions) -export(k_placeholder) -export(k_pool2d) -export(k_pool3d) -export(k_pow) -export(k_print_tensor) -export(k_prod) -export(k_random_bernoulli) -export(k_random_binomial) -export(k_random_normal) -export(k_random_normal_variable) -export(k_random_uniform) -export(k_random_uniform_variable) -export(k_relu) -export(k_repeat) -export(k_repeat_elements) -export(k_reset_uids) -export(k_reshape) -export(k_resize_images) -export(k_resize_volumes) -export(k_reverse) -export(k_rnn) -export(k_round) -export(k_separable_conv2d) -export(k_set_epsilon) -export(k_set_floatx) -export(k_set_image_data_format) -export(k_set_learning_phase) -export(k_set_session) -export(k_set_value) -export(k_shape) -export(k_sigmoid) -export(k_sign) -export(k_sin) -export(k_softmax) -export(k_softplus) -export(k_softsign) -export(k_sparse_categorical_crossentropy) -export(k_spatial_2d_padding) -export(k_spatial_3d_padding) -export(k_sqrt) -export(k_square) -export(k_squeeze) -export(k_stack) -export(k_std) -export(k_stop_gradient) -export(k_sum) -export(k_switch) -export(k_tanh) -export(k_temporal_padding) -export(k_tile) -export(k_to_dense) -export(k_transpose) -export(k_truncated_normal) -export(k_unstack) -export(k_update) -export(k_update_add) -export(k_update_sub) -export(k_var) -export(k_variable) -export(k_zeros) -export(k_zeros_like) +export(iter_next) +export(iterate) export(keras) -export(keras_array) +export(keras_input) export(keras_model) -export(keras_model_custom) export(keras_model_sequential) export(layer_activation) export(layer_activation_elu) export(layer_activation_leaky_relu) export(layer_activation_parametric_relu) export(layer_activation_relu) -export(layer_activation_selu) export(layer_activation_softmax) -export(layer_activation_thresholded_relu) export(layer_activity_regularization) 
export(layer_add) export(layer_additive_attention) @@ -344,6 +242,7 @@ export(layer_average_pooling_1d) export(layer_average_pooling_2d) export(layer_average_pooling_3d) export(layer_batch_normalization) +export(layer_bidirectional) export(layer_category_encoding) export(layer_center_crop) export(layer_concatenate) @@ -359,17 +258,17 @@ export(layer_conv_lstm_3d) export(layer_cropping_1d) export(layer_cropping_2d) export(layer_cropping_3d) -export(layer_cudnn_gru) -export(layer_cudnn_lstm) export(layer_dense) -export(layer_dense_features) export(layer_depthwise_conv_1d) export(layer_depthwise_conv_2d) export(layer_discretization) export(layer_dot) export(layer_dropout) +export(layer_einsum_dense) export(layer_embedding) +export(layer_feature_space) export(layer_flatten) +export(layer_flax_module_wrapper) export(layer_gaussian_dropout) export(layer_gaussian_noise) export(layer_global_average_pooling_1d) @@ -378,22 +277,24 @@ export(layer_global_average_pooling_3d) export(layer_global_max_pooling_1d) export(layer_global_max_pooling_2d) export(layer_global_max_pooling_3d) +export(layer_group_normalization) +export(layer_group_query_attention) export(layer_gru) -export(layer_gru_cell) +export(layer_hashed_crossing) export(layer_hashing) +export(layer_identity) export(layer_input) export(layer_integer_lookup) +export(layer_jax_model_wrapper) export(layer_lambda) export(layer_layer_normalization) -export(layer_locally_connected_1d) -export(layer_locally_connected_2d) export(layer_lstm) -export(layer_lstm_cell) export(layer_masking) export(layer_max_pooling_1d) export(layer_max_pooling_2d) export(layer_max_pooling_3d) export(layer_maximum) +export(layer_mel_spectrogram) export(layer_minimum) export(layer_multi_head_attention) export(layer_multiply) @@ -403,10 +304,8 @@ export(layer_random_brightness) export(layer_random_contrast) export(layer_random_crop) export(layer_random_flip) -export(layer_random_height) export(layer_random_rotation) export(layer_random_translation) -export(layer_random_width) export(layer_random_zoom) export(layer_repeat_vector) export(layer_rescaling) @@ -416,14 +315,16 @@ export(layer_rnn) export(layer_separable_conv_1d) export(layer_separable_conv_2d) export(layer_simple_rnn) -export(layer_simple_rnn_cell) export(layer_spatial_dropout_1d) export(layer_spatial_dropout_2d) export(layer_spatial_dropout_3d) -export(layer_stacked_rnn_cells) +export(layer_spectral_normalization) export(layer_string_lookup) export(layer_subtract) export(layer_text_vectorization) +export(layer_tfsm) +export(layer_time_distributed) +export(layer_torch_module_wrapper) export(layer_unit_normalization) export(layer_upsampling_1d) export(layer_upsampling_2d) @@ -437,21 +338,21 @@ export(learning_rate_schedule_exponential_decay) export(learning_rate_schedule_inverse_time_decay) export(learning_rate_schedule_piecewise_constant_decay) export(learning_rate_schedule_polynomial_decay) -export(load_model_hdf5) -export(load_model_tf) -export(load_model_weights_hdf5) -export(load_model_weights_tf) -export(load_text_tokenizer) +export(load_model) +export(load_model_config) +export(load_model_weights) export(loss_binary_crossentropy) +export(loss_binary_focal_crossentropy) export(loss_categorical_crossentropy) +export(loss_categorical_focal_crossentropy) export(loss_categorical_hinge) -export(loss_cosine_proximity) export(loss_cosine_similarity) +export(loss_ctc) +export(loss_dice) export(loss_hinge) export(loss_huber) export(loss_kl_divergence) -export(loss_kullback_leibler_divergence) -export(loss_logcosh) 
+export(loss_log_cosh) export(loss_mean_absolute_error) export(loss_mean_absolute_percentage_error) export(loss_mean_squared_error) @@ -459,34 +360,41 @@ export(loss_mean_squared_logarithmic_error) export(loss_poisson) export(loss_sparse_categorical_crossentropy) export(loss_squared_hinge) -export(make_sampling_table) +export(loss_tversky) export(mark_active) -export(metric_accuracy) export(metric_auc) export(metric_binary_accuracy) export(metric_binary_crossentropy) +export(metric_binary_focal_crossentropy) +export(metric_binary_iou) export(metric_categorical_accuracy) export(metric_categorical_crossentropy) +export(metric_categorical_focal_crossentropy) export(metric_categorical_hinge) -export(metric_cosine_proximity) export(metric_cosine_similarity) +export(metric_f1_score) export(metric_false_negatives) export(metric_false_positives) +export(metric_fbeta_score) export(metric_hinge) -export(metric_kullback_leibler_divergence) -export(metric_logcosh_error) +export(metric_huber) +export(metric_iou) +export(metric_kl_divergence) +export(metric_log_cosh) +export(metric_log_cosh_error) export(metric_mean) export(metric_mean_absolute_error) export(metric_mean_absolute_percentage_error) export(metric_mean_iou) -export(metric_mean_relative_error) export(metric_mean_squared_error) export(metric_mean_squared_logarithmic_error) -export(metric_mean_tensor) export(metric_mean_wrapper) +export(metric_one_hot_iou) +export(metric_one_hot_mean_iou) export(metric_poisson) export(metric_precision) export(metric_precision_at_recall) +export(metric_r2_score) export(metric_recall) export(metric_recall_at_precision) export(metric_root_mean_squared_error) @@ -500,20 +408,6 @@ export(metric_sum) export(metric_top_k_categorical_accuracy) export(metric_true_negatives) export(metric_true_positives) -export(mobilenet_decode_predictions) -export(mobilenet_load_model_hdf5) -export(mobilenet_preprocess_input) -export(mobilenet_v2_decode_predictions) -export(mobilenet_v2_load_model_hdf5) -export(mobilenet_v2_preprocess_input) -export(model_from_json) -export(model_from_saved_model) -export(model_from_yaml) -export(model_to_json) -export(model_to_saved_model) -export(model_to_yaml) -export(multi_gpu_model) -export(nasnet_preprocess_input) export(new_callback_class) export(new_layer_class) export(new_learning_rate_schedule_class) @@ -521,115 +415,349 @@ export(new_loss_class) export(new_metric_class) export(new_model_class) export(normalize) +export(np_array) +export(op_abs) +export(op_add) +export(op_all) +export(op_any) +export(op_append) +export(op_arange) +export(op_arccos) +export(op_arccosh) +export(op_arcsin) +export(op_arcsinh) +export(op_arctan) +export(op_arctan2) +export(op_arctanh) +export(op_argmax) +export(op_argmin) +export(op_argsort) +export(op_array) +export(op_average) +export(op_average_pool) +export(op_batch_normalization) +export(op_binary_crossentropy) +export(op_bincount) +export(op_broadcast_to) +export(op_cast) +export(op_categorical_crossentropy) +export(op_ceil) +export(op_cholesky) +export(op_clip) +export(op_concatenate) +export(op_cond) +export(op_conj) +export(op_conv) +export(op_conv_transpose) +export(op_convert_to_numpy) +export(op_convert_to_tensor) +export(op_copy) +export(op_correlate) +export(op_cos) +export(op_cosh) +export(op_count_nonzero) +export(op_cross) +export(op_ctc_decode) +export(op_ctc_loss) +export(op_cumprod) +export(op_cumsum) +export(op_custom_gradient) +export(op_depthwise_conv) +export(op_det) +export(op_diag) +export(op_diagonal) +export(op_diff) 
+export(op_digitize) +export(op_divide) +export(op_divide_no_nan) +export(op_dot) +export(op_eig) +export(op_eigh) +export(op_einsum) +export(op_elu) +export(op_empty) +export(op_equal) +export(op_erf) +export(op_erfinv) +export(op_exp) +export(op_expand_dims) +export(op_expm1) +export(op_extract_sequences) +export(op_eye) +export(op_fft) +export(op_fft2) +export(op_flip) +export(op_floor) +export(op_floor_divide) +export(op_fori_loop) +export(op_full) +export(op_full_like) +export(op_gelu) +export(op_get_item) +export(op_greater) +export(op_greater_equal) +export(op_hard_sigmoid) +export(op_hard_silu) +export(op_hard_swish) +export(op_hstack) +export(op_identity) +export(op_imag) +export(op_image_affine_transform) +export(op_image_crop) +export(op_image_extract_patches) +export(op_image_map_coordinates) +export(op_image_pad) +export(op_image_resize) +export(op_image_rgb_to_grayscale) +export(op_in_top_k) +export(op_inv) +export(op_irfft) +export(op_is_tensor) +export(op_isclose) +export(op_isfinite) +export(op_isinf) +export(op_isnan) +export(op_istft) +export(op_leaky_relu) +export(op_less) +export(op_less_equal) +export(op_linspace) +export(op_log) +export(op_log10) +export(op_log1p) +export(op_log2) +export(op_log_sigmoid) +export(op_log_softmax) +export(op_logaddexp) +export(op_logical_and) +export(op_logical_not) +export(op_logical_or) +export(op_logical_xor) +export(op_logspace) +export(op_logsumexp) +export(op_lu_factor) +export(op_matmul) +export(op_max) +export(op_max_pool) +export(op_maximum) +export(op_mean) +export(op_median) +export(op_meshgrid) +export(op_min) +export(op_minimum) +export(op_mod) +export(op_moments) +export(op_moveaxis) +export(op_multi_hot) +export(op_multiply) +export(op_nan_to_num) +export(op_ndim) +export(op_negative) +export(op_nonzero) +export(op_norm) +export(op_normalize) +export(op_not_equal) +export(op_one_hot) +export(op_ones) +export(op_ones_like) +export(op_outer) +export(op_pad) +export(op_pmax) +export(op_pmin) +export(op_power) +export(op_prod) +export(op_qr) +export(op_quantile) +export(op_ravel) +export(op_real) +export(op_reciprocal) +export(op_relu) +export(op_relu6) +export(op_repeat) +export(op_reshape) +export(op_rfft) +export(op_roll) +export(op_round) +export(op_rsqrt) +export(op_scatter) +export(op_scatter_update) +export(op_segment_max) +export(op_segment_sum) +export(op_select) +export(op_selu) +export(op_separable_conv) +export(op_shape) +export(op_sigmoid) +export(op_sign) +export(op_silu) +export(op_sin) +export(op_sinh) +export(op_size) +export(op_slice) +export(op_slice_update) +export(op_softmax) +export(op_softplus) +export(op_softsign) +export(op_solve) +export(op_solve_triangular) +export(op_sort) +export(op_sparse_categorical_crossentropy) +export(op_split) +export(op_sqrt) +export(op_square) +export(op_squeeze) +export(op_stack) +export(op_std) +export(op_stft) +export(op_stop_gradient) +export(op_subtract) +export(op_sum) +export(op_svd) +export(op_swapaxes) +export(op_take) +export(op_take_along_axis) +export(op_tan) +export(op_tanh) +export(op_tensordot) +export(op_tile) +export(op_top_k) +export(op_trace) +export(op_transpose) +export(op_tri) +export(op_tril) +export(op_triu) +export(op_unstack) +export(op_var) +export(op_vdot) +export(op_vectorize) +export(op_vectorized_map) +export(op_vstack) +export(op_where) +export(op_while_loop) +export(op_zeros) +export(op_zeros_like) export(optimizer_adadelta) +export(optimizer_adafactor) export(optimizer_adagrad) export(optimizer_adam) +export(optimizer_adam_w) 
export(optimizer_adamax) +export(optimizer_ftrl) +export(optimizer_lion) +export(optimizer_loss_scale) export(optimizer_nadam) export(optimizer_rmsprop) export(optimizer_sgd) +export(pack_x_y_sample_weight) export(pad_sequences) export(pop_layer) -export(predict_classes) -export(predict_generator) export(predict_on_batch) -export(predict_proba) +export(quantize_weights) +export(random_beta) +export(random_binomial) +export(random_categorical) +export(random_dropout) +export(random_gamma) +export(random_integer) +export(random_normal) +export(random_seed_generator) +export(random_shuffle) +export(random_truncated_normal) +export(random_uniform) +export(register_keras_serializable) export(regularizer_l1) export(regularizer_l1_l2) export(regularizer_l2) export(regularizer_orthogonal) -export(reset_states) -export(resnet_preprocess_input) -export(resnet_v2_preprocess_input) +export(reset_state) +export(rnn_cell_gru) +export(rnn_cell_lstm) +export(rnn_cell_simple) +export(rnn_cells_stack) export(run_dir) -export(save_model_hdf5) -export(save_model_tf) -export(save_model_weights_hdf5) -export(save_model_weights_tf) -export(save_text_tokenizer) -export(sequences_to_matrix) -export(serialize_model) +export(save_model) +export(save_model_config) +export(save_model_weights) +export(serialize_keras_object) +export(set_custom_objects) +export(set_random_seed) export(set_vocabulary) export(set_weights) export(shape) -export(skipgrams) +export(split_dataset) export(tensorboard) export(test_on_batch) export(text_dataset_from_directory) -export(text_hashing_trick) -export(text_one_hot) -export(text_to_word_sequence) -export(text_tokenizer) -export(texts_to_matrix) -export(texts_to_sequences) -export(texts_to_sequences_generator) export(time_distributed) export(timeseries_dataset_from_array) -export(timeseries_generator) export(to_categorical) export(train_on_batch) export(tuple) export(unfreeze_weights) -export(unserialize_model) +export(unpack_x_y_sample_weight) export(use_backend) -export(use_condaenv) -export(use_implementation) export(use_python) -export(use_session_with_seed) export(use_virtualenv) export(with_custom_object_scope) -export(xception_preprocess_input) export(zip_lists) -import(R6) -import(magrittr) -import(methods) -import(zeallot) +import(reticulate) importFrom(generics,compile) importFrom(generics,fit) +importFrom(glue,trim) importFrom(graphics,par) importFrom(graphics,plot) importFrom(graphics,points) +importFrom(magrittr,"%<>%") +importFrom(magrittr,"%>%") importFrom(reticulate,array_reshape) +importFrom(reticulate,as_iterator) importFrom(reticulate,dict) importFrom(reticulate,import) importFrom(reticulate,import_builtins) importFrom(reticulate,import_from_path) +importFrom(reticulate,iter_next) importFrom(reticulate,iterate) +importFrom(reticulate,np_array) importFrom(reticulate,py_call) importFrom(reticulate,py_capture_output) importFrom(reticulate,py_clear_last_error) +importFrom(reticulate,py_del_item) importFrom(reticulate,py_dict) importFrom(reticulate,py_eval) importFrom(reticulate,py_func) importFrom(reticulate,py_get_attr) +importFrom(reticulate,py_get_item) importFrom(reticulate,py_has_attr) -importFrom(reticulate,py_id) +importFrom(reticulate,py_install) importFrom(reticulate,py_is_null_xptr) importFrom(reticulate,py_iterator) -importFrom(reticulate,py_load_object) -importFrom(reticulate,py_save_object) importFrom(reticulate,py_str) importFrom(reticulate,py_to_r) importFrom(reticulate,py_to_r_wrapper) importFrom(reticulate,r_to_py) importFrom(reticulate,tuple) 
-importFrom(reticulate,use_condaenv)
importFrom(reticulate,use_python)
importFrom(reticulate,use_virtualenv)
importFrom(rlang,"%||%")
+importFrom(rlang,.data)
+importFrom(rlang,dots_list)
+importFrom(rlang,is_string)
+importFrom(rlang,list2)
importFrom(stats,predict)
+importFrom(tensorflow,all_dims)
importFrom(tensorflow,as_tensor)
importFrom(tensorflow,evaluate)
importFrom(tensorflow,export_savedmodel)
importFrom(tensorflow,install_tensorflow)
-importFrom(tensorflow,shape)
importFrom(tensorflow,tensorboard)
importFrom(tensorflow,tf_config)
importFrom(tensorflow,tf_version)
-importFrom(tensorflow,use_session_with_seed)
importFrom(tfruns,flag_boolean)
importFrom(tfruns,flag_integer)
importFrom(tfruns,flag_numeric)
importFrom(tfruns,flag_string)
importFrom(tfruns,flags)
importFrom(tfruns,run_dir)
+importFrom(utils,modifyList)
+importFrom(zeallot,"%<-%")
+importFrom(zeallot,destructure)
diff --git a/NEWS.md b/NEWS.md
index c1644b0c65..b3f23133de 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,7 +1,180 @@
-# keras (development version)
+# keras3 (development version)
+
+- Chains of `layer_*` calls with `|>` now instantiate layers in the
+  same order as `%>%` pipe chains: left-hand-side first (#1440).
+
+- `iterate()`, `iter_next()` and `as_iterator()` are now reexported from reticulate.
+
+User-facing changes with upstream Keras v3.3.2:
+
+- new function: `op_ctc_decode()`
+- new function: `op_eigh()`
+- new function: `op_select()`
+- new function: `op_vectorize()`
+- new function: `op_image_rgb_to_grayscale()`
+- new function: `loss_tversky()`
+
+- new args: `layer_resizing(pad_to_aspect_ratio, fill_mode, fill_value)`
+- new arg: `layer_embedding(weights)` for providing an initial weights matrix
+
+- new args: `op_nan_to_num(nan, posinf, neginf)`
+- new args: `op_image_resize(crop_to_aspect_ratio, pad_to_aspect_ratio, fill_mode, fill_value)`
+- new args: `op_argmax(keepdims)` and `op_argmin(keepdims)`
+
+- new arg: `clear_session(free_memory)` for clearing without invoking the garbage collector.
+
+- `metric_kl_divergence()` and `loss_kl_divergence()` clip inputs
+  (`y_true` and `y_pred`) to the `[0, 1]` range.
+
+- new `Layer()` attributes: `metrics`, `dtype_policy`
+
+- Added initial support for float8 training
+
+- `layer_conv_*d()` layers now support LoRA
+
+- `op_digitize()` now supports sparse tensors.
+
+- Models and layers now return owned metrics recursively.
+
+- Added pickling support for Keras models (e.g., via `reticulate::py_save_object()`).
+  Note that pickling is not recommended; prefer the Keras saving APIs.
+
+
+# keras3 0.2.0
+
+New functions:
+
+ - `quantize_weights()`: quantize model or layer weights in-place. Currently,
+   only `Dense`, `EinsumDense`, and `Embedding` layers are supported (which is enough to
+   cover the majority of transformers today)
+ - `layer_mel_spectrogram()`
+ - `layer_flax_module_wrapper()`
+ - `layer_jax_model_wrapper()`
+
+ - `loss_dice()`
+
+ - `random_beta()`
+ - `random_binomial()`
+
+ - `config_set_backend()`: change the backend after Keras has initialized.
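+   A minimal sketch (assuming the Python package for the target backend,
+   e.g. jax, is installed in the active environment):
+   ```r
+   library(keras3)
+   # Keras starts with the default backend (TensorFlow);
+   # switch to JAX after it has initialized:
+   config_set_backend("jax")
+   ```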
+ - `config_dtype_policy()`
+ - `config_set_dtype_policy()`
+
+ - New Ops
+   - `op_custom_gradient()`
+   - `op_batch_normalization()`
+   - `op_image_crop()`
+   - `op_divide_no_nan()`
+   - `op_normalize()`
+   - `op_correlate()`
+ - New family of linear algebra ops
+   - `op_cholesky()`
+   - `op_det()`
+   - `op_eig()`
+   - `op_inv()`
+   - `op_lu_factor()`
+   - `op_norm()`
+   - `op_erfinv()`
+   - `op_solve_triangular()`
+   - `op_svd()`
+
+- `audio_dataset_from_directory()`, `image_dataset_from_directory()` and `text_dataset_from_directory()` gain a `verbose` argument (default `TRUE`)
+
+- `image_dataset_from_directory()` gains `pad_to_aspect_ratio` argument (default `FALSE`)
+
+- `to_categorical()`, `op_one_hot()`, and `fit()` can now accept R factors,
+  offsetting them to be 0-based (reported in `#1055`).
+
+- `op_convert_to_numpy()` now returns unconverted NumPy arrays.
+
+- `op_array()` and `op_convert_to_tensor()` no longer error when casting R
+  doubles to integer types.
+
+- `export_savedmodel()` now works with the JAX backend.
+
+- `Metric()$add_variable()` method gains arg: `aggregation`.
+- `Layer()$add_weight()` method gains args: `autocast`, `regularizer`, `aggregation`.
+
+- `op_bincount()`, `op_multi_hot()`, `op_one_hot()`, and `layer_category_encoding()` now support sparse tensors.
+
+- `op_custom_gradient()` now supports the PyTorch backend
+
+- `layer_lstm()` and `layer_gru()` gain arg `use_cudnn`, default `'auto'`.
+
+- Fixed an issue where `application_preprocess_inputs()` would error if supplied
+  an R array as input.
+
+- Doc improvements.
+
+# keras3 0.1.0
+
+- The package has been rebuilt for Keras 3.0. Refer to https://keras.posit.co
+  for an overview and the current up-to-date documentation.
+
+# keras 2.13.0
+
+- Default TF version installed by `install_keras()` is now 2.13.
+
+- Updated layers:
+  - `layer_batch_normalization()` updated signature, with changes to options for distributed training.
+  - `layer_embedding()` gains a `sparse` argument.
+
+- Fixed deadlock when an R generator was passed to `fit()`, `predict()`, and other endpoints.
+
+- When `fit(verbose = "auto")` is evaluated in the context of a knitr document
+  (e.g., quarto or rmarkdown document being rendered), verbose will now
+  default to `2`, showing one line per epoch.
+
+# keras 2.11.1
+
+- Updated S3 method formals per new CRAN requirement (`r_to_py.keras_layer_wrapper()`)
+
+- Fixed an issue where `get_file()` would incorrectly save files
+  in the current working directory. (#1365)
+
+# keras 2.11.0
+
+- Default TensorFlow version installed by `install_keras()` is now 2.11.
+
+- All optimizers have been updated for keras/tensorflow version 2.11.
+  Arguments to all the optimizers have changed. To access the previous
+  optimizer implementations, use the constructors available at
+  `keras$optimizers$legacy`. For example, use `keras$optimizers$legacy$Adam()`
+  for the previous implementation of `optimizer_adam()`.
+
+- New optimizer `optimizer_ftrl()`.
+
+- Updates to layers (see the sketch after this list):
+  - `layer_attention()` gains `score_mode` and `dropout` arguments.
+  - `layer_discretization()` gains `output_mode` and `sparse` arguments.
+  - `layer_gaussian_dropout()` and `layer_gaussian_noise()` gain a `seed` argument.
+  - `layer_hashing()` gains `output_mode` and `sparse` arguments.
+  - `layer_integer_lookup()` gains `vocabulary_dtype` and `idf_weights` arguments.
+  - `layer_normalization()` gains an `invert` argument.
+  - `layer_string_lookup()` gains an `idf_weights` argument.
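+
+  For instance, a sketch of the new hashing options (the argument values
+  here are illustrative only):
+  ```r
+  library(keras)
+  # hash raw string features into 64 bins, emitting one-hot vectors
+  hashing <- layer_hashing(num_bins = 64, output_mode = "one_hot")
+  ```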
+
+- Fixed issue where `input_shape` supplied to custom layers defined with `new_layer_class()`
+  would result in an error (#1338)

- New `callback_backup_and_restore()`, for resuming an interrupted `fit()` call.

+- The merging family of layers (`layer_add`, `layer_concatenate`, etc.) gain the ability
+  to accept layers in `...`, allowing for easier composition of residual blocks with the pipe `%>%`.
+  e.g. something like this now works:
+  ```r
+  block_1_output <- ...
+  block_2_output <- block_1_output %>%
+    layer_conv_2d(64, 3, activation = "relu", padding = "same") %>%
+    layer_add(block_1_output)
+  ```
+
+- `model$get_config()` method now returns an R object that can be safely serialized
+  to rds.
+
+- `keras_array()` now reflects unconverted Python objects. This enables passing
+  objects like `pandas.Series()` to `fit()` and `evaluate()` methods. (#1341)
+
 # keras 2.9.0

- New functions for constructing custom keras subclasses:
@@ -173,10 +346,10 @@ processes one step of a sequence. New symbols:

- `layer_rnn()`, which can compose with builtin cells:
-  - `layer_gru_cell()`
-  - `layer_lstm_cell()`
-  - `layer_simple_rnn_cell()`
-  - `layer_stacked_rnn_cells()`
+  - `rnn_cell_gru()`
+  - `rnn_cell_lstm()`
+  - `rnn_cell_simple()`
+  - `rnn_cells_stack()`

To learn more, including how to make a custom cell layer, see the new vignette:
"Working with RNNs".
@@ -329,7 +502,7 @@ Breaking changes (Tensorflow 2.6):
- Note: The following breaking changes are specific to Tensorflow version 2.6.0.
  However, the keras R package maintains compatibility with multiple versions of
  Tensorflow/Keras. You can upgrade the R package and still preserve the previous behavior by
  installing a specific version of Tensorflow: `keras3::install_keras(tensorflow="2.4.0")`

- `predict_proba()` and `predict_classes()` were removed.
- `model_to_yaml()` and `model_from_yaml()` were removed.
diff --git a/R/Callback.R b/R/Callback.R
new file mode 100644
index 0000000000..da9037e23f
--- /dev/null
+++ b/R/Callback.R
@@ -0,0 +1,429 @@
+
+#' Define a custom `Callback` class
+#'
+#' @description
+#' Callbacks can be passed to keras methods such as `fit()`, `evaluate()`, and
+#' `predict()` in order to hook into the various stages of the model training,
+#' evaluation, and inference lifecycle.
+#'
+#' To create a custom callback, call `Callback()` and
+#' override the method associated with the stage of interest.
+#'
+#' # Examples
+#' ```{r, eval = F}
+#' training_finished <- FALSE
+#' callback_mark_finished <- Callback("MarkFinished",
+#'   on_train_end = function(logs = NULL) {
+#'     training_finished <<- TRUE
+#'   }
+#' )
+#'
+#' model <- keras_model_sequential(input_shape = c(1)) |>
+#'   layer_dense(1)
+#' model |> compile(loss = 'mean_squared_error')
+#' model |> fit(op_ones(c(1, 1)), op_ones(c(1, 1)),
+#'              callbacks = callback_mark_finished())
+#' stopifnot(isTRUE(training_finished))
+#' ```
+#'
+#' All R function custom methods (public and private) will have the
+#' following symbols in scope:
+#' * `self`: the `Callback` instance.
+#' * `super`: the `Callback` superclass.
+#' * `private`: An R environment specific to the class instance.
+#'   Any objects defined here will be invisible to the Keras framework.
+#' * `__class__`: the current class type object. This will also be available as
+#'   an alias symbol, the value supplied to `Callback(classname = )`
+#'
+#' # Attributes (accessible via `self$`)
+#'
+#' * `params`: Named list, Training parameters
+#'   (e.g. verbosity, batch size, number of epochs, ...).
+#' * `model`: Instance of `Model`.
+#'   Reference of the model being trained.
+#'
+#' The `logs` named list that callback methods
+#' take as argument will contain keys for quantities relevant to
+#' the current batch or epoch (see method-specific docstrings).
+#'
+#' @param
+#' on_epoch_begin
+#' ```r
+#' \(epoch, logs = NULL)
+#' ```
+#' Called at the start of an epoch.
+#'
+#' Subclasses should override for any actions to run. This function should
+#' only be called during TRAIN mode.
+#'
+#' Args:
+#' * `epoch`: Integer, index of epoch.
+#' * `logs`: Named list. Currently no data is passed to this argument for this
+#'   method but that may change in the future.
+#'
+#' @param
+#' on_epoch_end
+#' ```r
+#' \(epoch, logs = NULL)
+#' ```
+#' Called at the end of an epoch.
+#'
+#' Subclasses should override for any actions to run. This function should
+#' only be called during TRAIN mode.
+#'
+#' Args:
+#' * `epoch`: Integer, index of epoch.
+#' * `logs`: Named list, metric results for this training epoch, and for the
+#'   validation epoch if validation is performed. Validation result
+#'   keys are prefixed with `val_`. For training epoch, the values of
+#'   the `Model`'s metrics are returned. Example:
+#'   `list(loss = 0.2, accuracy = 0.7)`.
+#' @param
+#' on_predict_batch_begin
+#' ```r
+#' \(batch, logs = NULL)
+#' ```
+#' Called at the beginning of a batch in `predict()` methods.
+#'
+#' Subclasses should override for any actions to run.
+#'
+#' Note that if the `steps_per_execution` argument to `compile()` in
+#' `Model` is set to `N`, this method will only be called every
+#' `N` batches.
+#'
+#' Args:
+#' * `batch`: Integer, index of batch within the current epoch.
+#' * `logs`: Named list. Currently no data is passed to this argument for this
+#'   method but that may change in the future.
+#'
+#' @param
+#' on_predict_batch_end
+#' ```r
+#' \(batch, logs = NULL)
+#' ```
+#' Called at the end of a batch in `predict()` methods.
+#'
+#' Subclasses should override for any actions to run.
+#'
+#' Note that if the `steps_per_execution` argument to `compile()` in
+#' `Model` is set to `N`, this method will only be called every
+#' `N` batches.
+#'
+#' Args:
+#' * `batch`: Integer, index of batch within the current epoch.
+#' * `logs`: Named list. Aggregated metric results up until this batch.
+#'
+#' @param
+#' on_predict_begin
+#' ```r
+#' \(logs = NULL)
+#' ```
+#' Called at the beginning of prediction.
+#'
+#' Subclasses should override for any actions to run.
+#'
+#' Args:
+#' * `logs`: Named list. Currently no data is passed to this argument for this
+#'   method but that may change in the future.
+#'
+#' @param
+#' on_predict_end
+#' ```r
+#' \(logs = NULL)
+#' ```
+#' Called at the end of prediction.
+#'
+#' Subclasses should override for any actions to run.
+#'
+#' Args:
+#' * `logs`: Named list. Currently no data is passed to this argument for this
+#'   method but that may change in the future.
+#'
+#' @param
+#' on_test_batch_begin
+#' ```r
+#' \(batch, logs = NULL)
+#' ```
+#' Called at the beginning of a batch in `evaluate()` methods.
+#'
+#' Also called at the beginning of a validation batch in the `fit()`
+#' methods, if validation data is provided.
+#'
+#' Subclasses should override for any actions to run.
+#'
+#' Note that if the `steps_per_execution` argument to `compile()` in
+#' `Model` is set to `N`, this method will only be called every
+#' `N` batches.
+#'
+#' Args:
+#' * `batch`: Integer, index of batch within the current epoch.
+#' * `logs`: Named list. Currently no data is passed to this argument for this
+#'   method but that may change in the future.
+#'
+#' @param
+#' on_test_batch_end
+#' ```r
+#' \(batch, logs = NULL)
+#' ```
+#' Called at the end of a batch in `evaluate()` methods.
+#'
+#' Also called at the end of a validation batch in the `fit()`
+#' methods, if validation data is provided.
+#'
+#' Subclasses should override for any actions to run.
+#'
+#' Note that if the `steps_per_execution` argument to `compile()` in
+#' `Model` is set to `N`, this method will only be called every
+#' `N` batches.
+#'
+#' Args:
+#' * `batch`: Integer, index of batch within the current epoch.
+#' * `logs`: Named list. Aggregated metric results up until this batch.
+#'
+#' @param
+#' on_test_begin
+#' ```r
+#' \(logs = NULL)
+#' ```
+#' Called at the beginning of evaluation or validation.
+#'
+#' Subclasses should override for any actions to run.
+#'
+#' Args:
+#' * `logs`: Named list. Currently no data is passed to this argument for this
+#'   method but that may change in the future.
+#'
+#' @param
+#' on_test_end
+#' ```r
+#' \(logs = NULL)
+#' ```
+#' Called at the end of evaluation or validation.
+#'
+#' Subclasses should override for any actions to run.
+#'
+#' Args:
+#' * `logs`: Named list. Currently the output of the last call to
+#'   `on_test_batch_end()` is passed to this argument for this method
+#'   but that may change in the future.
+#'
+#' @param
+#' on_train_batch_begin
+#' ```r
+#' \(batch, logs = NULL)
+#' ```
+#' Called at the beginning of a training batch in `fit()` methods.
+#'
+#' Subclasses should override for any actions to run.
+#'
+#' Note that if the `steps_per_execution` argument to `compile()` in
+#' `Model` is set to `N`, this method will only be called every
+#' `N` batches.
+#'
+#' Args:
+#' * `batch`: Integer, index of batch within the current epoch.
+#' * `logs`: Named list. Currently no data is passed to this argument for this
+#'   method but that may change in the future.
+#'
+#' @param
+#' on_train_batch_end
+#' ```r
+#' \(batch, logs = NULL)
+#' ```
+#' Called at the end of a training batch in `fit()` methods.
+#'
+#' Subclasses should override for any actions to run.
+#'
+#' Note that if the `steps_per_execution` argument to `compile()` in
+#' `Model` is set to `N`, this method will only be called every
+#' `N` batches.
+#'
+#' Args:
+#' * `batch`: Integer, index of batch within the current epoch.
+#' * `logs`: Named list. Aggregated metric results up until this batch.
+#'
+#' @param
+#' on_train_begin
+#' ```r
+#' \(logs = NULL)
+#' ```
+#' Called at the beginning of training.
+#'
+#' Subclasses should override for any actions to run.
+#'
+#' Args:
+#' * `logs`: Named list. Currently no data is passed to this argument for this
+#'   method but that may change in the future.
+#'
+#' @param
+#' on_train_end
+#' ```r
+#' \(logs = NULL)
+#' ```
+#' Called at the end of training.
+#'
+#' Subclasses should override for any actions to run.
+#'
+#' Args:
+#' * `logs`: Named list. Currently the output of the last call to
+#'   `on_epoch_end()` is passed to this argument for this method but
+#'   that may change in the future.
+#'
+#'
+# commented out until we have an appropriate 1-based wrapper
+# for CallbackList.
+# ' @details
+# '
+# ' If you want to use `Callback` objects in a custom training loop:
+# '
+# ' 1. You should pack all your callbacks into a single `keras$callbacks$CallbackList`
+# '    so they can all be called together.
+# ' 2. 
You will need to manually call all the `on_*` methods at the appropriate +# ' locations in your loop. Like this: +# ' +# ' Example: +# ' +# ' ```r +# ' CallbackList <- function(...) +# ' reticulate::import("keras")$callbacks$CallbackList(list(...)) +# ' enumerate <- reticulate::import_builtins()$enumerate +# ' callbacks <- CallbackList(callback1(), callback2(), ...) +# ' callbacks$append(callback3()) +# ' callbacks$on_train_begin(...) +# ' for (epoch in seq(0, len = EPOCHS)) { +# ' callbacks$on_epoch_begin(epoch) +# ' ds_iterator <- as_iterator(enumerate(dataset)) +# ' while (!is.null(c(i, batch) %<-% iter_next(ds_iterator))) { +# ' callbacks$on_train_batch_begin(i) +# ' batch_logs <- model$train_step(batch) +# ' callbacks$on_train_batch_end(i, batch_logs) +# ' } +# ' epoch_logs <- ... +# ' callbacks$on_epoch_end(epoch, epoch_logs) +# ' } +# ' final_logs <- ... +# ' callbacks$on_train_end(final_logs) +# ' ``` +#' @returns A function that returns the custom `Callback` instances, +#' similar to the builtin callback functions. +#' @inheritSection Layer Symbols in scope +#' @inheritParams Layer +#' @export +#' @tether keras.callbacks.Callback +#' @family callbacks +#' @seealso +#' + +# + +Callback <- +function(classname, + on_epoch_begin = NULL, + on_epoch_end = NULL, + on_train_begin = NULL, + on_train_end = NULL, + on_train_batch_begin = NULL, + on_train_batch_end = NULL, + on_test_begin = NULL, + on_test_end = NULL, + on_test_batch_begin = NULL, + on_test_batch_end = NULL, + on_predict_begin = NULL, + on_predict_end = NULL, + on_predict_batch_begin = NULL, + on_predict_batch_end = NULL, + ..., + public = list(), + private = list(), + inherit = NULL, + parent_env = parent.frame()) +{ + + members <- drop_nulls(named_list( + on_epoch_begin, on_epoch_end, + on_train_begin, on_train_end, + on_train_batch_begin, on_train_batch_end, + on_test_begin, on_test_end, + on_test_batch_begin, on_test_batch_end, + on_predict_begin, on_predict_end, + on_predict_batch_begin, on_predict_batch_end + )) + members <- modifyList(members, list2(...), keep.null = FALSE) + members <- modifyList(members, public, keep.null = TRUE) + + members <- modify_intersection(members, list( + from_config = function(x) decorate_method(x, "classmethod"), + on_epoch_begin = decorate_callback_method_sig_idx_logs, + on_epoch_end = decorate_callback_method_sig_idx_logs, + on_train_begin = decorate_callback_method_sig_logs, + on_train_end = decorate_callback_method_sig_logs, + + # on_batch_{begin,end} are backwards compatible + # aliases for `on_train_batch_{begin,end}` + on_batch_begin = decorate_callback_method_sig_idx_logs, + on_batch_end = decorate_callback_method_sig_idx_logs, + + on_train_batch_begin = decorate_callback_method_sig_idx_logs, + on_train_batch_end = decorate_callback_method_sig_idx_logs, + on_test_begin = decorate_callback_method_sig_logs, + on_test_end = decorate_callback_method_sig_logs, + on_test_batch_begin = decorate_callback_method_sig_idx_logs, + on_test_batch_end = decorate_callback_method_sig_idx_logs, + on_predict_begin = decorate_callback_method_sig_logs, + on_predict_end = decorate_callback_method_sig_logs, + on_predict_batch_begin = decorate_callback_method_sig_idx_logs, + on_predict_batch_end = decorate_callback_method_sig_idx_logs + )) + + inherit <- substitute(inherit) %||% + quote(base::asNamespace("keras3")$keras$callbacks$Callback) + + new_wrapped_py_class( + classname = classname, + members = members, + inherit = inherit, + parent_env = parent_env, + private = private + ) +} + +# some indirection 
in the decorators to allow for delayed initialization of
+# Python.
+decorate_callback_method_sig_idx_logs <- function(fn) {
+  decorate_method(fn, wrap_callback_method_sig_idx_logs)
+}
+
+decorate_callback_method_sig_logs <- function(fn) {
+  decorate_method(fn, wrap_callback_method_sig_logs)
+}
+
+wrap_callback_method_sig_idx_logs <- function(fn) {
+  tools <- import_callback_tools()
+  tools$wrap_sig_self_idx_logs(fn)
+}
+
+wrap_callback_method_sig_logs <- function(fn) {
+  tools <- import_callback_tools()
+  tools$wrap_sig_self_logs(fn)
+}
+
+import_kerastools <- function(x) {
+  import_from_path(
+    paste0(c("kerastools", x), collapse = "."),
+    path = system.file("python", package = "keras3")
+  )
+}
+
+
+import_callback_tools <- function() {
+  import_from_path(
+    "kerastools.callback",
+    path = system.file("python", package = "keras3"))
+}
+
+#' @export
+# needed so `self$model$stop_training <- TRUE` doesn't try to reset
+# the `model` attr, which is a @property that raises AttributeError
+`$<-.keras.src.callbacks.callback.Callback` <- function(x, name, value) {
+  if(name == "model" && py_is(value, py_get_attr(x, "model", TRUE)))
+    return(x)
+  NextMethod()
+}
diff --git a/R/Constraint.R b/R/Constraint.R
new file mode 100644
index 0000000000..ba85158339
--- /dev/null
+++ b/R/Constraint.R
@@ -0,0 +1,103 @@
+#' Define a custom `Constraint` class
+#'
+#' @description
+#' Base class for weight constraints.
+#'
+#' A `Constraint()` instance works like a stateless function.
+#' Users who subclass the `Constraint` class should override
+#' the `call()` method, which takes a single
+#' weight parameter and returns a projected version of that parameter
+#' (e.g. normalized or clipped). Constraints can be used with various Keras
+#' layers via the `kernel_constraint` or `bias_constraint` arguments.
+#'
+#' Here's a simple example of a non-negative weight constraint:
+#' ```{r}
+#' constraint_nonnegative <- Constraint("NonNegative",
+#'   call = function(w) {
+#'     w * op_cast(w >= 0, dtype = w$dtype)
+#'   }
+#' )
+#' weight <- op_convert_to_tensor(c(-1, 1))
+#' constraint_nonnegative()(weight)
+#' ```
+#'
+#' Usage in a layer:
+#' ```{r, output = FALSE}
+#' layer_dense(units = 4, kernel_constraint = constraint_nonnegative())
+#' ```
+#'
+#' @param
+#' call
+#' ```r
+#' \(w)
+#' ```
+#' Applies the constraint to the input weight variable.
+#'
+#' By default, the input weight variable is not modified.
+#' Users should override this method to implement their own projection
+#' function.
+#'
+#' Args:
+#' * `w`: Input weight variable.
+#'
+#' Returns:
+#' Projected variable (by default, returns unmodified inputs).
+#'
+#' @param
+#' get_config
+#' ```r
+#' \()
+#' ```
+#' Function that returns a named list of the object config.
+#'
+#' A constraint config is a named list (JSON-serializable) that can
+#' be used to reinstantiate the same object
+#' (via `do.call(<constraint>, <config>)`).
+#'
+#' @returns A function that returns `Constraint` instances, similar to the
+#'   builtin constraint functions like `constraint_maxnorm()`.
+#' @tether keras.constraints.Constraint +#' @inheritSection Layer Symbols in scope +#' @inheritParams Layer +#' @family constraints +#' @export +Constraint <- function(classname, call = NULL, get_config = NULL, + ..., + public = list(), + private = list(), + inherit = NULL, + parent_env = parent.frame()) { + + members <- Reduce(function(x, y) modifyList(x, y, keep.null = TRUE), + list(drop_nulls(named_list(call, get_config)), + list2(...), + public)) + + members <- rename(members, "__call__" = "call", + .skip_existing = TRUE) + + members <- modify_intersection(members, list( + from_config = function(x) decorate_method(x, "classmethod") + )) + + inherit <- substitute(inherit) %||% + quote(base::asNamespace("keras3")$keras$constraints$Constraint) + + new_wrapped_py_class( + classname = classname, + members = members, + inherit = inherit, + parent_env = parent_env, + private = private + ) +} + + +as_constraint <- function(x) { + if(inherits(x, "keras.src.constraints.constraints.Constraint")) + return(x) + if(!is_bare_r_function(x)) + stop("constraint must be a keras3::Constraint() instance or a bare function") + classname <- get_function_name(x) %||% "CustomConstraint" + Constraint(classname, call = x)() +} diff --git a/R/Layer.R b/R/Layer.R index e47426d585..0eed99e4ee 100644 --- a/R/Layer.R +++ b/R/Layer.R @@ -1,88 +1,673 @@ -#' (Deprecated) Create a custom Layer -#' -#' This function is maintained but deprecated. Please use `new_layer_class()` or -#' `%py_class%` to define custom layers. -#' -#' @param classname the name of the custom Layer. -#' @param initialize a function. This is where you define the arguments used to further -#' build your layer. For example, a dense layer would take the `units` argument. -#' You should always call \code{super()$`__init__()`} to initialize the base -#' inherited layer. -#' @param build a function that takes `input_shape` as argument. This is where you will -#' define your weights. Note that if your layer doesn't define trainable weights then -#' you need not implement this method. -#' @param call This is where the layer's logic lives. Unless you want your layer to -#' support masking, you only have to care about the first argument passed to `call` -#' (the input tensor). -#' @param compute_output_shape a function that takes `input_shape` as an argument. In -#' case your layer modifies the shape of its input, you should specify here the -#' shape transformation logic. This allows Keras to do automatic shape inference. -#' If you don't modify the shape of the input then you need not implement this -#' method. -#' @param ... Any other methods and/or attributes can be specified using named -#' arguments. They will be added to the layer class. -#' @param inherit the Keras layer to inherit from. -#' @return A function that wraps `create_layer`, similar to `keras::layer_dense`. -#' @examples -#' \dontrun{ -#' -#' layer_dense2 <- Layer( -#' "Dense2", -#' -#' initialize = function(units) { -#' super()$`__init__`() -#' self$units <- as.integer(units) +#' Define a custom `Layer` class. +#' +#' @description +#' A layer is a callable object that takes as input one or more tensors and +#' that outputs one or more tensors. It involves *computation*, defined +#' in the `call()` method, and a *state* (weight variables). 
State can be
+#' created:
+#'
+#' * in `initialize()`, for instance via `self$add_weight()`;
+#' * in the optional `build()` method, which is invoked by the first
+#'   `call()` to the layer, and supplies the shape(s) of the input(s),
+#'   which may not have been known at initialization time.
+#'
+#' Layers are recursively composable: If you assign a Layer instance as an
+#' attribute of another Layer, the outer layer will start tracking the weights
+#' created by the inner layer. Nested layers should be instantiated in the
+#' `initialize()` method or `build()` method.
+#'
+#' Users will just instantiate a layer and then treat it as a callable.
+#'
+#' # Symbols in scope
+#'
+#' All R function custom methods (public and private) will have the
+#' following symbols in scope:
+#' * `self`: The custom class instance.
+#' * `super`: The custom class superclass.
+#' * `private`: An R environment specific to the class instance.
+#'   Any objects assigned here are invisible to the Keras framework.
+#' * `__class__` and `as.symbol(classname)`: the custom class type object.
+#'
+#' # Attributes
+#'
+#' * `name`: The name of the layer (string).
+#' * `dtype`: Dtype of the layer's weights. Alias of `layer$variable_dtype`.
+#' * `variable_dtype`: Dtype of the layer's weights.
+#' * `compute_dtype`: The dtype of the layer's computations.
+#'   Layers automatically cast inputs to this dtype, which causes
+#'   the computations and output to also be in this dtype.
+#'   When mixed precision is used with a
+#'   `keras$mixed_precision$DTypePolicy`, this will be different
+#'   than `variable_dtype`.
+#' * `trainable_weights`: List of variables to be included in backprop.
+#' * `non_trainable_weights`: List of variables that should not be
+#'   included in backprop.
+#' * `weights`: The concatenation of the lists `trainable_weights` and
+#'   `non_trainable_weights` (in this order).
+#' * `trainable`: Whether the layer should be trained (boolean), i.e.
+#'   whether its potentially-trainable weights should be returned
+#'   as part of `layer$trainable_weights`.
+#' * `input_spec`: Optional (list of) `InputSpec` object(s) specifying the
+#'   constraints on inputs that can be accepted by the layer.
+#'
+#' We recommend that custom `Layer`s implement the following methods:
+#'
+#' * `initialize()`: Defines custom layer attributes, and creates layer weights
+#'   that do not depend on input shapes, using `add_weight()`,
+#'   or other state.
+#' * `build(input_shape)`: This method can be used to create weights that
+#'   depend on the shape(s) of the input(s), using `add_weight()`, or other
+#'   state. Calling `call()` will automatically build the layer
+#'   (if it has not been built yet) by calling `build()`.
+#' * `call(...)`: Method called after making
+#'   sure `build()` has been called. `call()` performs the logic of applying
+#'   the layer to the input arguments.
+#'   Two reserved arguments you can optionally use in `call()` are:
+#'
+#'   1. `training` (boolean, whether the call is in inference mode or
+#'      training mode).
+#'   2. `mask` (boolean tensor encoding masked timesteps in the input,
+#'      used e.g. in RNN layers).
+#'
+#'   A typical signature for this method is `call(inputs)`, and users
+#'   can optionally add `training` and `mask` if the layer needs them.
+#' * `get_config()`: Returns a named list containing the configuration
+#'   used to initialize this layer. If the list names differ from the arguments
+#'   in `initialize()`, then override `from_config()` as well.
+#' This method is used when saving
+#' the layer or a model that contains this layer.
+#'
+#' # Examples
+#' Here's a basic example: a layer with two variables, `w` and `b`,
+#' that returns `y <- (w %*% x) + b`.
+#' It shows how to implement `build()` and `call()`.
+#' Variables set as attributes of a layer are tracked as weights
+#' of the layers (in `layer$weights`).
+#'
+#' ```{r}
+#' layer_simple_dense <- Layer(
+#'   "SimpleDense",
+#'   initialize = function(units = 32) {
+#'     super$initialize()
+#'     self$units <- units
#'   },
#'
+#'   # Create the state of the layer (weights)
#'   build = function(input_shape) {
-#'     print(class(input_shape))
#'     self$kernel <- self$add_weight(
-#'       name = "kernel",
-#'       shape = list(input_shape[[2]], self$units),
-#'       initializer = "uniform",
-#'       trainable = TRUE
+#'       shape = shape(tail(input_shape, 1), self$units),
+#'       initializer = "glorot_uniform",
+#'       trainable = TRUE,
+#'       name = "kernel"
+#'     )
+#'     self$bias <- self$add_weight(
+#'       shape = shape(self$units),
+#'       initializer = "zeros",
+#'       trainable = TRUE,
+#'       name = "bias"
#'     )
#'   },
#'
-#'   call = function(x) {
-#'     tensorflow::tf$matmul(x, self$kernel)
+#'   # Defines the computation
+#'   call = function(inputs) {
+#'     op_matmul(inputs, self$kernel) + self$bias
+#'   }
+#' )
+#'
+#' # Instantiates the layer.
+#' # Supply missing `object` arg to skip invoking `call()` and instead return
+#' # the Layer instance
+#' linear_layer <- layer_simple_dense(, 4)
+#'
+#' # This will call `build(input_shape)` and create the weights,
+#' # and then invoke `call()`.
+#' y <- linear_layer(op_ones(c(2, 2)))
+#' stopifnot(length(linear_layer$weights) == 2)
+#'
+#' # These weights are trainable, so they're listed in `trainable_weights`:
+#' stopifnot(length(linear_layer$trainable_weights) == 2)
+#' ```
+#'
+#' Besides trainable weights, updated via backpropagation during training,
+#' layers can also have non-trainable weights. These weights are meant to
+#' be updated manually during `call()`. Here's an example layer that computes
+#' the running sum of its inputs:
+#'
+#' ```{r}
+#' layer_compute_sum <- Layer(
+#'   classname = "ComputeSum",
+#'
+#'   initialize = function(input_dim) {
+#'     super$initialize()
+#'
+#'     # Create a non-trainable weight.
+#'     self$total <- self$add_weight(
+#'       shape = shape(),
+#'       initializer = "zeros",
+#'       trainable = FALSE,
+#'       name = "total"
+#'     )
#'   },
#'
-#'   compute_output_shape = function(input_shape) {
-#'     list(input_shape[[1]], self$units)
+#'   call = function(inputs) {
+#'     self$total$assign(self$total + op_sum(inputs))
+#'     self$total
#'   }
-#'
#' )
#'
-#' l <- layer_dense2(units = 10)
-#' l(matrix(runif(10), ncol = 1))
+#' my_sum <- layer_compute_sum(, 2)
+#' x <- op_ones(c(2, 2))
+#' y <- my_sum(x)
+#'
+#' stopifnot(exprs = {
+#'   all.equal(my_sum$weights, list(my_sum$total))
+#'   all.equal(my_sum$non_trainable_weights, list(my_sum$total))
+#'   all.equal(my_sum$trainable_weights, list())
+#' })
+#' ```
+#'
+#' @details
+#'
+#' # Methods available
+#'
+#' * ```r
+#'   initialize(...,
+#'              activity_regularizer = NULL,
+#'              trainable = TRUE,
+#'              dtype = NULL,
+#'              autocast = TRUE,
+#'              name = NULL)
+#'   ```
+#'   Initialize self. This method is typically called from a custom `initialize()` method.
+#'   Example:
+#'
+#'   ```r
+#'   layer_my_layer <- Layer("MyLayer",
+#'     initialize = function(units, ..., dtype = NULL, name = NULL) {
+#'       super$initialize(..., dtype = dtype, name = name)
+#'       # .... finish initializing `self` instance
+#'     }
+#'   )
+#'   ```
+#'   Args:
+#'   * `trainable`: Boolean, whether the layer's variables should be trainable.
+#'   * `name`: String name of the layer.
+#'   * `dtype`: The dtype of the layer's computations and weights. Can also be a
+#'     `keras$DTypePolicy`,
+#'     which allows the computation and
+#'     weight dtype to differ. Defaults to `NULL`. `NULL` means to use
+#'     `config_dtype_policy()`,
+#'     which is a `"float32"` policy unless set to a different value
+#'     (via `config_set_dtype_policy()`).
+#'
+#' * ```r
+#'   add_loss(loss)
+#'   ```
+#'   Can be called inside of the `call()` method to add a scalar loss.
+#'
+#'   Example:
+#'
+#'   ```r
+#'   Layer("MyLayer",
+#'     ...
+#'     call = function(x) {
+#'       self$add_loss(op_sum(x))
+#'       x
+#'     }
+#'   )
+#'   ```
+#'
+#' * ```r
+#'   add_metric()
+#'   ```
+#'
+#' * ```r
+#'   add_variable(...)
+#'   ```
+#'   Add a weight variable to the layer.
+#'
+#'   Alias of `add_weight()`.
+#'
+#' * ```r
+#'   add_weight(shape = NULL,
+#'              initializer = NULL,
+#'              dtype = NULL,
+#'              trainable = TRUE,
+#'              autocast = TRUE,
+#'              regularizer = NULL,
+#'              constraint = NULL,
+#'              aggregation = 'mean',
+#'              name = NULL)
+#'   ```
+#'   Add a weight variable to the layer.
+#'
+#'   Args:
+#'   * `shape`: shape for the variable (as defined by [`shape()`]).
+#'     Must be fully-defined (no `NA`/`NULL`/`-1` entries).
+#'     Defaults to `()` (scalar) if unspecified.
+#'   * `initializer`: Initializer object to use to
+#'     populate the initial variable value,
+#'     or string name of a built-in initializer
+#'     (e.g. `"random_normal"`). If unspecified,
+#'     defaults to `"glorot_uniform"`
+#'     for floating-point variables and to `"zeros"`
+#'     for all other types (e.g. int, bool).
+#'   * `dtype`: Dtype of the variable to create,
+#'     e.g. `"float32"`. If unspecified,
+#'     defaults to the layer's
+#'     variable dtype (which itself defaults to
+#'     `"float32"` if unspecified).
+#'   * `trainable`: Boolean, whether the variable should
+#'     be trainable via backprop or whether its
+#'     updates are managed manually.
+#'     Defaults to `TRUE`.
+#'   * `autocast`: Boolean, whether to autocast the layer's variables when
+#'     accessing them. Defaults to `TRUE`.
+#'   * `regularizer`: Regularizer object to call to apply penalty on the
+#'     weight. These penalties are summed into the loss function
+#'     during optimization. Defaults to `NULL`.
+#'   * `constraint`: Constraint object to call on the
+#'     variable after any optimizer update,
+#'     or string name of a built-in constraint.
+#'     Defaults to `NULL`.
+#'   * `aggregation`: String, one of `'mean'`, `'sum'`,
+#'     `'only_first_replica'`. Annotates the variable with the type
+#'     of multi-replica aggregation to be used for this variable
+#'     when writing custom data parallel training loops.
+#'   * `name`: String name of the variable. Useful for debugging purposes.
+#'
+#'   Returns:
+#'
+#'   A backend tensor, wrapped in a `KerasVariable` class.
+#'   The `KerasVariable` class has:
+#'
+#'   Methods:
+#'   - `assign(value)`
+#'   - `assign_add(value)`
+#'   - `assign_sub(value)`
+#'   - `numpy()` (calling `as.array()` is preferred)
+#'
+#'   Properties/Attributes:
+#'   - `value`
+#'   - `dtype`
+#'   - `ndim`
+#'   - `shape` (calling `shape()` is preferred)
+#'   - `trainable`
+#'
+#' * ```r
+#'   build(input_shape)
+#'   ```
+#'
+#' * ```r
+#'   build_from_config(config)
+#'   ```
+#'   Builds the layer's states with the supplied config (named list of args).
+#'
+#'   By default, this method calls `do.call(build, config$input_shape)`,
+#'   which creates weights based on the layer's input shape in the supplied
+#'   config.
+#'   If your config contains other information needed to load the
+#'   layer's state, you should override this method.
+#'
+#'   Args:
+#'   * `config`: Named list containing the input shape associated with this layer.
+#'
+#' * ```r
+#'   call(...)
+#'   ```
+#'   See description above
+#'
+#' * ```r
+#'   compute_mask(inputs, previous_mask)
+#'   ```
+#'
+#' * ```r
+#'   compute_output_shape(...)
+#'   ```
+#'
+#' * ```r
+#'   compute_output_spec(...)
+#'   ```
+#'
+#' * ```r
+#'   count_params()
+#'   ```
+#'   Count the total number of scalars composing the weights.
+#'
+#'   Returns:
+#'   An integer count.
+#'
+#'
+#' * ```r
+#'   get_build_config()
+#'   ```
+#'   Returns a named list with the layer's input shape.
+#'
+#'   This method returns a config (named list) that can be used by
+#'   `build_from_config(config)` to create all states (e.g. Variables and
+#'   Lookup tables) needed by the layer.
+#'
+#'   By default, the config only contains the input shape that the layer
+#'   was built with. If you're writing a custom layer that creates state in
+#'   an unusual way, you should override this method to make sure this state
+#'   is already created when Keras attempts to load its value upon model
+#'   loading.
+#'
+#'   Returns:
+#'   A named list containing the input shape associated with the layer.
+#'
+#' * ```r
+#'   get_config()
+#'   ```
+#'   Returns the config of the object.
+#'
+#'   An object config is a named list (serializable)
+#'   containing the information needed to re-instantiate it.
+#'   The config is expected to be serializable to JSON, and is expected
+#'   to consist of a (potentially complex, nested) structure of named lists
+#'   consisting of simple objects like strings and integers.
+#'
+#' * ```r
+#'   get_weights()
+#'   ```
+#'   Return the values of `layer$weights` as a list of R or NumPy arrays.
+#'
+#' * ```r
+#'   quantize(mode)
+#'   ```
+#'   Currently, only the `Dense` and `EinsumDense` layers support in-place
+#'   quantization via this `quantize()` method.
+#'
+#'   Example:
+#'   ```r
+#'   model$quantize("int8") # quantize model in-place
+#'   model |> predict(data) # faster inference
+#'   ```
+#'
+#' * ```r
+#'   quantized_call(...)
+#'   ```
+#'
+#' * ```r
+#'   load_own_variables(store)
+#'   ```
+#'   Loads the state of the layer.
#'
-#' You can override this method to take full control of how the state of
+#'   the layer is loaded upon calling `load_model()`.
#'
-#' @keywords internal
+#'   Args:
+#'   * `store`: Named list from which the state of the model will be loaded.
+#'
+#' * ```r
+#'   save_own_variables(store)
+#'   ```
+#'   Saves the state of the layer.
+#'
+#'   You can override this method to take full control of how the state of
+#'   the layer is saved upon calling `save_model()`.
+#'
+#'   Args:
+#'   * `store`: Named list where the state of the model will be saved.
+#'
+#' * ```r
+#'   set_weights(weights)
+#'   ```
+#'   Sets the values of `weights` from a list of R or NumPy arrays.
+#'
+#' * ```r
+#'   stateless_call(trainable_variables, non_trainable_variables,
+#'                  ..., return_losses = FALSE)
+#'   ```
+#'   Call the layer without any side effects.
+#'
+#'   Args:
+#'   * `trainable_variables`: List of trainable variables of the model.
+#'   * `non_trainable_variables`: List of non-trainable variables of the
+#'     model.
+#'   * `...`: Positional and named arguments to be passed to `call()`.
+#'   * `return_losses`: If `TRUE`, `stateless_call()` will return the list of
+#'     losses created during `call()` as part of its return values.
+#'
+#'   Returns:
+#'   An unnamed list. By default, returns `list(outputs, non_trainable_variables)`.
+#'   If `return_losses = TRUE`, then returns
+#'   `list(outputs, non_trainable_variables, losses)`.
+#'
+#'   Note: `non_trainable_variables` include not only non-trainable weights
+#'   such as `BatchNormalization` statistics, but also RNG seed state
+#'   (if there are any random operations part of the layer, such as dropout),
+#'   and `Metric` state (if there are any metrics attached to the layer).
+#'   These are all elements of state of the layer.
+#'
+#'   Example:
+#'
+#'   ```r
+#'   model <- ...
+#'   data <- ...
+#'   trainable_variables <- model$trainable_variables
+#'   non_trainable_variables <- model$non_trainable_variables
+#'   # Call the model with zero side effects
+#'   c(outputs, non_trainable_variables) %<-% model$stateless_call(
+#'     trainable_variables,
+#'     non_trainable_variables,
+#'     data
+#'   )
+#'   # Attach the updated state to the model
+#'   # (until you do this, the model is still in its pre-call state).
+#'   purrr::walk2(
+#'     model$non_trainable_variables, non_trainable_variables,
+#'     \(variable, value) variable$assign(value))
+#'   ```
+#'
+#' * ```r
+#'   symbolic_call(...)
+#'   ```
+#'
+#' * ```r
+#'   from_config(config)
+#'   ```
+#'
+#'   Creates a layer from its config.
+#'
+#'   This is a class method, meaning the R function will not have a `self`
+#'   symbol (a class instance) in scope. Use `__class__` (or the classname symbol
+#'   provided when the `Layer()` was constructed) to resolve the class definition.
+#'   The default implementation is:
+#'   ```r
+#'   from_config = function(config) {
+#'     do.call(`__class__`, config)
+#'   }
+#'   ```
+#'
+#'   This method is the reverse of `get_config()`,
+#'   capable of instantiating the same layer from the config
+#'   named list. It does not handle layer connectivity
+#'   (handled by Network), nor weights (handled by `set_weights()`).
+#'
+#'   Args:
+#'   * `config`: A named list, typically the
+#'     output of `get_config()`.
+#'
+#'   Returns:
+#'   A layer instance.
+#'
+#'
+#' # Readonly properties:
+#'
+#' * `compute_dtype`
+#'   The dtype of the computations performed by the layer.
+#'
+#' * `dtype`
+#'   Alias of `layer$variable_dtype`.
+#'
+#' * `input_dtype`
+#'   The dtype layer inputs should be converted to.
+#'
+#' * `losses`
+#'   List of scalar losses from `add_loss()`, regularizers and sublayers.
+#'
+#' * `metrics`
+#'   List of all metrics.
+#'
+#' * `metrics_variables`
+#'   List of all metric variables.
+#'
+#' * `non_trainable_variables`
+#'   List of all non-trainable layer state.
+#'
+#'   This extends `layer$non_trainable_weights` to include all state used by
+#'   the layer including state for metrics and `SeedGenerator`s.
+#'
+#' * `non_trainable_weights`
+#'   List of all non-trainable weight variables of the layer.
+#'
+#'   These are the weights that should not be updated by the optimizer during
+#'   training. Unlike `layer$non_trainable_variables`, this excludes metric
+#'   state and random seeds.
+#'
+#' * `trainable_variables`
+#'   List of all trainable layer state.
+#'
+#'   This is equivalent to `layer$trainable_weights`.
+#'
+#' * `trainable_weights`
+#'   List of all trainable weight variables of the layer.
+#'
+#'   These are the weights that get updated by the optimizer during training.
+#'
+#' * `variable_dtype`
+#'   The dtype of the state (weights) of the layer.
+#'
+#' * `variables`
+#'   List of all layer state, including random seeds.
+#'
+#'   This extends `layer$weights` to include all state used by the layer
+#'   including `SeedGenerator`s.
+#'
+#'   Note that metrics variables are not included here, use
+#'   `metrics_variables` to visit all the metric variables.
+#'
+#' * `weights`
+#'   List of all weight variables of the layer.
+#'
+#'   Unlike `layer$variables`, this excludes metric state and random seeds.
+#'
+#' * `input`
+#'   Retrieves the input tensor(s) of a symbolic operation.
+#'
+#'   Only returns the tensor(s) corresponding to the *first time*
+#'   the operation was called.
+#'
+#'   Returns:
+#'   Input tensor or list of input tensors.
+#'
+#' * `output`
+#'   Retrieves the output tensor(s) of a layer.
+#'
+#'   Only returns the tensor(s) corresponding to the *first time*
+#'   the operation was called.
+#'
+#'   Returns:
+#'   Output tensor or list of output tensors.
+#'
+#' # Data descriptors (Attributes):
+#'
+#' * `dtype_policy`
+#'
+#' * `input_spec`
+#'
+#' * `supports_masking`
+#'   Whether this layer supports computing a mask using `compute_mask`.
+#'
+#' * `trainable`
+#'   Settable boolean, whether this layer should be trainable or not.
+#'
+#' @param classname String, the name of the custom class. (Conventionally, CamelCase).
+#' @param initialize,call,build,get_config Recommended methods to implement. See
+#'   description and details sections.
+#' @param ...,public Additional methods or public members of the custom class.
+#' @param private Named list of R objects (typically, functions) to include in
+#'   instance private environments. `private` methods will have all the same
+#'   symbols in scope as public methods (See section "Symbols in Scope"). Each
+#'   instance will have its own `private` environment. Any objects
+#'   in `private` will be invisible from the Keras framework and the Python
+#'   runtime.
+#' @param parent_env The R environment that all class methods will have as a grandparent.
+#' @param inherit What the custom class will subclass. By default, the base keras class.
+#'
+#' @returns A composing layer constructor, with similar behavior to other layer
+#'   functions like `layer_dense()`. The first argument of the returned function
+#'   will be `object`, enabling `initialize()`ing and `call()`ing the layer in one
+#'   step while composing the layer with the pipe, like
+#'
+#' ```r
+#' layer_foo <- Layer("Foo", ....)
+#' output <- inputs |> layer_foo()
+#' ```
+#' To only `initialize()` a layer instance and not `call()` it, pass a missing
+#' or `NULL` value to `object`, or pass all arguments to `initialize()` by name.
+#'
+#' ```r
+#' layer <- layer_dense(units = 2, activation = "relu")
+#' layer <- layer_dense(NULL, 2, activation = "relu")
+#' layer <- layer_dense(, 2, activation = "relu")
+#'
+#' # then you can call() the layer in a separate step
+#' outputs <- inputs |> layer()
+#' ```
+#'
+#' @tether keras.layers.Layer
 #' @export
+#' @family layers
+#' @importFrom utils modifyList
+#' @seealso
+#' +
# +
 Layer <-
-function(classname, initialize, build = NULL, call = NULL,
-         compute_output_shape = NULL, ...,
-         inherit = keras::keras$layers$Layer) {
+function(classname,
+         initialize = NULL,
+         call = NULL,
+         build = NULL,
+         get_config = NULL,
+         ...,
+         public = list(),
+         private = list(),
+         inherit = NULL,
+         parent_env = parent.frame()) {
+
+  members <- drop_nulls(named_list(initialize, call, build, get_config))
+  members <- modifyList(members, list2(...), keep.null = TRUE)
+  members <- modifyList(members, public, keep.null = TRUE)

-  public <- capture_args(match.call(), ignore = c("classname", "inherit"))
-  for(ignore_if_null in c("build", "call", "compute_output_shape"))
-    public[[ignore_if_null]] <- public[[ignore_if_null]]
+  members <- modify_intersection(members, list(
+    from_config = function(x) decorate_method(x, "classmethod")
+  ))

-  inherit <- substitute(inherit)
-  parent_env <- parent.frame()
+  inherit <- substitute(inherit) %||%
+    quote(base::asNamespace("keras3")$keras$Layer)

-  # R6Class() calls substitute() on inherit;
-  r_cls <- eval(as.call(list(
-    quote(R6::R6Class),
+  wrapper <- new_wrapped_py_class(
     classname = classname,
-    public = public,
-    active = NULL,
+    members = members,
     inherit = inherit,
-    cloneable = FALSE,
-    parent_env = parent_env
-  )))
+    parent_env = parent_env,
+    private = private
+  )

-  create_layer_wrapper(r_cls)
+  # convert the wrapper into a composing layer wrapper
+  prepend(formals(wrapper)) <- alist(object = )
+  body(wrapper) <- bquote({
+    args <- capture_args(ignore = "object",
+                         enforce_all_dots_named = FALSE)
+    create_layer(.(as.symbol(classname)), object, args)
+  })
+
+  wrapper
 }
+
+
+
+# ' @param .composing Bare Keras Layers (`layer_*` functions) conventionally
+#   have `object` as the first argument, which allows users to instantiate
+#   (`initialize`) and `call` in one motion.
+
+
diff --git a/R/LearningRateSchedule.R b/R/LearningRateSchedule.R
new file mode 100644
index 0000000000..831365c047
--- /dev/null
+++ b/R/LearningRateSchedule.R
@@ -0,0 +1,120 @@
+#' Define a custom `LearningRateSchedule` class
+#'
+#' @description
+#' Subclass the Keras learning rate schedule base class.
+#'
+#' You can use a learning rate schedule to modulate how the learning rate
+#' of your optimizer changes over time.
+#'
+#' Several built-in learning rate schedules are available, such as
+#' [`learning_rate_schedule_exponential_decay()`] or
+#' [`learning_rate_schedule_piecewise_constant_decay()`]:
+#'
+#' ```{r}
+#' lr_schedule <- learning_rate_schedule_exponential_decay(
+#'   initial_learning_rate = 1e-2,
+#'   decay_steps = 10000,
+#'   decay_rate = 0.9
+#' )
+#' optimizer <- optimizer_sgd(learning_rate = lr_schedule)
+#' ```
+#'
+#' A `LearningRateSchedule()` instance can be passed in as the `learning_rate`
+#' argument of any optimizer.
+#'
+#' To implement your own schedule object, you should implement the `call`
+#' method, which takes a `step` argument (a scalar integer backend tensor, the
+#' current training step count).
+#' Note that `step` is 0-based (i.e., the first step is `0`).
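+#'
+#' For instance, a bare-bones inverse-time decay schedule (an illustrative
+#' sketch; the classname and decay rule here are arbitrary) could be written
+#' as:
+#'
+#' ```r
+#' lr_inverse_time <- LearningRateSchedule(
+#'   classname = "InverseTimeDecay",
+#'   call = function(step) {
+#'     # `step` is a 0-based scalar integer tensor
+#'     0.1 / (1 + op_cast(step, "float32"))
+#'   }
+#' )
+#' optimizer <- optimizer_sgd(learning_rate = lr_inverse_time())
+#' ```
+#'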
+#' As with any other Keras object, you can also optionally
+#' make your object serializable by implementing the `get_config()`
+#' and `from_config()` methods.
+#'
+#' # Example
+#'
+#' ```{r}
+#' my_custom_learning_rate_schedule <- LearningRateSchedule(
+#'   classname = "MyLRSchedule",
+#'
+#'   initialize = function(initial_learning_rate) {
+#'     self$initial_learning_rate <- initial_learning_rate
+#'   },
+#'
+#'   call = function(step) {
+#'     # note that `step` is a tensor
+#'     # and call() will be traced via tf_function() or similar.
+#'
+#'     str(step) #
+#'
+#'     # print 'step' every 1000 steps
+#'     op_cond((step %% 1000) == 0,
+#'             \() {tensorflow::tf$print(step); NULL},
+#'             \() {NULL})
+#'     self$initial_learning_rate / (step + 1)
+#'   }
+#' )
+#'
+#' optimizer <- optimizer_sgd(
+#'   learning_rate = my_custom_learning_rate_schedule(0.1)
+#' )
+#'
+#' # You can also call schedule instances directly
+#' # (e.g., for interactive testing, or if implementing a custom optimizer)
+#' schedule <- my_custom_learning_rate_schedule(0.1)
+#' step <- keras$Variable(initializer = op_ones,
+#'                        shape = shape(),
+#'                        dtype = "int64")
+#' schedule(step)
+#' ```
+#'
+#' # Methods available:
+#'
+#' * ```
+#' get_config()
+#' ```
+#'
+#' @param call,initialize,get_config
+#' Recommended methods to implement. See description and details sections.
+#' @returns A function that returns `LearningRateSchedule` instances, similar to the
+#' built-in `learning_rate_schedule_*` family of functions.
+#' @tether keras.optimizers.schedules.LearningRateSchedule
+#' @inheritSection Layer Symbols in scope
+#' @inheritParams Layer
+#' @family optimizer learning rate schedules
+LearningRateSchedule <- function(classname,
+                                 call = NULL,
+                                 initialize = NULL,
+                                 get_config = NULL,
+                                 ...,
+                                 public = list(),
+                                 private = list(),
+                                 inherit = NULL,
+                                 parent_env = parent.frame()) {
+
+  members <- drop_nulls(named_list(initialize, call, get_config))
+  members <- modifyList(members, list2(...), keep.null = TRUE)
+  members <- modifyList(members, public, keep.null = TRUE)
+
+  members <- rename(members, "__call__" = "call",
+                    .skip_existing = TRUE)
+
+  members <- modify_intersection(members, list(
+    from_config = function(x) decorate_method(x, "classmethod")
+  ))
+
+  inherit <- substitute(inherit) %||%
+    quote(base::asNamespace("keras3")$keras$optimizers$schedules$LearningRateSchedule)
+
+  new_wrapped_py_class(
+    classname = classname,
+    members = members,
+    inherit = inherit,
+    parent_env = parent_env,
+    private = private
+  )
+
+}
+
+# TODO: should all optimizers accept a plain R function to `learning_rate`?
diff --git a/R/Loss.R b/R/Loss.R
new file mode 100644
index 0000000000..5d52e311d6
--- /dev/null
+++ b/R/Loss.R
@@ -0,0 +1,111 @@
+
+#' Subclass the base `Loss` class
+#'
+#' Use this to define a custom loss class. Note that in most cases you do not
+#' need to subclass `Loss` to define a custom loss: you can also pass a bare R
+#' function, or a named R function defined with [`custom_metric()`], as a loss
+#' function to `compile()`.
+#'
+#' @param call
+#' ```r
+#' function(y_true, y_pred)
+#' ```
+#' Method to be implemented by subclasses:
+#' Function that contains the logic for loss calculation, using
+#' `y_true` and `y_pred`.
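+#' For example, a mean absolute error loss (an illustrative sketch, not part
+#' of the base class) could be supplied as:
+#' ```r
+#' function(y_true, y_pred) {
+#'   op_mean(op_abs(y_pred - y_true), axis = -1)
+#' }
+#' ```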
+#'
+#' @details
+#'
+#' Example subclass implementation:
+#'
+#' ```{r}
+#' loss_custom_mse <- Loss(
+#'   classname = "CustomMeanSquaredError",
+#'   call = function(y_true, y_pred) {
+#'     op_mean(op_square(y_pred - y_true), axis = -1)
+#'   }
+#' )
+#'
+#' # Usage in compile()
+#' model <- keras_model_sequential(input_shape = 10) |> layer_dense(10)
+#' model |> compile(loss = loss_custom_mse())
+#'
+#' # Standalone usage
+#' mse <- loss_custom_mse(name = "my_custom_mse_instance")
+#'
+#' y_true <- op_arange(20) |> op_reshape(c(4, 5))
+#' y_pred <- op_arange(20) |> op_reshape(c(4, 5)) * 2
+#' (loss <- mse(y_true, y_pred))
+#'
+#' loss2 <- (y_pred - y_true)^2 |>
+#'   op_mean(axis = -1) |>
+#'   op_mean()
+#'
+#' stopifnot(all.equal(as.array(loss), as.array(loss2)))
+#'
+#' sample_weight <- array(c(.25, .25, 1, 1))
+#' (weighted_loss <- mse(y_true, y_pred, sample_weight = sample_weight))
+#'
+#' weighted_loss2 <- (y_true - y_pred)^2 |>
+#'   op_mean(axis = -1) |>
+#'   op_multiply(sample_weight) |>
+#'   op_mean()
+#'
+#' stopifnot(all.equal(as.array(weighted_loss),
+#'                     as.array(weighted_loss2)))
+#' ```
+#
+#' # Methods defined by the base `Loss` class:
+#'
+#' * ```r
+#' initialize(name=NULL, reduction="sum_over_batch_size", dtype=NULL)
+#' ```
+#' Args:
+#' * `name`
+#' * `reduction`: Valid values are one of `{"sum_over_batch_size", "sum", NULL, "none"}`
+#' * `dtype`
+#'
+#' * ```
+#' __call__(y_true, y_pred, sample_weight=NULL)
+#' ```
+#' Call the loss instance as a function, optionally with `sample_weight`.
+#'
+#' * ```r
+#' get_config()
+#' ```
+#'
+#' @returns A function that returns `Loss` instances, similar to the
+#' built-in loss functions.
+#' @inheritSection Layer Symbols in scope
+#' @inheritParams Layer
+#' @export
+#' @family losses
+#' @tether keras.losses.Loss
+Loss <-
+function(classname, call = NULL,
+         ...,
+         public = list(),
+         private = list(),
+         inherit = NULL,
+         parent_env = parent.frame()) {
+
+  members <- drop_nulls(named_list(call))
+  members <- modifyList(members, list2(...), keep.null = TRUE)
+  members <- modifyList(members, public, keep.null = TRUE)
+
+  members <- modify_intersection(members, list(
+    from_config = function(x) decorate_method(x, "classmethod")
+  ))
+
+  inherit <- substitute(inherit) %||%
+    quote(base::asNamespace("keras3")$keras$Loss)
+
+  new_wrapped_py_class(
+    classname = classname,
+    members = members,
+    inherit = inherit,
+    parent_env = parent_env,
+    private = private,
+    default_formals = function(name=NULL, reduction="sum_over_batch_size", dtype=NULL) {}
+  )
+}
diff --git a/R/Metric.R b/R/Metric.R
new file mode 100644
index 0000000000..f25f356fed
--- /dev/null
+++ b/R/Metric.R
@@ -0,0 +1,210 @@
+
+#' Subclass the base `Metric` class
+#'
+#' A `Metric` object encapsulates metric logic and state that can be used to
+#' track model performance during training. It is what is returned by the family
+#' of metric functions that start with prefix `metric_*`, as well as what is
+#' returned by custom metrics defined with `Metric()`.
+#'
+#' # Examples
+#'
+#' ## Usage with `compile()`:
+#' ```r
+#' model |> compile(
+#'   optimizer = 'sgd',
+#'   loss = 'mse',
+#'   metrics = c(metric_SOME_METRIC(), metric_SOME_OTHER_METRIC())
+#' )
+#' ```
+#'
+#' ## Standalone usage:
+#' ```r
+#' m <- metric_SOME_METRIC()
+#' for (e in seq(epochs)) {
+#'   for (i in seq(train_steps)) {
+#'     c(y_true, y_pred, sample_weight = NULL) %<-% ...
+#'     m$update_state(y_true, y_pred, sample_weight)
+#'   }
+#'   cat('Final epoch result: ', as.numeric(m$result()), "\n")
+#'   m$reset_state()
+#' }
+#' ```
+#'
+#' # Full Examples
+#'
+#' ## Usage with `compile()`:
+#' ```{r}
+#' model <- keras_model_sequential()
+#' model |>
+#'   layer_dense(64, activation = "relu") |>
+#'   layer_dense(64, activation = "relu") |>
+#'   layer_dense(10, activation = "softmax")
+#' model |>
+#'   compile(optimizer = optimizer_rmsprop(0.01),
+#'           loss = loss_categorical_crossentropy(),
+#'           metrics = metric_categorical_accuracy())
+#'
+#' data <- random_uniform(c(1000, 32))
+#' labels <- random_uniform(c(1000, 10))
+#'
+#' model |> fit(data, labels, verbose = 0)
+#' ```
+#'
+#' To be implemented by subclasses (custom metrics):
+#'
+#' * `initialize()`: All state variables should be created in this method by
+#'   calling `self$add_variable()` like: `self$var <- self$add_variable(...)`.
+#' * `update_state()`: Updates all the state variables like:
+#'   `self$var$assign(...)`.
+#' * `result()`: Computes and returns a scalar value or a named list of scalar values
+#'   for the metric from the state variables.
+#'
+#' Example subclass implementation:
+#'
+#' ```{r}
+#' metric_binary_true_positives <- Metric(
+#'   classname = "BinaryTruePositives",
+#'
+#'   initialize = function(name = 'binary_true_positives', ...) {
+#'     super$initialize(name = name, ...)
+#'     self$true_positives <-
+#'       self$add_weight(shape = shape(),
+#'                       initializer = 'zeros',
+#'                       name = 'true_positives')
+#'   },
+#'
+#'   update_state = function(y_true, y_pred, sample_weight = NULL) {
+#'     y_true <- op_cast(y_true, "bool")
+#'     y_pred <- op_cast(y_pred, "bool")
+#'
+#'     values <- y_true & y_pred # `&` calls op_logical_and()
+#'     values <- op_cast(values, self$dtype)
+#'     if (!is.null(sample_weight)) {
+#'       sample_weight <- op_cast(sample_weight, self$dtype)
+#'       sample_weight <- op_broadcast_to(sample_weight, shape(values))
+#'       values <- values * sample_weight # `*` calls op_multiply()
+#'     }
+#'     self$true_positives$assign(self$true_positives + op_sum(values))
+#'   },
+#'
+#'   result = function() {
+#'     self$true_positives
+#'   }
+#' )
+#' model <- keras_model_sequential(input_shape = 32) |> layer_dense(10)
+#' model |> compile(loss = loss_binary_crossentropy(),
+#'                  metrics = list(metric_binary_true_positives()))
+#' model |> fit(data, labels, verbose = 0)
+#' ```
+#'
+#' # Methods defined by the base `Metric` class:
+#'
+#' * ```
+#' __call__(...)
+#' ```
+#' Calling a metric instance like `m(...)` is equivalent to calling:
+#' ```r
+#' function(...) {
+#'   m$update_state(...)
+#'   m$result()
+#' }
+#' ```
+#'
+#' * ```r
+#' initialize(dtype=NULL, name=NULL)
+#' ```
+#' Initialize self.
+#'
+#' Args:
+#' * `name`: (Optional) string name of the metric instance.
+#' * `dtype`: (Optional) data type of the metric result.
+#'
+#' * ```r
+#' add_variable(shape, initializer, dtype=NULL, aggregation = 'sum', name=NULL)
+#' ```
+#'
+#' * ```r
+#' add_weight(shape=shape(), initializer=NULL, dtype=NULL, name=NULL)
+#' ```
+#'
+#' * ```r
+#' get_config()
+#' ```
+#' Return the serializable config of the metric.
+#'
+#' * ```r
+#' reset_state()
+#' ```
+#' Reset all of the metric state variables.
+#'
+#' This function is called between epochs/steps,
+#' when a metric is evaluated during training.
+#'
+#' * ```r
+#' result()
+#' ```
+#' Compute the current metric value.
+#'
+#' Returns:
+#' A scalar tensor, or a named list of scalar tensors.
+#'
+#' * ```r
+#' stateless_result(metric_variables)
+#' ```
+#' * ```r
+#' stateless_reset_state()
+#' ```
+#' * ```r
+#' stateless_update_state(metric_variables, ...)
+#' ```
+#' * ```r
+#' update_state(...)
+#' ```
+#' Accumulate statistics for the metric.
+#'
+#' # Readonly properties
+#'
+#' * `dtype`
+#'
+#' * `variables`
+#'
+#' @returns A function that returns `Metric` instances, similar to the
+#' built-in metric functions.
+#' @inheritSection Layer Symbols in scope
+#' @inheritParams Layer
+#' @param initialize,update_state,result
+#' Recommended methods to implement. See description section.
+#' @export
+#' @family metrics
+#' @tether keras.metrics.Metric
+Metric <-
+function(classname,
+         initialize = NULL,
+         update_state = NULL,
+         result = NULL,
+         ...,
+         public = list(),
+         private = list(),
+         inherit = NULL,
+         parent_env = parent.frame()) {
+
+  members <- drop_nulls(named_list(initialize, update_state, result))
+  members <- modifyList(members, list2(...), keep.null = TRUE)
+  members <- modifyList(members, public, keep.null = TRUE)
+
+  members <- modify_intersection(members, list(
+    from_config = function(x) decorate_method(x, "classmethod")
+  ))
+
+  inherit <- substitute(inherit) %||%
+    quote(base::asNamespace("keras3")$keras$Metric)
+
+  new_wrapped_py_class(
+    classname = classname,
+    members = members,
+    inherit = inherit,
+    parent_env = parent_env,
+    private = private,
+    default_formals = function(dtype=NULL, name=NULL){}
+  )
+}
diff --git a/R/Model.R b/R/Model.R
new file mode 100644
index 0000000000..97b60eb142
--- /dev/null
+++ b/R/Model.R
@@ -0,0 +1,63 @@
+#' Subclass the base Keras `Model` class
+#'
+#' @description
+#'
+#' This is for advanced use cases where you need to subclass the base `Model`
+#' type, e.g., you want to override the `train_step()` method.
+#'
+#' If you just want to create or define a Keras model, prefer [`keras_model()`]
+#' or [`keras_model_sequential()`].
+#'
+#' If you just want to encapsulate some custom logic and state, and don't need
+#' to customize training behavior (besides calling `self$add_loss()` in the
+#' `call()` method), prefer [`Layer()`].
+#'
+#' @inheritSection Layer Symbols in scope
+#'
+#' @param initialize,call,train_step,predict_step,test_step,compute_loss,compute_metrics
+#' Optional methods that can be overridden.
+#' @inheritParams Layer
+#'
+#' @returns A model constructor function, which you can call to create an
+#' instance of the new model type.
+#' @seealso [active_property()] (e.g., for a `metrics` property implemented as a
+#' function).
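+#'
+#' @details A minimal sketch of overriding `train_step()` (this assumes the
+#' TensorFlow backend; the classname and the exact body shown here are
+#' illustrative, not a fixed recipe):
+#'
+#' ```r
+#' CustomModel <- Model(
+#'   classname = "CustomModel",
+#'   train_step = function(data) {
+#'     c(x, y = NULL, sample_weight = NULL) %<-% data
+#'     # Record the forward pass so the loss can be differentiated.
+#'     with(tensorflow::tf$GradientTape() %as% tape, {
+#'       y_pred <- self(x, training = TRUE)
+#'       loss <- self$compute_loss(y = y, y_pred = y_pred,
+#'                                 sample_weight = sample_weight)
+#'     })
+#'     # Compute gradients and update the weights.
+#'     gradients <- tape$gradient(loss, self$trainable_variables)
+#'     self$optimizer$apply_gradients(
+#'       zip_lists(gradients, self$trainable_variables))
+#'     # Update the compiled metrics and return their current values.
+#'     for (metric in self$metrics) {
+#'       if (metric$name == "loss")
+#'         metric$update_state(loss)
+#'       else
+#'         metric$update_state(y, y_pred, sample_weight = sample_weight)
+#'     }
+#'     metrics <- lapply(self$metrics, function(m) m$result())
+#'     names(metrics) <- sapply(self$metrics, function(m) m$name)
+#'     metrics
+#'   }
+#' )
+#'
+#' inputs <- keras_input(shape = 32)
+#' outputs <- inputs |> layer_dense(1)
+#' model <- CustomModel(inputs, outputs)
+#' model |> compile(optimizer = "adam", loss = "mse")
+#' ```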
+#' @export
+Model <-
+function(classname,
+         initialize = NULL,
+         call = NULL,
+         train_step = NULL,
+         predict_step = NULL,
+         test_step = NULL,
+         compute_loss = NULL,
+         compute_metrics = NULL,
+         ...,
+         public = list(),
+         private = list(),
+         inherit = NULL,
+         parent_env = parent.frame())
+{
+  members <- drop_nulls(named_list(initialize, call,
+                                   train_step, predict_step, test_step,
+                                   compute_loss, compute_metrics))
+  members <- modifyList(members, list2(...), keep.null = TRUE)
+  members <- modifyList(members, public, keep.null = TRUE)
+
+  members <- modify_intersection(members, list(
+    from_config = function(x) decorate_method(x, "classmethod")
+  ))
+
+  inherit <- substitute(inherit) %||%
+    quote(base::asNamespace("keras3")$keras$Model)
+
+  new_wrapped_py_class(
+    classname = classname,
+    members = members,
+    inherit = inherit,
+    parent_env = parent_env,
+    private = private
+  )
+
+}
+
diff --git a/R/activations.R b/R/activations.R
index 1a4235519b..1e2a2925f0 100644
--- a/R/activations.R
+++ b/R/activations.R
@@ -1,144 +1,572 @@
-#' Activation functions
+
+#' Exponential Linear Unit.
+#'
+#' @description
+#' The exponential linear unit (ELU) with `alpha > 0` is defined as:
 #'
-#' Activations functions can either be used through [layer_activation()], or
-#' through the activation argument supported by all forward layers.
+#' - `x` if `x > 0`
+#' - `alpha * (exp(x) - 1)` if `x < 0`
 #'
-#' @details
-#' - `activation_selu()` to be used together with the initialization "lecun_normal".
-#' - `activation_selu()` to be used together with the dropout variant "AlphaDropout".
+#' ELUs have negative values, which pushes the mean of the activations
+#' closer to zero.
 #'
-#' @param x Tensor
-#' @param axis Integer, axis along which the softmax normalization is applied
-#' @param alpha Alpha value
-#' @param max_value Max value
-#' @param threshold Threshold value for thresholded activation.
+#' Mean activations that are closer to zero enable faster learning as they
+#' bring the gradient closer to the natural gradient.
+#' ELUs saturate to a negative value when the argument gets smaller.
+#' Saturation means a small derivative, which decreases the variation
+#' and the information that is propagated to the next layer.
 #'
-#' @return Tensor with the same shape and dtype as \code{x}.
+#' # Reference
+#' - [Clevert et al., 2016](https://arxiv.org/abs/1511.07289)
 #'
-#' @section References:
+#' @param x
+#' Input tensor.
 #'
-#' - `activation_swish()`: [Searching for Activation Functions](https://arxiv.org/abs/1710.05941)
-#' - `activation_gelu()`: [Gaussian Error Linear Units (GELUs)](https://arxiv.org/abs/1606.08415)
-#' - `activation_selu()`: [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)
-#' - `activation_elu()`: [Fast and Accurate Deep Network Learning by Exponential Linear Units (ELUs)](https://arxiv.org/abs/1511.07289)
+#' @param alpha
+#' Numeric. See description for details.
 #'
-#' @seealso
+#' @returns A tensor, the result from applying the activation to the input tensor `x`.
 #' @export
-#' @description `relu(...)`: Applies the rectified linear unit activation function.
-activation_relu <- function(x, alpha = 0.0, max_value = NULL, threshold = 0.0) {
-  args <- list(
-    x = x,
-    alpha = alpha,
-    max_value = max_value
-  )
-  if (keras_version() >= "2.2.3")
-    args$threshold <- threshold
+#' @family activations
+#' @seealso
+#' +
# +
+#' @tether keras.activations.elu
+activation_elu <-
+function (x, alpha = 1)
+{
+    args <- capture_args()
+    do.call(keras$activations$elu, args)
+}
+
-  do.call(keras$activations$relu, args)
+#' Exponential activation function.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @inherit activation_elu return
+#' @export
+#' @family activations
+#' @seealso
+#' +
# +
+#' @tether keras.activations.exponential
+activation_exponential <-
+function (x)
+{
+    args <- capture_args()
+    do.call(keras$activations$exponential, args)
+}
-attr(activation_relu, "py_function_name") <- "relu"
-#' @rdname activation_relu
-#' @description `elu(...)`: Exponential Linear Unit.
+#' Gaussian error linear unit (GELU) activation function.
+#'
+#' @description
+#' The Gaussian error linear unit (GELU) is defined as:
+#'
+#' `gelu(x) = x * P(X <= x)` where `P(X) ~ N(0, 1)`,
+#' i.e. `gelu(x) = 0.5 * x * (1 + erf(x / sqrt(2)))`.
+#'
+#' GELU weights inputs by their value, rather than gating
+#' inputs by their sign as in ReLU.
+#'
+#' # Reference
+#' - [Hendrycks et al., 2016](https://arxiv.org/abs/1606.08415)
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param approximate
+#' A `bool`, whether to enable approximation.
+#'
+#' @inherit activation_elu return
 #' @export
-activation_elu <- function(x, alpha = 1.0) {
-  keras$activations$elu(x, alpha = alpha)
+#' @family activations
+#' @seealso
+#' +
# +
+#' @tether keras.activations.gelu
+activation_gelu <-
+function (x, approximate = FALSE)
+{
+    args <- capture_args()
+    do.call(keras$activations$gelu, args)
+}
-attr(activation_elu, "py_function_name") <- "elu"
-#' @rdname activation_relu
+#' Hard sigmoid activation function.
+#'
+#' @description
+#' The hard sigmoid activation is defined as:
+#'
+#' - `0` if `x <= -3`
+#' - `1` if `x >= 3`
+#' - `(x/6) + 0.5` if `-3 < x < 3`
+#'
+#' It's a faster, piecewise linear approximation
+#' of the sigmoid activation.
+#'
+#' # Reference
+#' - [Wikipedia "Hard sigmoid"](https://en.wikipedia.org/wiki/Hard_sigmoid)
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @inherit activation_elu return
 #' @export
-#' @description `selu(...)`: Scaled Exponential Linear Unit (SELU).
-activation_selu <- function(x) {
-  keras$activations$selu(x)
+#' @family activations
+#' @seealso
+#' +
# +
+#' @tether keras.activations.hard_sigmoid
+activation_hard_sigmoid <-
+function (x)
+{
+    args <- capture_args()
+    do.call(keras$activations$hard_sigmoid, args)
+}
-attr(activation_selu, "py_function_name") <- "selu"
-#' @rdname activation_relu
+#' Leaky ReLU activation function.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param negative_slope
+#' A `float` that controls the slope
+#' for values lower than the threshold.
+#'
+#' @inherit activation_elu return
 #' @export
-#' @description `hard_sigmoid(...)`: Hard sigmoid activation function.
-activation_hard_sigmoid <- function(x) {
-  keras$activations$hard_sigmoid(x)
+#' @family activations
+#' @seealso
+#' +
# +
+#' @tether keras.activations.leaky_relu
+activation_leaky_relu <-
+function (x, negative_slope = 0.2)
+{
+    args <- capture_args()
+    do.call(keras$activations$leaky_relu, args)
+}
-attr(activation_hard_sigmoid, "py_function_name") <- "hard_sigmoid"
-#' @rdname activation_relu
+
+#' Linear activation function (pass-through).
+#' +#' @description +#' A "linear" activation is an identity function: +#' it returns the input, unmodified. +#' +#' @param x +#' Input tensor. +#' +#' @inherit activation_elu return #' @export -#' @description `linear(...)`: Linear activation function (pass-through). -activation_linear <- function(x) { - keras$activations$linear(x) +#' @family activations +#' @seealso +#' + +# + +#' @tether keras.activations.linear +activation_linear <- +function (x) +{ + args <- capture_args() + do.call(keras$activations$linear, args) } -attr(activation_linear, "py_function_name") <- "linear" -#' @rdname activation_relu + +#' Log-Softmax activation function. +#' +#' @description +#' Each input vector is handled independently. +#' The `axis` argument sets which axis of the input the function +#' is applied along. +#' +#' @param x +#' Input tensor. +#' +#' @param axis +#' Integer, axis along which the softmax is applied. +#' +#' @inherit activation_elu return #' @export -#' @description `sigmoid(...)`: Sigmoid activation function, `sigmoid(x) = 1 / (1 + exp(-x))`. -activation_sigmoid <- function(x) { - keras$activations$softmax(x) +#' @family activations +#' @seealso +#' + +# + +#' @tether keras.activations.log_softmax +activation_log_softmax <- +function (x, axis = -1L) +{ + args <- capture_args(list(axis = as_axis)) + do.call(keras$activations$log_softmax, args) } -attr(activation_sigmoid, "py_function_name") <- "sigmoid" -#' @rdname activation_relu + +#' Mish activation function. +#' +#' @description +#' It is defined as: +#' +#' `mish(x) = x * tanh(softplus(x))` +#' +#' where `softplus` is defined as: +#' +#' `softplus(x) = log(exp(x) + 1)` +#' +#' # Reference +#' - [Misra, 2019](https://arxiv.org/abs/1908.08681) +#' +#' @param x +#' Input tensor. +#' +#' @inherit activation_elu return #' @export -#' @description `softmax(...)`: Softmax converts a vector of values to a probability distribution. -activation_softmax <- function(x, axis = -1) { - args <- list(x = x) - if (keras_version() >= "2.0.2") - args$axis <- as.integer(axis) - do.call(keras$activations$softmax, args) +#' @family activations +#' @seealso +#' + +# + +#' @tether keras.activations.mish +activation_mish <- +function (x) +{ + args <- capture_args() + do.call(keras$activations$mish, args) } -attr(activation_softmax, "py_function_name") <- "softmax" -#' @rdname activation_relu + +#' Applies the rectified linear unit activation function. +#' +#' @description +#' With default values, this returns the standard ReLU activation: +#' `max(x, 0)`, the element-wise maximum of 0 and the input tensor. +#' +#' Modifying default parameters allows you to use non-zero thresholds, +#' change the max value of the activation, +#' and to use a non-zero multiple of the input for values below the threshold. +#' +#' # Examples +#' ```{r} +#' x <- c(-10, -5, 0, 5, 10) +#' activation_relu(x) +#' activation_relu(x, negative_slope = 0.5) +#' activation_relu(x, max_value = 5) +#' activation_relu(x, threshold = 5) +#' ``` +#' +#' @returns +#' A tensor with the same shape and dtype as input `x`. +#' +#' @param x +#' Input tensor. +#' +#' @param negative_slope +#' A `numeric` that controls the slope +#' for values lower than the threshold. +#' +#' @param max_value +#' A `numeric` that sets the saturation threshold (the largest +#' value the function will return). +#' +#' @param threshold +#' A `numeric` giving the threshold value of the activation +#' function below which values will be damped or set to zero. 
+#'
 #' @export
-#' @description `softplus(...)`: Softplus activation function, `softplus(x) = log(exp(x) + 1)`.
-activation_softplus <- function(x) {
-  keras$activations$softplus(x)
+#' @family activations
+#' @seealso
+#' +
# +
+#' @tether keras.activations.relu
+activation_relu <-
+function (x, negative_slope = 0, max_value = NULL,
+    threshold = 0)
+{
+    args <- capture_args()
+    do.call(keras$activations$relu, args)
+}
-attr(activation_softplus, "py_function_name") <- "softplus"
-#' @rdname activation_relu
+
+#' Relu6 activation function.
+#'
+#' @description
+#' It's the ReLU function, but truncated to a maximum value of 6.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @inherit activation_elu return
 #' @export
-#' @description `softsign(...)`: Softsign activation function, `softsign(x) = x / (abs(x) + 1)`.
-activation_softsign <- function(x) {
-  keras$activations$softsign(x)
+#' @family activations
+#' @seealso
+#' +
# +
+#' @tether keras.activations.relu6
+activation_relu6 <-
+function (x)
+{
+    args <- capture_args()
+    do.call(keras$activations$relu6, args)
+}
-attr(activation_softsign, "py_function_name") <- "softsign"
-#' @rdname activation_relu
+
+#' Scaled Exponential Linear Unit (SELU).
+#'
+#' @description
+#' The Scaled Exponential Linear Unit (SELU) activation function is defined as:
+#'
+#' - `scale * x` if `x > 0`
+#' - `scale * alpha * (exp(x) - 1)` if `x < 0`
+#'
+#' where `alpha` and `scale` are pre-defined constants
+#' (`alpha = 1.67326324` and `scale = 1.05070098`).
+#'
+#' Basically, the SELU activation function multiplies `scale` (> 1) with the
+#' output of the `activation_elu` function to ensure a slope larger
+#' than one for positive inputs.
+#'
+#' The values of `alpha` and `scale` are
+#' chosen so that the mean and variance of the inputs are preserved
+#' between two consecutive layers as long as the weights are initialized
+#' correctly (see [`initializer_lecun_normal()`])
+#' and the number of input units is "large enough"
+#' (see reference paper for more information).
+#'
+#' # Notes
+#' - To be used together with
+#' [`initializer_lecun_normal()`].
+#' - To be used together with the dropout variant
+#' `layer_alpha_dropout()` (legacy, deprecated).
+#'
+#' # Reference
+#' - [Klambauer et al., 2017](https://arxiv.org/abs/1706.02515)
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @inherit activation_elu return
+#' @export
+#' @family activations
+#' @seealso
+#' +
# +
+#' @tether keras.activations.selu
+activation_selu <-
+function (x)
+{
+    args <- capture_args()
+    do.call(keras$activations$selu, args)
+}
+
+
+#' Sigmoid activation function.
+#'
+#' @description
+#' It is defined as: `sigmoid(x) = 1 / (1 + exp(-x))`.
+#'
+#' For small values (< -5),
+#' `sigmoid` returns a value close to zero, and for large values (> 5)
+#' the result of the function gets close to 1.
+#'
+#' Sigmoid is equivalent to a 2-element softmax, where the second element is
+#' assumed to be zero. The sigmoid function always returns a value between
+#' 0 and 1.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @inherit activation_elu return
+#' @export
+#' @family activations
+#' @seealso
+#' +
# +
+#' @tether keras.activations.sigmoid
+activation_sigmoid <-
+function (x)
+{
+    args <- capture_args()
+    do.call(keras$activations$sigmoid, args)
+}
+
+
+#' Swish (or Silu) activation function.
+#'
+#' @description
+#' It is defined as: `swish(x) = x * sigmoid(x)`.
+#'
+#' The Swish (or Silu) activation function is a smooth,
+#' non-monotonic function that is unbounded above and
+#' bounded below.
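+#'
+#' For example (a quick sketch; `op_convert_to_tensor()` turns R values into
+#' a backend tensor):
+#'
+#' ```r
+#' x <- op_convert_to_tensor(c(-1, 0, 1))
+#' activation_silu(x)  # same as x * activation_sigmoid(x)
+#' ```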
+#'
+#' # Reference
+#' - [Ramachandran et al., 2017](https://arxiv.org/abs/1710.05941)
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @inherit activation_elu return
+#' @export
+#' @family activations
+#' @seealso
+#' +
# +
+#' @tether keras.activations.silu
+activation_silu <-
+function (x)
+{
+    args <- capture_args()
+    do.call(keras$activations$silu, args)
+}
+
+
+#' Softmax converts a vector of values to a probability distribution.
+#'
+#' @description
+#' The elements of the output vector are in range `[0, 1]` and sum to 1.
+#'
+#' Each input vector is handled independently.
+#' The `axis` argument sets which axis of the input the function
+#' is applied along.
+#'
+#' Softmax is often used as the activation for the last
+#' layer of a classification network because the result could be interpreted as
+#' a probability distribution.
+#'
+#' The softmax of each vector x is computed as
+#' `exp(x) / sum(exp(x))`.
+#'
+#' The input values are the log-odds of the resulting probability.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param axis
+#' Integer, axis along which the softmax is applied.
+#'
+#' @inherit activation_elu return
 #' @export
-#' @description `tanh(...)`: Hyperbolic tangent activation function.
-activation_tanh <- function(x) {
-  keras$activations$tanh(x)
+#' @family activations
+#' @seealso
+#' +
# +
+#' @tether keras.activations.softmax
+activation_softmax <-
+function (x, axis = -1L)
+{
+    args <- capture_args(list(axis = as_axis))
+    do.call(keras$activations$softmax, args)
+}
-attr(activation_tanh, "py_function_name") <- "tanh"
-#' @rdname activation_relu
+
+#' Softplus activation function.
+#'
+#' @description
+#' It is defined as: `softplus(x) = log(exp(x) + 1)`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @inherit activation_elu return
 #' @export
-#' @description `exponential(...)`: Exponential activation function.
-activation_exponential <- function(x) {
-  keras$activations$exponential(x)
+#' @family activations
+#' @seealso
+#' +
# +
+#' @tether keras.activations.softplus
+activation_softplus <-
+function (x)
+{
+    args <- capture_args()
+    do.call(keras$activations$softplus, args)
+}
-attr(activation_exponential, "py_function_name") <- "exponential"
-#' @rdname activation_relu
+
+#' Softsign activation function.
+#'
+#' @description
+#' Softsign is defined as: `softsign(x) = x / (abs(x) + 1)`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @inherit activation_elu return
 #' @export
-#' @description `gelu(...)`: Applies the Gaussian error linear unit (GELU) activation function.
-#' @param approximate A bool, whether to enable approximation.
-activation_gelu <- function(x, approximate=FALSE) {
-  keras$activations$gelu(x, approximate)
+#' @family activations
+#' @seealso
+#' +
# +
+#' @tether keras.activations.softsign
+activation_softsign <-
+function (x)
+{
+    args <- capture_args()
+    do.call(keras$activations$softsign, args)
+}
-attr(activation_gelu, "py_function_name") <- "gelu"
-#' @rdname activation_relu
+
+#' Hyperbolic tangent activation function.
+#'
+#' @description
+#' It is defined as:
+#' `tanh(x) = sinh(x) / cosh(x)`, i.e.
+#' `tanh(x) = ((exp(x) - exp(-x)) / (exp(x) + exp(-x)))`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @inherit activation_elu return
 #' @export
-#' @description `swish(...)`: Swish activation function, `swish(x) = x * sigmoid(x)`.
-activation_swish <- function(x) {
-  keras$activations$swish(x)
+#' @family activations
+#' @seealso
+#' +
# +
+#' @tether keras.activations.tanh
+activation_tanh <-
+function (x)
+{
+    args <- capture_args()
+    do.call(keras$activations$tanh, args)
+}
-attr(activation_swish, "py_function_name") <- "swish"
+
+
+#' Hard SiLU activation function, also known as Hard Swish.
+#'
+#' @description
+#' It is defined as:
+#'
+#' - `0` if `x < -3`
+#' - `x` if `x > 3`
+#' - `x * (x + 3) / 6` if `-3 <= x <= 3`
+#'
+#' It's a faster, piecewise linear approximation of the silu activation.
+#'
+#' # Reference
+#' - [A Howard, 2019](https://arxiv.org/abs/1905.02244)
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @inherit activation_elu return
+#' @export
+#' @tether keras.activations.hard_silu
# @seealso
# +
+activation_hard_silu <-
+  structure(function (x)
+  {
+    args <- capture_args(NULL)
+    do.call(keras$activations$hard_silu, args)
+  }, py_function_name = "hard_silu")
+
+#' @rdname activation_hard_silu
+#' @export
+activation_hard_swish <-
+  structure(function (x)
+  {
+    args <- capture_args(NULL)
+    do.call(keras$activations$hard_swish, args)
+  }, py_function_name = "hard_silu")
diff --git a/R/applications.R b/R/applications.R
index bfbe6af721..31e3fc5b3f 100644
--- a/R/applications.R
+++ b/R/applications.R
@@ -1,601 +1,2798 @@
-
-#' Instantiates the Xception architecture
+#' Instantiates the ConvNeXtBase architecture.
+#'
+#' @description
 #'
-#' @details
+#' # References
+#' - [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545)
+#' (CVPR 2022)
 #'
 #' For image classification use cases, see
 #' [this page for detailed examples](
-#' https://keras.io/api/applications/#usage-examples-for-image-classification-models).
-#'
+#' https://keras.io/api/applications/#usage-examples-for-image-classification-models).
 #' For transfer learning use cases, make sure to read the
 #' [guide to transfer learning & fine-tuning](
-#' https://keras.io/guides/transfer_learning/).
-#'
-#' The default input image size for this model is 299x299.
-#'
-#' @note
-#' Each Keras Application typically expects a specific kind of input preprocessing.
-#' For Xception, call `xception_preprocess_input()` on your
-#' inputs before passing them to the model.
-#' `xception_preprocess_input()` will scale input pixels between -1 and 1.
-#'
-#' @section
-#' Reference:
-#' - [Xception: Deep Learning with Depthwise Separable Convolutions](
-#' https://arxiv.org/abs/1610.02357) (CVPR 2017)
-#'
-#' @param include_top Whether to include the fully-connected
+#' https://keras.io/guides/transfer_learning/).
+#'
+#' The `base`, `large`, and `xlarge` models were first pre-trained on the
+#' ImageNet-21k dataset and then fine-tuned on the ImageNet-1k dataset. The
+#' pre-trained parameters of the models were assembled from the
+#' [official repository](https://github.com/facebookresearch/ConvNeXt). To get a
+#' sense of how these parameters were converted to Keras compatible parameters,
+#' please refer to
+#' [this repository](https://github.com/sayakpaul/keras-convnext-conversion).
+#'
+#' # Note
+#' Each Keras Application expects a specific kind of input preprocessing.
+#' For ConvNeXt, preprocessing is included in the model using a `Normalization`
+#' layer. ConvNeXt models expect their inputs to be float or uint8 tensors of
+#' pixels with values in the `[0-255]` range.
+#' +#' When calling the `summary()` method after instantiating a ConvNeXt model, +#' prefer setting the `expand_nested` argument `summary()` to `TRUE` to better +#' investigate the instantiated model. +#' +#' @returns +#' A model instance. +#' +#' @param include_top +#' Whether to include the fully-connected #' layer at the top of the network. Defaults to `TRUE`. #' -#' @param weights One of `NULL` (random initialization), -#' `'imagenet'` (pre-training on ImageNet), -#' or the path to the weights file to be loaded. Defaults to `'imagenet'`. +#' @param weights +#' One of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet-1k), or the path to the weights +#' file to be loaded. Defaults to `"imagenet"`. #' -#' @param input_tensor Optional Keras tensor -#' (i.e. output of `layer_input()`) +#' @param input_tensor +#' Optional Keras tensor +#' (i.e. output of `layers.Input()`) #' to use as image input for the model. #' -#' @param input_shape optional shape list, only to be specified -#' if `include_top` is FALSE (otherwise the input shape -#' has to be `(299, 299, 3)`. -#' It should have exactly 3 inputs channels, -#' and width and height should be no smaller than 71. -#' E.g. `(150, 150, 3)` would be one valid value. +#' @param input_shape +#' Optional shape tuple, only to be specified +#' if `include_top` is `FALSE`. +#' It should have exactly 3 inputs channels. #' -#' @param pooling Optional pooling mode for feature extraction +#' @param pooling +#' Optional pooling mode for feature extraction #' when `include_top` is `FALSE`. Defaults to `NULL`. #' - `NULL` means that the output of the model will be -#' the 4D tensor output of the -#' last convolutional layer. -#' - `'avg'` means that global average pooling -#' will be applied to the output of the -#' last convolutional layer, and thus -#' the output of the model will be a 2D tensor. -#' - `'max'` means that global max pooling will -#' be applied. -#' -#' @param classes Optional number of classes to classify images into, only to be -#' specified if `include_top` is TRUE, and if no `weights` argument is -#' specified. Defaults to 1000 (number of ImageNet classes). -#' -#' @param classifier_activation A string or callable. The activation function to -#' use on the "top" layer. Ignored unless `include_top = TRUE`. Set -#' `classifier_activation = NULL` to return the logits of the "top" layer. -#' Defaults to `'softmax'`. When loading pretrained weights, -#' `classifier_activation` can only be `NULL` or `"softmax"`. -#' -#' @param ... For backwards and forwards compatibility -#' -#' -#' @param x `preprocess_input()` takes an array or floating point tensor, 3D or -#' 4D with 3 color channels, with values in the range `[0, 255]`. +#' the 4D tensor output of the last convolutional layer. +#' - `avg` means that global average pooling +#' will be applied to the output of the +#' last convolutional layer, and thus +#' the output of the model will be a 2D tensor. +#' - `max` means that global max pooling will +#' be applied. +#' +#' @param classes +#' Optional number of classes to classify images +#' into, only to be specified if `include_top` is `TRUE`, and +#' if no `weights` argument is specified. Defaults to 1000 (number of +#' ImageNet classes). +#' +#' @param classifier_activation +#' A `str` or callable. The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' Defaults to `"softmax"`. 
+#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. #' -#' @seealso -#' + -#' + +#' @param include_preprocessing +#' Boolean, whether to include the preprocessing layer at the bottom of the network. #' +#' @param model_name +#' String, name for the model. #' #' @export -application_xception <- -function(include_top = TRUE, weights = "imagenet", input_tensor = NULL, - input_shape = NULL, pooling = NULL, classes = 1000, - classifier_activation='softmax', ...) +#' @seealso +#' + +# + +#' @tether keras.applications.ConvNeXtBase +application_convnext_base <- +function (model_name = "convnext_base", include_top = TRUE, include_preprocessing = TRUE, + weights = "imagenet", input_tensor = NULL, input_shape = NULL, + pooling = NULL, classes = 1000L, classifier_activation = "softmax") { - verify_application_prerequistes() - args <- capture_args(match.call(), list( - classes = as.integer, - input_shape = normalize_shape)) - do.call(keras$applications$Xception, args) -} - - -#' @rdname application_xception -#' @export -xception_preprocess_input <- function(x) { - preprocess_input(x, keras$applications$xception$preprocess_input) + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$ConvNeXtBase, args) + set_preprocessing_attributes(model, keras$applications$convnext) } -#' VGG16 and VGG19 models for Keras. -#' -#' @details Optionally loads weights pre-trained on ImageNet. +#' Instantiates the ConvNeXtLarge architecture. #' -#' The `imagenet_preprocess_input()` function should be used for image preprocessing. +#' @description #' -#' @inheritParams application_xception +#' # References +#' - [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) +#' (CVPR 2022) #' -#' @param include_top whether to include the 3 fully-connected layers at the top -#' of the network. -#' @param input_shape optional shape list, only to be specified if `include_top` -#' is FALSE (otherwise the input shape has to be `(224, 224, 3)` It should -#' have exactly 3 inputs channels, and width and height should be no smaller -#' than 32. E.g. `(200, 200, 3)` would be one valid value. +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' The `base`, `large`, and `xlarge` models were first pre-trained on the +#' ImageNet-21k dataset and then fine-tuned on the ImageNet-1k dataset. The +#' pre-trained parameters of the models were assembled from the +#' [official repository](https://github.com/facebookresearch/ConvNeXt). To get a +#' sense of how these parameters were converted to Keras compatible parameters, +#' please refer to +#' [this repository](https://github.com/sayakpaul/keras-convnext-conversion). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For ConvNeXt, preprocessing is included in the model using a `Normalization` +#' layer. ConvNeXt models expect their inputs to be float or uint8 tensors of +#' pixels with values in the `[0-255]` range. +#' +#' When calling the `summary()` method after instantiating a ConvNeXt model, +#' prefer setting the `expand_nested` argument `summary()` to `TRUE` to better +#' investigate the instantiated model. +#' +#' @returns +#' A model instance. 
+#' +#' @param include_top +#' Whether to include the fully-connected +#' layer at the top of the network. Defaults to `TRUE`. #' -#' @return Keras model instance. +#' @param weights +#' One of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet-1k), or the path to the weights +#' file to be loaded. Defaults to `"imagenet"`. #' -#' @section Reference: - [Very Deep Convolutional Networks for Large-Scale Image -#' Recognition](https://arxiv.org/abs/1409.1556) +#' @param input_tensor +#' Optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. #' -#' @name application_vgg +#' @param input_shape +#' Optional shape tuple, only to be specified +#' if `include_top` is `FALSE`. +#' It should have exactly 3 inputs channels. #' -#' @examples -#' \dontrun{ -#' library(keras) +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. Defaults to `NULL`. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the last convolutional layer. +#' - `avg` means that global average pooling +#' will be applied to the output of the +#' last convolutional layer, and thus +#' the output of the model will be a 2D tensor. +#' - `max` means that global max pooling will +#' be applied. +#' +#' @param classes +#' Optional number of classes to classify images +#' into, only to be specified if `include_top` is `TRUE`, and +#' if no `weights` argument is specified. Defaults to 1000 (number of +#' ImageNet classes). +#' +#' @param classifier_activation +#' A `str` or callable. The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' Defaults to `"softmax"`. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. #' -#' model <- application_vgg16(weights = 'imagenet', include_top = FALSE) +#' @param include_preprocessing +#' Boolean, whether to include the preprocessing layer at the bottom of the network. #' -#' img_path <- "elephant.jpg" -#' img <- image_load(img_path, target_size = c(224,224)) -#' x <- image_to_array(img) -#' x <- array_reshape(x, c(1, dim(x))) -#' x <- imagenet_preprocess_input(x) +#' @param model_name +#' String, name for the model. 
#' -#' features <- model %>% predict(x) -#' } #' @export -application_vgg16 <- -function(include_top = TRUE, weights = "imagenet", input_tensor = NULL, - input_shape = NULL, pooling = NULL, classes = 1000, - classifier_activation='softmax') -{ - verify_application_prerequistes() - args <- capture_args(match.call(), list( - classes = as.integer, - input_shape = normalize_shape)) - do.call(keras$applications$VGG16, args) -} - -#' @rdname application_vgg -#' @export -application_vgg19 <- -function(include_top = TRUE, weights = "imagenet", input_tensor = NULL, - input_shape = NULL, pooling = NULL, classes = 1000, - classifier_activation='softmax') +#' @seealso +#' + +# + +#' @tether keras.applications.ConvNeXtLarge +application_convnext_large <- +function (model_name = "convnext_large", include_top = TRUE, + include_preprocessing = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax") { - verify_application_prerequistes() - args <- capture_args(match.call(), list( - classes = as.integer, - input_shape = normalize_shape)) - do.call(keras$applications$VGG19, args) + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$ConvNeXtLarge, args) + set_preprocessing_attributes(model, keras$applications$convnext) } -#' Instantiates the ResNet architecture +#' Instantiates the ConvNeXtSmall architecture. #' -#' @details -#' Reference: -#' - [Deep Residual Learning for Image Recognition]( -#' https://arxiv.org/abs/1512.03385) (CVPR 2015) +#' @description +#' +#' # References +#' - [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) +#' (CVPR 2022) #' #' For image classification use cases, see #' [this page for detailed examples]( -#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). -#' +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). #' For transfer learning use cases, make sure to read the #' [guide to transfer learning & fine-tuning]( -#' https://keras.io/guides/transfer_learning/). +#' https://keras.io/guides/transfer_learning/). +#' +#' The `base`, `large`, and `xlarge` models were first pre-trained on the +#' ImageNet-21k dataset and then fine-tuned on the ImageNet-1k dataset. The +#' pre-trained parameters of the models were assembled from the +#' [official repository](https://github.com/facebookresearch/ConvNeXt). To get a +#' sense of how these parameters were converted to Keras compatible parameters, +#' please refer to +#' [this repository](https://github.com/sayakpaul/keras-convnext-conversion). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For ConvNeXt, preprocessing is included in the model using a `Normalization` +#' layer. ConvNeXt models expect their inputs to be float or uint8 tensors of +#' pixels with values in the `[0-255]` range. +#' +#' When calling the `summary()` method after instantiating a ConvNeXt model, +#' prefer setting the `expand_nested` argument `summary()` to `TRUE` to better +#' investigate the instantiated model. +#' +#' @returns +#' A model instance. +#' +#' @param include_top +#' Whether to include the fully-connected +#' layer at the top of the network. Defaults to `TRUE`. #' -#' Note: each Keras Application expects a specific kind of input preprocessing. -#' For ResNet, call `tf.keras.applications.resnet.preprocess_input` on your -#' inputs before passing them to the model. 
-#' `resnet.preprocess_input` will convert the input images from RGB to BGR, -#' then will zero-center each color channel with respect to the ImageNet dataset, -#' without scaling. +#' @param weights +#' One of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet-1k), or the path to the weights +#' file to be loaded. Defaults to `"imagenet"`. #' -#' @inheritParams application_efficientnet +#' @param input_tensor +#' Optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. #' -#' @param input_shape optional shape list, only to be specified -#' if `include_top` is FALSE (otherwise the input shape -#' has to be `c(224, 224, 3)` (with `'channels_last'` data format) -#' or `c(3, 224, 224)` (with `'channels_first'` data format). -#' It should have exactly 3 inputs channels, -#' and width and height should be no smaller than 32. -#' E.g. `c(200, 200, 3)` would be one valid value. +#' @param input_shape +#' Optional shape tuple, only to be specified +#' if `include_top` is `FALSE`. +#' It should have exactly 3 inputs channels. #' -#' @param x `preprocess_input()` takes an array or floating point tensor, 3D or -#' 4D with 3 color channels, with values in the range `[0, 255]`. +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. Defaults to `NULL`. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the last convolutional layer. +#' - `avg` means that global average pooling +#' will be applied to the output of the +#' last convolutional layer, and thus +#' the output of the model will be a 2D tensor. +#' - `max` means that global max pooling will +#' be applied. +#' +#' @param classes +#' Optional number of classes to classify images +#' into, only to be specified if `include_top` is `TRUE`, and +#' if no `weights` argument is specified. Defaults to 1000 (number of +#' ImageNet classes). +#' +#' @param classifier_activation +#' A `str` or callable. The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' Defaults to `"softmax"`. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. #' -#' @param ... For backwards and forwards compatibility +#' @param include_preprocessing +#' Boolean, whether to include the preprocessing layer at the bottom of the network. #' -#' @name application_resnet -#' @rdname application_resnet +#' @param model_name +#' String, name for the model. #' -#' @seealso -#' + -#' + -#' + -#' + -#' + -#' + -#' + -#' -#' @examples -#' \dontrun{ -#' library(keras) -#' -#' # instantiate the model -#' model <- application_resnet50(weights = 'imagenet') -#' -#' # load the image -#' img_path <- "elephant.jpg" -#' img <- image_load(img_path, target_size = c(224,224)) -#' x <- image_to_array(img) -#' -#' # ensure we have a 4d tensor with single element in the batch dimension, -#' # the preprocess the input for prediction using resnet50 -#' x <- array_reshape(x, c(1, dim(x))) -#' x <- imagenet_preprocess_input(x) -#' -#' # make predictions then decode and print them -#' preds <- model %>% predict(x) -#' imagenet_decode_predictions(preds, top = 3)[[1]] -#' } -NULL - -## TODO: maybe expand all the application wrappers to use this? 
-## then clean up with `formals(fn)$classifier_activation <- NULL` where needed -new_application_resnet_wrapper <- function(name) { - args <- alist(include_top = TRUE, weights = "imagenet", input_tensor = NULL, - input_shape = NULL, pooling = NULL, classes = 1000) - if(grepl("V2$", name)) - args <- c(args, alist(classifier_activation='softmax')) - args <- c(args, alist(... = )) - - body <- substitute({ - args <- capture_args(match.call(), list( - classes = as.integer, - input_shape = normalize_shape)) - do.call(keras$applications$NAME, args) - }, list(NAME = name)) - - as.function(c(args, body), envir = parent.frame()) -} - -#' @export -#' @rdname application_resnet -application_resnet50 <- new_application_resnet_wrapper("ResNet50") - -#' @export -#' @rdname application_resnet -application_resnet101 <- new_application_resnet_wrapper("ResNet101") - -#' @export -#' @rdname application_resnet -application_resnet152 <- new_application_resnet_wrapper("ResNet152") - -#' @export -#' @rdname application_resnet -application_resnet50_v2 <- new_application_resnet_wrapper("ResNet50V2") - -#' @export -#' @rdname application_resnet -application_resnet101_v2 <- new_application_resnet_wrapper("ResNet101V2") - -#' @export -#' @rdname application_resnet -application_resnet152_v2 <- new_application_resnet_wrapper("ResNet152V2") - - #' @export -#' @rdname application_resnet -resnet_preprocess_input <- function(x) { - preprocess_input(x, keras$applications$resnet$preprocess_input) -} - -#' @export -#' @rdname application_resnet -resnet_v2_preprocess_input <- function(x) { - preprocess_input(x, keras$applications$resnet_v2$preprocess_input) +#' @seealso +#' + +# + +#' @tether keras.applications.ConvNeXtSmall +application_convnext_small <- +function (model_name = "convnext_small", include_top = TRUE, + include_preprocessing = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax") +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$ConvNeXtSmall, args) + set_preprocessing_attributes(model, keras$applications$convnext) } - - - -#' Inception V3 model, with weights pre-trained on ImageNet. +#' Instantiates the ConvNeXtTiny architecture. +#' +#' @description +#' +#' # References +#' - [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) +#' (CVPR 2022) +#' +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' The `base`, `large`, and `xlarge` models were first pre-trained on the +#' ImageNet-21k dataset and then fine-tuned on the ImageNet-1k dataset. The +#' pre-trained parameters of the models were assembled from the +#' [official repository](https://github.com/facebookresearch/ConvNeXt). To get a +#' sense of how these parameters were converted to Keras compatible parameters, +#' please refer to +#' [this repository](https://github.com/sayakpaul/keras-convnext-conversion). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For ConvNeXt, preprocessing is included in the model using a `Normalization` +#' layer. ConvNeXt models expect their inputs to be float or uint8 tensors of +#' pixels with values in the `[0-255]` range. 
+#' +#' When calling the `summary()` method after instantiating a ConvNeXt model, +#' prefer setting the `expand_nested` argument `summary()` to `TRUE` to better +#' investigate the instantiated model. +#' +#' @returns +#' A model instance. +#' +#' @param include_top +#' Whether to include the fully-connected +#' layer at the top of the network. Defaults to `TRUE`. #' -#' @details -#' Do note that the input image format for this model is different than for -#' the VGG16 and ResNet models (299x299 instead of 224x224). +#' @param weights +#' One of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet-1k), or the path to the weights +#' file to be loaded. Defaults to `"imagenet"`. #' -#' The `inception_v3_preprocess_input()` function should be used for image -#' preprocessing. +#' @param input_tensor +#' Optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. #' -#' @inheritParams application_xception +#' @param input_shape +#' Optional shape tuple, only to be specified +#' if `include_top` is `FALSE`. +#' It should have exactly 3 inputs channels. #' -#' @return A Keras model instance. +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. Defaults to `NULL`. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the last convolutional layer. +#' - `avg` means that global average pooling +#' will be applied to the output of the +#' last convolutional layer, and thus +#' the output of the model will be a 2D tensor. +#' - `max` means that global max pooling will +#' be applied. +#' +#' @param classes +#' Optional number of classes to classify images +#' into, only to be specified if `include_top` is `TRUE`, and +#' if no `weights` argument is specified. Defaults to 1000 (number of +#' ImageNet classes). +#' +#' @param classifier_activation +#' A `str` or callable. The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' Defaults to `"softmax"`. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. #' -#' @section Reference: -#' - [Rethinking the Inception Architecture for Computer Vision](https://arxiv.org/abs/1512.00567) +#' @param include_preprocessing +#' Boolean, whether to include the preprocessing layer at the bottom of the network. +#' +#' @param model_name +#' String, name for the model. #' #' @export -application_inception_v3 <- function(include_top = TRUE, weights = "imagenet", input_tensor = NULL, input_shape = NULL, - pooling = NULL, classes = 1000, classifier_activation='softmax', ...) 
{
-  verify_application_prerequistes()
-  args <- capture_args(match.call(), list(
-    input_shape = normalize_shape, classes = as.integer))
-  do.call(keras$applications$InceptionV3, args)
-}
-
-
-#' @rdname application_inception_v3
-#' @export
-inception_v3_preprocess_input <- function(x) {
-  preprocess_input(x, keras$applications$inception_v3$preprocess_input)
+#' @seealso
+#' +
+# +
+#' @tether keras.applications.ConvNeXtTiny
+application_convnext_tiny <-
+function (model_name = "convnext_tiny", include_top = TRUE, include_preprocessing = TRUE,
+    weights = "imagenet", input_tensor = NULL, input_shape = NULL,
+    pooling = NULL, classes = 1000L, classifier_activation = "softmax")
+{
+    args <- capture_args(list(classes = as_integer, input_shape = normalize_shape))
+    model <- do.call(keras$applications$ConvNeXtTiny, args)
+    set_preprocessing_attributes(model, keras$applications$convnext)
}

-#' Inception-ResNet v2 model, with weights trained on ImageNet
+#' Instantiates the ConvNeXtXLarge architecture.
#'
+#' @description
#'
-#' @inheritParams application_xception
+#' # References
+#' - [A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545)
+#' (CVPR 2022)
#'
-#' @return A Keras model instance.
+#' For image classification use cases, see
+#' [this page for detailed examples](
+#' https://keras.io/api/applications/#usage-examples-for-image-classification-models).
+#' For transfer learning use cases, make sure to read the
+#' [guide to transfer learning & fine-tuning](
+#' https://keras.io/guides/transfer_learning/).
+#'
+#' The `base`, `large`, and `xlarge` models were first pre-trained on the
+#' ImageNet-21k dataset and then fine-tuned on the ImageNet-1k dataset. The
+#' pre-trained parameters of the models were assembled from the
+#' [official repository](https://github.com/facebookresearch/ConvNeXt). To get a
+#' sense of how these parameters were converted to Keras compatible parameters,
+#' please refer to
+#' [this repository](https://github.com/sayakpaul/keras-convnext-conversion).
+#'
+#' # Note
+#' Each Keras Application expects a specific kind of input preprocessing.
+#' For ConvNeXt, preprocessing is included in the model using a `Normalization`
+#' layer. ConvNeXt models expect their inputs to be float or uint8 tensors of
+#' pixels with values in the `[0-255]` range.
+#'
+#' When calling the `summary()` method after instantiating a ConvNeXt model,
+#' prefer setting the `expand_nested` argument of `summary()` to `TRUE` to better
+#' investigate the instantiated model.
+#'
+#' @returns
+#' A model instance.
+#'
+#' @param include_top
+#' Whether to include the fully-connected
+#' layer at the top of the network. Defaults to `TRUE`.
#'
-#' @details
-#' Do note that the input image format for this model is different than for
-#' the VGG16 and ResNet models (299x299 instead of 224x224).
+#' @param weights
+#' One of `NULL` (random initialization),
+#' `"imagenet"` (pre-training on ImageNet-1k), or the path to the weights
+#' file to be loaded. Defaults to `"imagenet"`.
#'
-#' The `inception_resnet_v2_preprocess_input()` function should be used for image
-#' preprocessing.
+#' @param input_tensor
+#' Optional Keras tensor
+#' (i.e. output of `layers.Input()`)
+#' to use as image input for the model.
#'
-#' @section Reference:
-#' - [Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning](https://arxiv.org/abs/1602.07261)(https://arxiv.org/abs/1512.00567)
+#' @param input_shape
+#' Optional shape tuple, only to be specified
+#' if `include_top` is `FALSE`.
+#' It should have exactly 3 inputs channels. #' -#' @export -application_inception_resnet_v2 <- -function(include_top = TRUE, weights = "imagenet", input_tensor = NULL, input_shape = NULL, - pooling = NULL, classes = 1000, classifier_activation='softmax', ...) { - verify_application_prerequistes() - args <- capture_args(match.call(), list( - input_shape = normalize_shape, classes = as.integer)) - do.call(keras$applications$InceptionResNetV2, args) -} - -#' @rdname application_inception_resnet_v2 -#' @export -inception_resnet_v2_preprocess_input <- function(x) { - preprocess_input(x, keras$applications$inception_resnet_v2$preprocess_input) -} - -#' Decodes the prediction of an ImageNet model. +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. Defaults to `NULL`. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the last convolutional layer. +#' - `avg` means that global average pooling +#' will be applied to the output of the +#' last convolutional layer, and thus +#' the output of the model will be a 2D tensor. +#' - `max` means that global max pooling will +#' be applied. +#' +#' @param classes +#' Optional number of classes to classify images +#' into, only to be specified if `include_top` is `TRUE`, and +#' if no `weights` argument is specified. Defaults to 1000 (number of +#' ImageNet classes). +#' +#' @param classifier_activation +#' A `str` or callable. The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' Defaults to `"softmax"`. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. #' -#' @param preds Tensor encoding a batch of predictions. -#' @param top integer, how many top-guesses to return. +#' @param include_preprocessing +#' Boolean, whether to include the preprocessing layer at the bottom of the network. #' -#' @return List of data frames with variables `class_name`, `class_description`, -#' and `score` (one data frame per sample in batch input). +#' @param model_name +#' String, name for the model. #' #' @export -imagenet_decode_predictions <- function(preds, top = 5) { - - # decode predictions - # we use the vgg16 function which is the same as imagenet_utils - decoded <- keras$applications$vgg16$decode_predictions( - preds = preds, - top = as.integer(top) - ) - - # convert to a list of data frames - lapply(decoded, function(x) { - m <- t(sapply(1:length(x), function(n) x[[n]])) - data.frame(class_name = as.character(m[,1]), - class_description = as.character(m[,2]), - score = as.numeric(m[,3]), - stringsAsFactors = FALSE) - }) +#' @seealso +#' + +# + +#' @tether keras.applications.ConvNeXtXLarge +application_convnext_xlarge <- +function (model_name = "convnext_xlarge", include_top = TRUE, + include_preprocessing = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax") +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$ConvNeXtXLarge, args) + set_preprocessing_attributes(model, keras$applications$convnext) } -#' Preprocesses a tensor or array encoding a batch of images. +#' Instantiates the Densenet121 architecture. #' -#' @param x Input Numpy or symbolic tensor, 3D or 4D. -#' @param data_format Data format of the image tensor/array. 
-#' @param mode One of "caffe", "tf", or "torch" -#' - caffe: will convert the images from RGB to BGR, -#' then will zero-center each color channel with -#' respect to the ImageNet dataset, -#' without scaling. -#' - tf: will scale pixels between -1 and 1, sample-wise. -#' - torch: will scale pixels between 0 and 1 and then -#' will normalize each channel with respect to the -#' ImageNet dataset. +#' @description #' -#' @return Preprocessed tensor or array. +#' # Reference +#' - [Densely Connected Convolutional Networks]( +#' https://arxiv.org/abs/1608.06993) (CVPR 2017) #' -#' @export -imagenet_preprocess_input <- function(x, data_format = NULL, mode = "caffe") { - args <- list( - x = x, - # we use the vgg16 function which is the same as imagenet_utils - preprocessor = keras$applications$vgg16$preprocess_input - ) - if (keras_version() >= "2.0.9") { - args$data_format <- data_format - # no longer exists in 2.2 - if (tensorflow::tf_version() <= "2.1") - args$mode <- mode - } - do.call(preprocess_input, args) -} - - -#' MobileNet model architecture. -#' -#' @details -#' -#' The `mobilenet_preprocess_input()` function should be used for image -#' preprocessing. To load a saved instance of a MobileNet model use -#' the `mobilenet_load_model_hdf5()` function. To prepare image input -#' for MobileNet use `mobilenet_preprocess_input()`. To decode -#' predictions use `mobilenet_decode_predictions()`. -#' -#' @inheritParams imagenet_decode_predictions -#' @inheritParams load_model_hdf5 -#' @inheritParams application_xception -#' -#' @param input_shape optional shape list, only to be specified if `include_top` -#' is FALSE (otherwise the input shape has to be `(224, 224, 3)` (with -#' `channels_last` data format) or (3, 224, 224) (with `channels_first` data -#' format). It should have exactly 3 inputs channels, and width and height -#' should be no smaller than 32. E.g. `(200, 200, 3)` would be one valid -#' value. -#' @param alpha controls the width of the network. -#' - If `alpha` < 1.0, proportionally decreases the number of filters in each layer. -#' - If `alpha` > 1.0, proportionally increases the number of filters in each layer. -#' - If `alpha` = 1, default number of filters from the paper are used at each layer. -#' @param depth_multiplier depth multiplier for depthwise convolution (also -#' called the resolution multiplier) -#' @param dropout dropout rate -#' @param include_top whether to include the fully-connected layer at the top of -#' the network. -#' @param weights `NULL` (random initialization), `imagenet` (ImageNet -#' weights), or the path to the weights file to be loaded. -#' @param input_tensor optional Keras tensor (i.e. output of `layer_input()`) -#' to use as image input for the model. -#' @param pooling Optional pooling mode for feature extraction when -#' `include_top` is `FALSE`. -#' - `NULL` means that the output of the model will be the 4D tensor output -#' of the last convolutional layer. -#' - `avg` means that global average pooling will be applied to the output -#' of the last convolutional layer, and thus the output of the model will -#' be a 2D tensor. -#' - `max` means that global max pooling will be applied. -#' @param classes optional number of classes to classify images into, only to be -#' specified if `include_top` is TRUE, and if no `weights` argument is -#' specified. -#' @param x input tensor, 4D -#' -#' @return `application_mobilenet()` and `mobilenet_load_model_hdf5()` return a -#' Keras model instance. 
`mobilenet_preprocess_input()` returns image input -#' suitable for feeding into a mobilenet model. `mobilenet_decode_predictions()` -#' returns a list of data frames with variables `class_name`, `class_description`, -#' and `score` (one data frame per sample in batch input). +#' Optionally loads weights pre-trained on ImageNet. +#' Note that the data format convention used by the model is +#' the one specified in your Keras config at `~/.keras/keras.json`. #' -#' @section Reference: -#' - [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/pdf/1704.04861v1.pdf). +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For DenseNet, call [`application_preprocess_inputs()`] +#' on your inputs before passing them to the model. #' -#' @export -application_mobilenet <- -function(input_shape = NULL, - alpha = 1.0, - depth_multiplier = 1L, - dropout = 0.001, - include_top = TRUE, - weights = "imagenet", - input_tensor = NULL, - pooling = NULL, - classes = 1000L, - classifier_activation='softmax', - ...) { - args <- capture_args(match.call(), list( - input_shape = normalize_shape, - classes = as.integer, - depth_multiplier = as.integer)) - do.call(keras$applications$MobileNet, args) -} - - -#' @rdname application_mobilenet -#' @export -mobilenet_preprocess_input <- function(x) { - preprocess_input(x, keras$applications$mobilenet$preprocess_input) -} - -#' @rdname application_mobilenet -#' @export -mobilenet_decode_predictions <- function(preds, top = 5) { - imagenet_decode_predictions(preds, top) -} - - -#' @rdname application_mobilenet -#' @export -mobilenet_load_model_hdf5 <- function(filepath) { - - custom_objects <- list( - relu6 = keras$applications$mobilenet$relu6 - ) - - if (keras_version() < "2.1.5") - custom_objects$DepthwiseConv2D <- keras$applications$mobilenet$DepthwiseConv2D - - load_model_hdf5(filepath, custom_objects = custom_objects) -} - - - -#' MobileNetV2 model architecture +#' @returns +#' A Keras model instance. #' -#' @inheritParams application_mobilenet +#' @param include_top +#' whether to include the fully-connected +#' layer at the top of the network. #' -#' @return `application_mobilenet_v2()` and `mobilenet_v2_load_model_hdf5()` return a -#' Keras model instance. `mobilenet_v2_preprocess_input()` returns image input -#' suitable for feeding into a mobilenet v2 model. `mobilenet_v2_decode_predictions()` -#' returns a list of data frames with variables `class_name`, `class_description`, -#' and `score` (one data frame per sample in batch input). +#' @param weights +#' one of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), +#' or the path to the weights file to be loaded. +#' +#' @param input_tensor +#' optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' optional shape tuple, only to be specified +#' if `include_top` is `FALSE` (otherwise the input shape +#' has to be `(224, 224, 3)` (with `'channels_last'` data format) +#' or `(3, 224, 224)` (with `'channels_first'` data format). +#' It should have exactly 3 inputs channels, +#' and width and height should be no smaller than 32. +#' E.g. `(200, 200, 3)` would be one valid value. +#' +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the +#' last convolutional block. 
+#' - `avg` means that global average pooling +#' will be applied to the output of the +#' last convolutional block, and thus +#' the output of the model will be a 2D tensor. +#' - `max` means that global max pooling will +#' be applied. #' -#' @section Reference: -#' - [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) +#' @param classes +#' optional number of classes to classify images +#' into, only to be specified if `include_top` is `TRUE`, and +#' if no `weights` argument is specified. #' -#' @seealso application_mobilenet +#' @param classifier_activation +#' A `str` or callable. +#' The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits +#' of the "top" layer. When loading pretrained weights, +#' `classifier_activation` can only be `NULL` or `"softmax"`. #' #' @export -application_mobilenet_v2 <- -function(input_shape = NULL, - alpha = 1.0, - include_top = TRUE, - weights = "imagenet", - input_tensor = NULL, - pooling = NULL, - classes = 1000, - classifier_activation = 'softmax', - ...) +#' @seealso +#' + +# + +#' @tether keras.applications.DenseNet121 +application_densenet121 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax") { - args <- capture_args(match.call(), list( - input_shape = normalize_shape, - classes = as.integer)) - do.call(keras$applications$MobileNetV2, args) -} - -#' @rdname application_mobilenet_v2 -#' @export -mobilenet_v2_preprocess_input <- function(x) { - preprocess_input(x, keras$applications$mobilenetv2$preprocess_input) + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$DenseNet121, args) + set_preprocessing_attributes(model, keras$applications$densenet) } -#' @rdname application_mobilenet_v2 -#' @export -mobilenet_v2_decode_predictions <- function(preds, top = 5) { - imagenet_decode_predictions(preds, top) -} - - -#' @rdname application_mobilenet_v2 -#' @export -mobilenet_v2_load_model_hdf5 <- function(filepath) { - - custom_objects <- list( - relu6 = keras$applications$mobilenetv2$mobilenet_v2$relu6 - ) - - if (keras_version() < "2.1.5") - custom_objects$DepthwiseConv2D <- keras$applications$mobilenet$DepthwiseConv2D - load_model_hdf5(filepath, custom_objects = custom_objects) -} - - -#' Instantiates the MobileNetV3Large architecture +#' Instantiates the Densenet169 architecture. #' -#' @details -#' Reference: -#' - [Searching for MobileNetV3]( -#' https://arxiv.org/pdf/1905.02244.pdf) (ICCV 2019) +#' @description #' -#' The following table describes the performance of MobileNets v3: -#' ------------------------------------------------------------------------ -#' MACs stands for Multiply Adds +#' # Reference +#' - [Densely Connected Convolutional Networks]( +#' https://arxiv.org/abs/1608.06993) (CVPR 2017) +#' +#' Optionally loads weights pre-trained on ImageNet. +#' Note that the data format convention used by the model is +#' the one specified in your Keras config at `~/.keras/keras.json`. +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For DenseNet, call [`application_preprocess_inputs()`] +#' on your inputs before passing them to the model. +#' +#' @returns +#' A Keras model instance. +#' +#' @param include_top +#' whether to include the fully-connected +#' layer at the top of the network. 
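+#'
+#' @section Usage sketch:
+#' A minimal, hedged example of the preprocessing contract noted above
+#' (assuming the pretrained weights can be downloaded; a random array
+#' stands in for a real batch of 224x224 RGB images):
+#'
+#' ```r
+#' model <- application_densenet169()
+#' x <- array(runif(224 * 224 * 3, 0, 255), dim = c(1, 224, 224, 3))
+#' x <- application_preprocess_inputs(model, x)  # DenseNet-specific scaling
+#' preds <- predict(model, x)
+#' ```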
+#' +#' @param weights +#' one of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), +#' or the path to the weights file to be loaded. +#' +#' @param input_tensor +#' optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' optional shape tuple, only to be specified +#' if `include_top` is `FALSE` (otherwise the input shape +#' has to be `(224, 224, 3)` (with `'channels_last'` data format) +#' or `(3, 224, 224)` (with `'channels_first'` data format). +#' It should have exactly 3 inputs channels, +#' and width and height should be no smaller than 32. +#' E.g. `(200, 200, 3)` would be one valid value. +#' +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the +#' last convolutional block. +#' - `avg` means that global average pooling +#' will be applied to the output of the +#' last convolutional block, and thus +#' the output of the model will be a 2D tensor. +#' - `max` means that global max pooling will +#' be applied. +#' +#' @param classes +#' optional number of classes to classify images +#' into, only to be specified if `include_top` is `TRUE`, and +#' if no `weights` argument is specified. +#' +#' @param classifier_activation +#' A `str` or callable. +#' The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits +#' of the "top" layer. When loading pretrained weights, +#' `classifier_activation` can only be `NULL` or `"softmax"`. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.DenseNet169 +application_densenet169 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax") +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$DenseNet169, args) + set_preprocessing_attributes(model, keras$applications$densenet) +} + + +#' Instantiates the Densenet201 architecture. +#' +#' @description +#' +#' # Reference +#' - [Densely Connected Convolutional Networks]( +#' https://arxiv.org/abs/1608.06993) (CVPR 2017) +#' +#' Optionally loads weights pre-trained on ImageNet. +#' Note that the data format convention used by the model is +#' the one specified in your Keras config at `~/.keras/keras.json`. +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For DenseNet, call [`application_preprocess_inputs()`] +#' on your inputs before passing them to the model. +#' +#' @returns +#' A Keras model instance. +#' +#' @param include_top +#' whether to include the fully-connected +#' layer at the top of the network. +#' +#' @param weights +#' one of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), +#' or the path to the weights file to be loaded. +#' +#' @param input_tensor +#' optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' optional shape tuple, only to be specified +#' if `include_top` is `FALSE` (otherwise the input shape +#' has to be `(224, 224, 3)` (with `'channels_last'` data format) +#' or `(3, 224, 224)` (with `'channels_first'` data format). 
+#' It should have exactly 3 inputs channels, +#' and width and height should be no smaller than 32. +#' E.g. `(200, 200, 3)` would be one valid value. +#' +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the +#' last convolutional block. +#' - `avg` means that global average pooling +#' will be applied to the output of the +#' last convolutional block, and thus +#' the output of the model will be a 2D tensor. +#' - `max` means that global max pooling will +#' be applied. +#' +#' @param classes +#' optional number of classes to classify images +#' into, only to be specified if `include_top` is `TRUE`, and +#' if no `weights` argument is specified. +#' +#' @param classifier_activation +#' A `str` or callable. +#' The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits +#' of the "top" layer. When loading pretrained weights, +#' `classifier_activation` can only be `NULL` or `"softmax"`. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.DenseNet201 +application_densenet201 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax") +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$DenseNet201, args) + set_preprocessing_attributes(model, keras$applications$densenet) +} + + +#' Instantiates the EfficientNetB0 architecture. +#' +#' @description +#' +#' # Reference +#' - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks]( +#' https://arxiv.org/abs/1905.11946) (ICML 2019) +#' +#' This function returns a Keras image classification model, +#' optionally loaded with weights pre-trained on ImageNet. +#' +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For EfficientNet, input preprocessing is included as part of the model +#' (as a `Rescaling` layer), and thus +#' [`application_preprocess_inputs()`] is actually a +#' pass-through function. EfficientNet models expect their inputs to be float +#' tensors of pixels with values in the `[0-255]` range. +#' +#' @returns +#' A model instance. +#' +#' @param include_top +#' Whether to include the fully-connected +#' layer at the top of the network. Defaults to `TRUE`. +#' +#' @param weights +#' One of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), +#' or the path to the weights file to be loaded. +#' Defaults to `"imagenet"`. +#' +#' @param input_tensor +#' Optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' Optional shape tuple, only to be specified +#' if `include_top` is `FALSE`. +#' It should have exactly 3 inputs channels. +#' +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. Defaults to `NULL`. 
+#' - `NULL` means that the output of the model will be
+#' the 4D tensor output of the
+#' last convolutional layer.
+#' - `avg` means that global average pooling
+#' will be applied to the output of the
+#' last convolutional layer, and thus
+#' the output of the model will be a 2D tensor.
+#' - `max` means that global max pooling will
+#' be applied.
+#'
+#' @param classes
+#' Optional number of classes to classify images
+#' into, only to be specified if `include_top` is TRUE, and
+#' if no `weights` argument is specified. 1000 is how many
+#' ImageNet classes there are. Defaults to `1000`.
+#'
+#' @param classifier_activation
+#' A `str` or callable. The activation function to use
+#' on the "top" layer. Ignored unless `include_top=TRUE`. Set
+#' `classifier_activation=NULL` to return the logits of the "top" layer.
+#' Defaults to `'softmax'`.
+#' When loading pretrained weights, `classifier_activation` can only
+#' be `NULL` or `"softmax"`.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @export
+#' @seealso
+#' +
+# +
+#' @tether keras.applications.EfficientNetB0
+application_efficientnet_b0 <-
+function (include_top = TRUE, weights = "imagenet", input_tensor = NULL,
+    input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax",
+    ...)
+{
+    args <- capture_args(list(classes = as_integer, input_shape = normalize_shape))
+    model <- do.call(keras$applications$EfficientNetB0, args)
+    set_preprocessing_attributes(model, keras$applications$efficientnet)
+}
+
+
+#' Instantiates the EfficientNetB1 architecture.
+#'
+#' @description
+#'
+#' # Reference
+#' - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](
+#' https://arxiv.org/abs/1905.11946) (ICML 2019)
+#'
+#' This function returns a Keras image classification model,
+#' optionally loaded with weights pre-trained on ImageNet.
+#'
+#' For image classification use cases, see
+#' [this page for detailed examples](
+#' https://keras.io/api/applications/#usage-examples-for-image-classification-models).
+#'
+#' For transfer learning use cases, make sure to read the
+#' [guide to transfer learning & fine-tuning](
+#' https://keras.io/guides/transfer_learning/).
+#'
+#' # Note
+#' Each Keras Application expects a specific kind of input preprocessing.
+#' For EfficientNet, input preprocessing is included as part of the model
+#' (as a `Rescaling` layer), and thus
+#' [`application_preprocess_inputs()`] is actually a
+#' pass-through function. EfficientNet models expect their inputs to be float
+#' tensors of pixels with values in the `[0-255]` range.
+#'
+#' @returns
+#' A model instance.
+#'
+#' @param include_top
+#' Whether to include the fully-connected
+#' layer at the top of the network. Defaults to `TRUE`.
+#'
+#' @param weights
+#' One of `NULL` (random initialization),
+#' `"imagenet"` (pre-training on ImageNet),
+#' or the path to the weights file to be loaded.
+#' Defaults to `"imagenet"`.
+#'
+#' @param input_tensor
+#' Optional Keras tensor
+#' (i.e. output of `layers.Input()`)
+#' to use as image input for the model.
+#'
+#' @param input_shape
+#' Optional shape tuple, only to be specified
+#' if `include_top` is `FALSE`.
+#' It should have exactly 3 input channels.
+#'
+#' @param pooling
+#' Optional pooling mode for feature extraction
+#' when `include_top` is `FALSE`. Defaults to `NULL`.
+#' - `NULL` means that the output of the model will be
+#' the 4D tensor output of the
+#' last convolutional layer.
+#' - `avg` means that global average pooling +#' will be applied to the output of the +#' last convolutional layer, and thus +#' the output of the model will be a 2D tensor. +#' - `max` means that global max pooling will +#' be applied. +#' +#' @param classes +#' Optional number of classes to classify images +#' into, only to be specified if `include_top` is TRUE, and +#' if no `weights` argument is specified. 1000 is how many +#' ImageNet classes there are. Defaults to `1000`. +#' +#' @param classifier_activation +#' A `str` or callable. The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' Defaults to `'softmax'`. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.EfficientNetB1 +application_efficientnet_b1 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax", + ...) +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$EfficientNetB1, args) + set_preprocessing_attributes(model, keras$applications$efficientnet) +} + + +#' Instantiates the EfficientNetB2 architecture. +#' +#' @description +#' +#' # Reference +#' - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks]( +#' https://arxiv.org/abs/1905.11946) (ICML 2019) +#' +#' This function returns a Keras image classification model, +#' optionally loaded with weights pre-trained on ImageNet. +#' +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For EfficientNet, input preprocessing is included as part of the model +#' (as a `Rescaling` layer), and thus +#' [`application_preprocess_inputs()`] is actually a +#' pass-through function. EfficientNet models expect their inputs to be float +#' tensors of pixels with values in the `[0-255]` range. +#' +#' @returns +#' A model instance. +#' +#' @param include_top +#' Whether to include the fully-connected +#' layer at the top of the network. Defaults to `TRUE`. +#' +#' @param weights +#' One of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), +#' or the path to the weights file to be loaded. +#' Defaults to `"imagenet"`. +#' +#' @param input_tensor +#' Optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' Optional shape tuple, only to be specified +#' if `include_top` is `FALSE`. +#' It should have exactly 3 inputs channels. +#' +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. Defaults to `NULL`. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the +#' last convolutional layer. 
+#' - `avg` means that global average pooling +#' will be applied to the output of the +#' last convolutional layer, and thus +#' the output of the model will be a 2D tensor. +#' - `max` means that global max pooling will +#' be applied. +#' +#' @param classes +#' Optional number of classes to classify images +#' into, only to be specified if `include_top` is TRUE, and +#' if no `weights` argument is specified. 1000 is how many +#' ImageNet classes there are. Defaults to `1000`. +#' +#' @param classifier_activation +#' A `str` or callable. The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' Defaults to `'softmax'`. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.EfficientNetB2 +application_efficientnet_b2 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax", + ...) +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$EfficientNetB2, args) + set_preprocessing_attributes(model, keras$applications$efficientnet) +} + + +#' Instantiates the EfficientNetB3 architecture. +#' +#' @description +#' +#' # Reference +#' - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks]( +#' https://arxiv.org/abs/1905.11946) (ICML 2019) +#' +#' This function returns a Keras image classification model, +#' optionally loaded with weights pre-trained on ImageNet. +#' +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For EfficientNet, input preprocessing is included as part of the model +#' (as a `Rescaling` layer), and thus +#' [`application_preprocess_inputs()`] is actually a +#' pass-through function. EfficientNet models expect their inputs to be float +#' tensors of pixels with values in the `[0-255]` range. +#' +#' @returns +#' A model instance. +#' +#' @param include_top +#' Whether to include the fully-connected +#' layer at the top of the network. Defaults to `TRUE`. +#' +#' @param weights +#' One of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), +#' or the path to the weights file to be loaded. +#' Defaults to `"imagenet"`. +#' +#' @param input_tensor +#' Optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' Optional shape tuple, only to be specified +#' if `include_top` is `FALSE`. +#' It should have exactly 3 inputs channels. +#' +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. Defaults to `NULL`. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the +#' last convolutional layer. 
+#' - `avg` means that global average pooling +#' will be applied to the output of the +#' last convolutional layer, and thus +#' the output of the model will be a 2D tensor. +#' - `max` means that global max pooling will +#' be applied. +#' +#' @param classes +#' Optional number of classes to classify images +#' into, only to be specified if `include_top` is TRUE, and +#' if no `weights` argument is specified. 1000 is how many +#' ImageNet classes there are. Defaults to `1000`. +#' +#' @param classifier_activation +#' A `str` or callable. The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' Defaults to `'softmax'`. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.EfficientNetB3 +application_efficientnet_b3 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax", + ...) +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$EfficientNetB3, args) + set_preprocessing_attributes(model, keras$applications$efficientnet) +} + + +#' Instantiates the EfficientNetB4 architecture. +#' +#' @description +#' +#' # Reference +#' - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks]( +#' https://arxiv.org/abs/1905.11946) (ICML 2019) +#' +#' This function returns a Keras image classification model, +#' optionally loaded with weights pre-trained on ImageNet. +#' +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For EfficientNet, input preprocessing is included as part of the model +#' (as a `Rescaling` layer), and thus +#' [`application_preprocess_inputs()`] is actually a +#' pass-through function. EfficientNet models expect their inputs to be float +#' tensors of pixels with values in the `[0-255]` range. +#' +#' @returns +#' A model instance. +#' +#' @param include_top +#' Whether to include the fully-connected +#' layer at the top of the network. Defaults to `TRUE`. +#' +#' @param weights +#' One of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), +#' or the path to the weights file to be loaded. +#' Defaults to `"imagenet"`. +#' +#' @param input_tensor +#' Optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' Optional shape tuple, only to be specified +#' if `include_top` is `FALSE`. +#' It should have exactly 3 inputs channels. +#' +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. Defaults to `NULL`. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the +#' last convolutional layer. 
+#' - `avg` means that global average pooling +#' will be applied to the output of the +#' last convolutional layer, and thus +#' the output of the model will be a 2D tensor. +#' - `max` means that global max pooling will +#' be applied. +#' +#' @param classes +#' Optional number of classes to classify images +#' into, only to be specified if `include_top` is TRUE, and +#' if no `weights` argument is specified. 1000 is how many +#' ImageNet classes there are. Defaults to `1000`. +#' +#' @param classifier_activation +#' A `str` or callable. The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' Defaults to `'softmax'`. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.EfficientNetB4 +application_efficientnet_b4 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax", + ...) +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$EfficientNetB4, args) + set_preprocessing_attributes(model, keras$applications$efficientnet) +} + + +#' Instantiates the EfficientNetB5 architecture. +#' +#' @description +#' +#' # Reference +#' - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks]( +#' https://arxiv.org/abs/1905.11946) (ICML 2019) +#' +#' This function returns a Keras image classification model, +#' optionally loaded with weights pre-trained on ImageNet. +#' +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For EfficientNet, input preprocessing is included as part of the model +#' (as a `Rescaling` layer), and thus +#' [`application_preprocess_inputs()`] is actually a +#' pass-through function. EfficientNet models expect their inputs to be float +#' tensors of pixels with values in the `[0-255]` range. +#' +#' @returns +#' A model instance. +#' +#' @param include_top +#' Whether to include the fully-connected +#' layer at the top of the network. Defaults to `TRUE`. +#' +#' @param weights +#' One of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), +#' or the path to the weights file to be loaded. +#' Defaults to `"imagenet"`. +#' +#' @param input_tensor +#' Optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' Optional shape tuple, only to be specified +#' if `include_top` is `FALSE`. +#' It should have exactly 3 inputs channels. +#' +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. Defaults to `NULL`. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the +#' last convolutional layer. 
+#' - `avg` means that global average pooling +#' will be applied to the output of the +#' last convolutional layer, and thus +#' the output of the model will be a 2D tensor. +#' - `max` means that global max pooling will +#' be applied. +#' +#' @param classes +#' Optional number of classes to classify images +#' into, only to be specified if `include_top` is TRUE, and +#' if no `weights` argument is specified. 1000 is how many +#' ImageNet classes there are. Defaults to `1000`. +#' +#' @param classifier_activation +#' A `str` or callable. The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' Defaults to `'softmax'`. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.EfficientNetB5 +application_efficientnet_b5 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax", + ...) +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$EfficientNetB5, args) + set_preprocessing_attributes(model, keras$applications$efficientnet) +} + + +#' Instantiates the EfficientNetB6 architecture. +#' +#' @description +#' +#' # Reference +#' - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks]( +#' https://arxiv.org/abs/1905.11946) (ICML 2019) +#' +#' This function returns a Keras image classification model, +#' optionally loaded with weights pre-trained on ImageNet. +#' +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For EfficientNet, input preprocessing is included as part of the model +#' (as a `Rescaling` layer), and thus +#' [`application_preprocess_inputs()`] is actually a +#' pass-through function. EfficientNet models expect their inputs to be float +#' tensors of pixels with values in the `[0-255]` range. +#' +#' @returns +#' A model instance. +#' +#' @param include_top +#' Whether to include the fully-connected +#' layer at the top of the network. Defaults to `TRUE`. +#' +#' @param weights +#' One of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), +#' or the path to the weights file to be loaded. +#' Defaults to `"imagenet"`. +#' +#' @param input_tensor +#' Optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' Optional shape tuple, only to be specified +#' if `include_top` is `FALSE`. +#' It should have exactly 3 inputs channels. +#' +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. Defaults to `NULL`. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the +#' last convolutional layer. 
+#' - `avg` means that global average pooling +#' will be applied to the output of the +#' last convolutional layer, and thus +#' the output of the model will be a 2D tensor. +#' - `max` means that global max pooling will +#' be applied. +#' +#' @param classes +#' Optional number of classes to classify images +#' into, only to be specified if `include_top` is TRUE, and +#' if no `weights` argument is specified. 1000 is how many +#' ImageNet classes there are. Defaults to `1000`. +#' +#' @param classifier_activation +#' A `str` or callable. The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' Defaults to `'softmax'`. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.EfficientNetB6 +application_efficientnet_b6 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax", + ...) +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$EfficientNetB6, args) + set_preprocessing_attributes(model, keras$applications$efficientnet) +} + + +#' Instantiates the EfficientNetB7 architecture. +#' +#' @description +#' +#' # Reference +#' - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks]( +#' https://arxiv.org/abs/1905.11946) (ICML 2019) +#' +#' This function returns a Keras image classification model, +#' optionally loaded with weights pre-trained on ImageNet. +#' +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For EfficientNet, input preprocessing is included as part of the model +#' (as a `Rescaling` layer), and thus +#' [`application_preprocess_inputs()`] is actually a +#' pass-through function. EfficientNet models expect their inputs to be float +#' tensors of pixels with values in the `[0-255]` range. +#' +#' @returns +#' A model instance. +#' +#' @param include_top +#' Whether to include the fully-connected +#' layer at the top of the network. Defaults to `TRUE`. +#' +#' @param weights +#' One of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), +#' or the path to the weights file to be loaded. +#' Defaults to `"imagenet"`. +#' +#' @param input_tensor +#' Optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' Optional shape tuple, only to be specified +#' if `include_top` is `FALSE`. +#' It should have exactly 3 inputs channels. +#' +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. Defaults to `NULL`. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the +#' last convolutional layer. 
+#' - `avg` means that global average pooling +#' will be applied to the output of the +#' last convolutional layer, and thus +#' the output of the model will be a 2D tensor. +#' - `max` means that global max pooling will +#' be applied. +#' +#' @param classes +#' Optional number of classes to classify images +#' into, only to be specified if `include_top` is TRUE, and +#' if no `weights` argument is specified. 1000 is how many +#' ImageNet classes there are. Defaults to `1000`. +#' +#' @param classifier_activation +#' A `str` or callable. The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' Defaults to `'softmax'`. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.EfficientNetB7 +application_efficientnet_b7 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax", + ...) +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$EfficientNetB7, args) + set_preprocessing_attributes(model, keras$applications$efficientnet) +} + + +#' Instantiates the EfficientNetV2B0 architecture. +#' +#' @description +#' +#' # Reference +#' - [EfficientNetV2: Smaller Models and Faster Training]( +#' https://arxiv.org/abs/2104.00298) (ICML 2021) +#' +#' This function returns a Keras image classification model, +#' optionally loaded with weights pre-trained on ImageNet. +#' +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For EfficientNetV2, by default input preprocessing is included as a part of +#' the model (as a `Rescaling` layer), and thus +#' [`application_preprocess_inputs()`] is actually a +#' pass-through function. In this use case, EfficientNetV2 models expect their +#' inputs to be float tensors of pixels with values in the `[0, 255]` range. +#' At the same time, preprocessing as a part of the model (i.e. `Rescaling` +#' layer) can be disabled by setting `include_preprocessing` argument to `FALSE`. +#' With preprocessing disabled EfficientNetV2 models expect their inputs to be +#' float tensors of pixels with values in the `[-1, 1]` range. +#' +#' @returns +#' A model instance. +#' +#' @param include_top +#' Boolean, whether to include the fully-connected +#' layer at the top of the network. Defaults to `TRUE`. +#' +#' @param weights +#' One of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), +#' or the path to the weights file to be loaded. Defaults to `"imagenet"`. +#' +#' @param input_tensor +#' Optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' Optional shape tuple, only to be specified +#' if `include_top` is `FALSE`. +#' It should have exactly 3 inputs channels. 
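+#'
+#' @section Usage sketch:
+#' A hedged illustration of the two input ranges described in the Note
+#' above (random arrays stand in for real images):
+#'
+#' ```r
+#' # Default: the Rescaling layer is included, so raw [0, 255] pixels work.
+#' m1 <- application_efficientnet_v2b0()
+#' p1 <- predict(m1, array(runif(224^2 * 3, 0, 255), dim = c(1, 224, 224, 3)))
+#'
+#' # Preprocessing disabled: inputs must already be scaled to [-1, 1].
+#' m2 <- application_efficientnet_v2b0(include_preprocessing = FALSE)
+#' p2 <- predict(m2, array(runif(224^2 * 3, -1, 1), dim = c(1, 224, 224, 3)))
+#' ```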
+#' +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. Defaults to NULL. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the +#' last convolutional layer. +#' - `"avg"` means that global average pooling +#' will be applied to the output of the +#' last convolutional layer, and thus +#' the output of the model will be a 2D tensor. +#' - `"max"` means that global max pooling will +#' be applied. +#' +#' @param classes +#' Optional number of classes to classify images +#' into, only to be specified if `include_top` is `TRUE`, and +#' if no `weights` argument is specified. Defaults to 1000 (number of +#' ImageNet classes). +#' +#' @param classifier_activation +#' A string or callable. The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' Defaults to `"softmax"`. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. +#' +#' @param include_preprocessing +#' Boolean, whether to include the preprocessing layer at the bottom of the network. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.EfficientNetV2B0 +application_efficientnet_v2b0 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax", + include_preprocessing = TRUE) +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$EfficientNetV2B0, args) + set_preprocessing_attributes(model, keras$applications$efficientnet_v2) +} + + +#' Instantiates the EfficientNetV2B1 architecture. +#' +#' @description +#' +#' # Reference +#' - [EfficientNetV2: Smaller Models and Faster Training]( +#' https://arxiv.org/abs/2104.00298) (ICML 2021) +#' +#' This function returns a Keras image classification model, +#' optionally loaded with weights pre-trained on ImageNet. +#' +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For EfficientNetV2, by default input preprocessing is included as a part of +#' the model (as a `Rescaling` layer), and thus +#' [`application_preprocess_inputs()`] is actually a +#' pass-through function. In this use case, EfficientNetV2 models expect their +#' inputs to be float tensors of pixels with values in the `[0, 255]` range. +#' At the same time, preprocessing as a part of the model (i.e. `Rescaling` +#' layer) can be disabled by setting `include_preprocessing` argument to `FALSE`. +#' With preprocessing disabled EfficientNetV2 models expect their inputs to be +#' float tensors of pixels with values in the `[-1, 1]` range. +#' +#' @returns +#' A model instance. +#' +#' @param include_top +#' Boolean, whether to include the fully-connected +#' layer at the top of the network. Defaults to `TRUE`. +#' +#' @param weights +#' One of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), +#' or the path to the weights file to be loaded. Defaults to `"imagenet"`. 
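+#'
+#' @section Usage sketch:
+#' One hedged way to use this model as a feature extractor: with
+#' `include_top = FALSE` a custom `input_shape` is allowed, and
+#' `pooling = "avg"` yields one feature vector per image:
+#'
+#' ```r
+#' base <- application_efficientnet_v2b1(
+#'   include_top = FALSE, input_shape = c(160, 160, 3), pooling = "avg"
+#' )
+#' x <- array(runif(160^2 * 3, 0, 255), dim = c(1, 160, 160, 3))
+#' features <- predict(base, x)  # 2D tensor: one row of features per image
+#' ```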
+#' +#' @param input_tensor +#' Optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' Optional shape tuple, only to be specified +#' if `include_top` is `FALSE`. +#' It should have exactly 3 inputs channels. +#' +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. Defaults to NULL. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the +#' last convolutional layer. +#' - `"avg"` means that global average pooling +#' will be applied to the output of the +#' last convolutional layer, and thus +#' the output of the model will be a 2D tensor. +#' - `"max"` means that global max pooling will +#' be applied. +#' +#' @param classes +#' Optional number of classes to classify images +#' into, only to be specified if `include_top` is `TRUE`, and +#' if no `weights` argument is specified. Defaults to 1000 (number of +#' ImageNet classes). +#' +#' @param classifier_activation +#' A string or callable. The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' Defaults to `"softmax"`. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. +#' +#' @param include_preprocessing +#' Boolean, whether to include the preprocessing layer at the bottom of the network. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.EfficientNetV2B1 +application_efficientnet_v2b1 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax", + include_preprocessing = TRUE) +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$EfficientNetV2B1, args) + set_preprocessing_attributes(model, keras$applications$efficientnet_v2) +} + + +#' Instantiates the EfficientNetV2B2 architecture. +#' +#' @description +#' +#' # Reference +#' - [EfficientNetV2: Smaller Models and Faster Training]( +#' https://arxiv.org/abs/2104.00298) (ICML 2021) +#' +#' This function returns a Keras image classification model, +#' optionally loaded with weights pre-trained on ImageNet. +#' +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For EfficientNetV2, by default input preprocessing is included as a part of +#' the model (as a `Rescaling` layer), and thus +#' [`application_preprocess_inputs()`] is actually a +#' pass-through function. In this use case, EfficientNetV2 models expect their +#' inputs to be float tensors of pixels with values in the `[0, 255]` range. +#' At the same time, preprocessing as a part of the model (i.e. `Rescaling` +#' layer) can be disabled by setting `include_preprocessing` argument to `FALSE`. +#' With preprocessing disabled EfficientNetV2 models expect their inputs to be +#' float tensors of pixels with values in the `[-1, 1]` range. +#' +#' @returns +#' A model instance. 
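+#'
+#' @section Usage sketch:
+#' A hedged example of retrieving logits rather than probabilities via
+#' `classifier_activation`, then normalizing manually (here with
+#' `op_softmax()`; a random array stands in for real 260x260 images):
+#'
+#' ```r
+#' model <- application_efficientnet_v2b2(classifier_activation = NULL)
+#' x <- array(runif(260^2 * 3, 0, 255), dim = c(1, 260, 260, 3))
+#' logits <- predict(model, x)  # raw, unnormalized scores
+#' probs <- op_softmax(logits)
+#' ```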
+#' +#' @param include_top +#' Boolean, whether to include the fully-connected +#' layer at the top of the network. Defaults to `TRUE`. +#' +#' @param weights +#' One of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), +#' or the path to the weights file to be loaded. Defaults to `"imagenet"`. +#' +#' @param input_tensor +#' Optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' Optional shape tuple, only to be specified +#' if `include_top` is `FALSE`. +#' It should have exactly 3 inputs channels. +#' +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. Defaults to NULL. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the +#' last convolutional layer. +#' - `"avg"` means that global average pooling +#' will be applied to the output of the +#' last convolutional layer, and thus +#' the output of the model will be a 2D tensor. +#' - `"max"` means that global max pooling will +#' be applied. +#' +#' @param classes +#' Optional number of classes to classify images +#' into, only to be specified if `include_top` is `TRUE`, and +#' if no `weights` argument is specified. Defaults to 1000 (number of +#' ImageNet classes). +#' +#' @param classifier_activation +#' A string or callable. The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' Defaults to `"softmax"`. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. +#' +#' @param include_preprocessing +#' Boolean, whether to include the preprocessing layer at the bottom of the network. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.EfficientNetV2B2 +application_efficientnet_v2b2 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax", + include_preprocessing = TRUE) +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$EfficientNetV2B2, args) + set_preprocessing_attributes(model, keras$applications$efficientnet_v2) +} + + +#' Instantiates the EfficientNetV2B3 architecture. +#' +#' @description +#' +#' # Reference +#' - [EfficientNetV2: Smaller Models and Faster Training]( +#' https://arxiv.org/abs/2104.00298) (ICML 2021) +#' +#' This function returns a Keras image classification model, +#' optionally loaded with weights pre-trained on ImageNet. +#' +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For EfficientNetV2, by default input preprocessing is included as a part of +#' the model (as a `Rescaling` layer), and thus +#' [`application_preprocess_inputs()`] is actually a +#' pass-through function. In this use case, EfficientNetV2 models expect their +#' inputs to be float tensors of pixels with values in the `[0, 255]` range. +#' At the same time, preprocessing as a part of the model (i.e. 
`Rescaling` +#' layer) can be disabled by setting `include_preprocessing` argument to `FALSE`. +#' With preprocessing disabled EfficientNetV2 models expect their inputs to be +#' float tensors of pixels with values in the `[-1, 1]` range. +#' +#' @returns +#' A model instance. +#' +#' @param include_top +#' Boolean, whether to include the fully-connected +#' layer at the top of the network. Defaults to `TRUE`. +#' +#' @param weights +#' One of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), +#' or the path to the weights file to be loaded. Defaults to `"imagenet"`. +#' +#' @param input_tensor +#' Optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' Optional shape tuple, only to be specified +#' if `include_top` is `FALSE`. +#' It should have exactly 3 inputs channels. +#' +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. Defaults to `NULL`. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the +#' last convolutional layer. +#' - `"avg"` means that global average pooling +#' will be applied to the output of the +#' last convolutional layer, and thus +#' the output of the model will be a 2D tensor. +#' - `"max"` means that global max pooling will +#' be applied. +#' +#' @param classes +#' Optional number of classes to classify images +#' into, only to be specified if `include_top` is `TRUE`, and +#' if no `weights` argument is specified. Defaults to 1000 (number of +#' ImageNet classes). +#' +#' @param classifier_activation +#' A string or callable. The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' Defaults to `"softmax"`. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. +#' +#' @param include_preprocessing +#' Boolean, whether to include the preprocessing layer at the bottom of the network. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.EfficientNetV2B3 +application_efficientnet_v2b3 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax", + include_preprocessing = TRUE) +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$EfficientNetV2B3, args) + set_preprocessing_attributes(model, keras$applications$efficientnet_v2) +} + + +#' Instantiates the EfficientNetV2L architecture. +#' +#' @description +#' +#' # Reference +#' - [EfficientNetV2: Smaller Models and Faster Training]( +#' https://arxiv.org/abs/2104.00298) (ICML 2021) +#' +#' This function returns a Keras image classification model, +#' optionally loaded with weights pre-trained on ImageNet. +#' +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. 
+#' For EfficientNetV2, by default input preprocessing is included as a part of +#' the model (as a `Rescaling` layer), and thus +#' [`application_preprocess_inputs()`] is actually a +#' pass-through function. In this use case, EfficientNetV2 models expect their +#' inputs to be float tensors of pixels with values in the `[0, 255]` range. +#' At the same time, preprocessing as a part of the model (i.e. `Rescaling` +#' layer) can be disabled by setting `include_preprocessing` argument to `FALSE`. +#' With preprocessing disabled EfficientNetV2 models expect their inputs to be +#' float tensors of pixels with values in the `[-1, 1]` range. +#' +#' @returns +#' A model instance. +#' +#' @param include_top +#' Boolean, whether to include the fully-connected +#' layer at the top of the network. Defaults to `TRUE`. +#' +#' @param weights +#' One of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), +#' or the path to the weights file to be loaded. Defaults to `"imagenet"`. +#' +#' @param input_tensor +#' Optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' Optional shape tuple, only to be specified +#' if `include_top` is `FALSE`. +#' It should have exactly 3 inputs channels. +#' +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. Defaults to `NULL`. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the +#' last convolutional layer. +#' - `"avg"` means that global average pooling +#' will be applied to the output of the +#' last convolutional layer, and thus +#' the output of the model will be a 2D tensor. +#' - `"max"` means that global max pooling will +#' be applied. +#' +#' @param classes +#' Optional number of classes to classify images +#' into, only to be specified if `include_top` is `TRUE`, and +#' if no `weights` argument is specified. Defaults to 1000 (number of +#' ImageNet classes). +#' +#' @param classifier_activation +#' A string or callable. The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' Defaults to `"softmax"`. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. +#' +#' @param include_preprocessing +#' Boolean, whether to include the preprocessing layer at the bottom of the network. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.EfficientNetV2L +application_efficientnet_v2l <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax", + include_preprocessing = TRUE) +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$EfficientNetV2L, args) + set_preprocessing_attributes(model, keras$applications$efficientnet_v2) +} + + +#' Instantiates the EfficientNetV2M architecture. +#' +#' @description +#' +#' # Reference +#' - [EfficientNetV2: Smaller Models and Faster Training]( +#' https://arxiv.org/abs/2104.00298) (ICML 2021) +#' +#' This function returns a Keras image classification model, +#' optionally loaded with weights pre-trained on ImageNet. +#' +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). 
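+#'
+#' For example, a basic classification sketch (illustrative; the image file
+#' and its path are hypothetical):
+#' ```r
+#' library(keras3)
+#' model <- application_efficientnet_v2m(weights = "imagenet")
+#' img <- image_load("elephant.jpg", target_size = c(480, 480))
+#' x <- array(image_to_array(img), dim = c(1, 480, 480, 3))
+#' # pixels stay in [0, 255]; the in-model `Rescaling` layer handles scaling
+#' preds <- predict(model, x)
+#' application_decode_predictions(model, preds, top = 3)
+#' ```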
+#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For EfficientNetV2, by default input preprocessing is included as a part of +#' the model (as a `Rescaling` layer), and thus +#' [`application_preprocess_inputs()`] is actually a +#' pass-through function. In this use case, EfficientNetV2 models expect their +#' inputs to be float tensors of pixels with values in the `[0, 255]` range. +#' At the same time, preprocessing as a part of the model (i.e. `Rescaling` +#' layer) can be disabled by setting `include_preprocessing` argument to `FALSE`. +#' With preprocessing disabled EfficientNetV2 models expect their inputs to be +#' float tensors of pixels with values in the `[-1, 1]` range. +#' +#' @returns +#' A model instance. +#' +#' @param include_top +#' Boolean, whether to include the fully-connected +#' layer at the top of the network. Defaults to `TRUE`. +#' +#' @param weights +#' One of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), +#' or the path to the weights file to be loaded. Defaults to `"imagenet"`. +#' +#' @param input_tensor +#' Optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' Optional shape tuple, only to be specified +#' if `include_top` is `FALSE`. +#' It should have exactly 3 inputs channels. +#' +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. Defaults to `NULL`. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the +#' last convolutional layer. +#' - `"avg"` means that global average pooling +#' will be applied to the output of the +#' last convolutional layer, and thus +#' the output of the model will be a 2D tensor. +#' - `"max"` means that global max pooling will +#' be applied. +#' +#' @param classes +#' Optional number of classes to classify images +#' into, only to be specified if `include_top` is `TRUE`, and +#' if no `weights` argument is specified. Defaults to 1000 (number of +#' ImageNet classes). +#' +#' @param classifier_activation +#' A string or callable. The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' Defaults to `"softmax"`. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. +#' +#' @param include_preprocessing +#' Boolean, whether to include the preprocessing layer at the bottom of the network. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.EfficientNetV2M +application_efficientnet_v2m <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax", + include_preprocessing = TRUE) +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$EfficientNetV2M, args) + set_preprocessing_attributes(model, keras$applications$efficientnet_v2) +} + + +#' Instantiates the EfficientNetV2S architecture. 
+#' +#' @description +#' +#' # Reference +#' - [EfficientNetV2: Smaller Models and Faster Training]( +#' https://arxiv.org/abs/2104.00298) (ICML 2021) +#' +#' This function returns a Keras image classification model, +#' optionally loaded with weights pre-trained on ImageNet. +#' +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For EfficientNetV2, by default input preprocessing is included as a part of +#' the model (as a `Rescaling` layer), and thus +#' [`application_preprocess_inputs()`] is actually a +#' pass-through function. In this use case, EfficientNetV2 models expect their +#' inputs to be float tensors of pixels with values in the `[0, 255]` range. +#' At the same time, preprocessing as a part of the model (i.e. `Rescaling` +#' layer) can be disabled by setting `include_preprocessing` argument to `FALSE`. +#' With preprocessing disabled EfficientNetV2 models expect their inputs to be +#' float tensors of pixels with values in the `[-1, 1]` range. +#' +#' @returns +#' A model instance. +#' +#' @param include_top +#' Boolean, whether to include the fully-connected +#' layer at the top of the network. Defaults to `TRUE`. +#' +#' @param weights +#' One of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), +#' or the path to the weights file to be loaded. Defaults to `"imagenet"`. +#' +#' @param input_tensor +#' Optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' Optional shape tuple, only to be specified +#' if `include_top` is `FALSE`. +#' It should have exactly 3 inputs channels. +#' +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. Defaults to `NULL`. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the +#' last convolutional layer. +#' - `"avg"` means that global average pooling +#' will be applied to the output of the +#' last convolutional layer, and thus +#' the output of the model will be a 2D tensor. +#' - `"max"` means that global max pooling will +#' be applied. +#' +#' @param classes +#' Optional number of classes to classify images +#' into, only to be specified if `include_top` is `TRUE`, and +#' if no `weights` argument is specified. Defaults to 1000 (number of +#' ImageNet classes). +#' +#' @param classifier_activation +#' A string or callable. The activation function to use +#' on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' Defaults to `"softmax"`. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. +#' +#' @param include_preprocessing +#' Boolean, whether to include the preprocessing layer at the bottom of the network. 
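+#'
+#' # Example
+#' A minimal usage sketch (illustrative, not part of the upstream docstring):
+#' ```r
+#' library(keras3)
+#' model <- application_efficientnet_v2s(weights = "imagenet")
+#' # V2S expects 384 x 384 inputs by default; pixels stay in [0, 255]
+#' # because the `Rescaling` layer is part of the model
+#' x <- random_uniform(c(1, 384, 384, 3), maxval = 255)
+#' preds <- predict(model, x)  # shape (1, 1000)
+#' ```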
+#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.EfficientNetV2S +application_efficientnet_v2s <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax", + include_preprocessing = TRUE) +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$EfficientNetV2S, args) + set_preprocessing_attributes(model, keras$applications$efficientnet_v2) +} + + +#' Instantiates the Inception-ResNet v2 architecture. +#' +#' @description +#' +#' # Reference +#' - [Inception-v4, Inception-ResNet and the Impact of +#' Residual Connections on Learning](https://arxiv.org/abs/1602.07261) +#' (AAAI 2017) +#' +#' This function returns a Keras image classification model, +#' optionally loaded with weights pre-trained on ImageNet. +#' +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of +#' input preprocessing. For `InceptionResNetV2`, call +#' [`application_preprocess_inputs()`] +#' on your inputs before passing them to the model. +#' [`application_preprocess_inputs()`] +#' will scale input pixels between -1 and 1. +#' +#' @returns +#' A model instance. +#' +#' @param include_top +#' whether to include the fully-connected +#' layer at the top of the network. +#' +#' @param weights +#' one of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), +#' or the path to the weights file to be loaded. +#' +#' @param input_tensor +#' optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' optional shape tuple, only to be specified +#' if `include_top` is `FALSE` (otherwise the input shape +#' has to be `(299, 299, 3)` +#' (with `'channels_last'` data format) +#' or `(3, 299, 299)` (with `'channels_first'` data format). +#' It should have exactly 3 inputs channels, +#' and width and height should be no smaller than 75. +#' E.g. `(150, 150, 3)` would be one valid value. +#' +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the last convolutional block. +#' - `'avg'` means that global average pooling +#' will be applied to the output of the +#' last convolutional block, and thus +#' the output of the model will be a 2D tensor. +#' - `'max'` means that global max pooling will be applied. +#' +#' @param classes +#' optional number of classes to classify images +#' into, only to be specified if `include_top` is `TRUE`, +#' and if no `weights` argument is specified. +#' +#' @param classifier_activation +#' A `str` or callable. +#' The activation function to use on the "top" layer. +#' Ignored unless `include_top=TRUE`. +#' Set `classifier_activation=NULL` to return the logits +#' of the "top" layer. When loading pretrained weights, +#' `classifier_activation` can only be `NULL` or `"softmax"`. 
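+#'
+#' # Example
+#' A hedged sketch of the preprocess-then-predict flow described in the
+#' Note above (shapes and values are illustrative):
+#' ```r
+#' library(keras3)
+#' model <- application_inception_resnet_v2(weights = "imagenet")
+#' x <- random_uniform(c(1, 299, 299, 3), maxval = 255)
+#' x <- application_preprocess_inputs(model, x)  # scales pixels to [-1, 1]
+#' preds <- predict(model, x)  # shape (1, 1000)
+#' ```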
+#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.InceptionResNetV2 +application_inception_resnet_v2 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax") +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$InceptionResNetV2, args) + set_preprocessing_attributes(model, keras$applications$inception_resnet_v2) +} + + +#' Instantiates the Inception v3 architecture. +#' +#' @description +#' +#' # Reference +#' - [Rethinking the Inception Architecture for Computer Vision]( +#' https://arxiv.org/abs/1512.00567) (CVPR 2016) +#' +#' This function returns a Keras image classification model, +#' optionally loaded with weights pre-trained on ImageNet. +#' +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For `InceptionV3`, call +#' [`application_preprocess_inputs()`] on your inputs +#' before passing them to the model. +#' [`application_preprocess_inputs()`] will scale input pixels between `-1` and `1`. +#' +#' @returns +#' A model instance. +#' +#' @param include_top +#' Boolean, whether to include the fully-connected +#' layer at the top, as the last layer of the network. +#' Defaults to `TRUE`. +#' +#' @param weights +#' One of `NULL` (random initialization), +#' `imagenet` (pre-training on ImageNet), +#' or the path to the weights file to be loaded. +#' Defaults to `"imagenet"`. +#' +#' @param input_tensor +#' Optional Keras tensor (i.e. output of `layers.Input()`) +#' to use as image input for the model. `input_tensor` is useful for +#' sharing inputs between multiple different networks. +#' Defaults to `NULL`. +#' +#' @param input_shape +#' Optional shape tuple, only to be specified +#' if `include_top` is `FALSE` (otherwise the input shape +#' has to be `(299, 299, 3)` (with `channels_last` data format) +#' or `(3, 299, 299)` (with `channels_first` data format). +#' It should have exactly 3 inputs channels, +#' and width and height should be no smaller than 75. +#' E.g. `(150, 150, 3)` would be one valid value. +#' `input_shape` will be ignored if the `input_tensor` is provided. +#' +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. +#' - `NULL` (default) means that the output of the model will be +#' the 4D tensor output of the last convolutional block. +#' - `avg` means that global average pooling +#' will be applied to the output of the +#' last convolutional block, and thus +#' the output of the model will be a 2D tensor. +#' - `max` means that global max pooling will be applied. +#' +#' @param classes +#' optional number of classes to classify images +#' into, only to be specified if `include_top` is `TRUE`, and +#' if no `weights` argument is specified. Defaults to 1000. +#' +#' @param classifier_activation +#' A `str` or callable. The activation function +#' to use on the "top" layer. Ignored unless `include_top=TRUE`. +#' Set `classifier_activation=NULL` to return the logits of the "top" +#' layer. 
When loading pretrained weights, `classifier_activation` +#' can only be `NULL` or `"softmax"`. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.InceptionV3 +application_inception_v3 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax") +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$InceptionV3, args) + set_preprocessing_attributes(model, keras$applications$inception_v3) +} + + +#' Instantiates the MobileNet architecture. +#' +#' @description +#' +#' # Reference +#' - [MobileNets: Efficient Convolutional Neural Networks +#' for Mobile Vision Applications]( +#' https://arxiv.org/abs/1704.04861) +#' +#' This function returns a Keras image classification model, +#' optionally loaded with weights pre-trained on ImageNet. +#' +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For MobileNet, call [`application_preprocess_inputs()`] +#' on your inputs before passing them to the model. +#' [`application_preprocess_inputs()`] will scale input pixels between `-1` and `1`. +#' +#' @returns +#' A model instance. +#' +#' @param input_shape +#' Optional shape tuple, only to be specified if `include_top` +#' is `FALSE` (otherwise the input shape has to be `(224, 224, 3)` +#' (with `"channels_last"` data format) or `(3, 224, 224)` +#' (with `"channels_first"` data format). +#' It should have exactly 3 inputs channels, and width and +#' height should be no smaller than 32. E.g. `(200, 200, 3)` would +#' be one valid value. Defaults to `NULL`. +#' `input_shape` will be ignored if the `input_tensor` is provided. +#' +#' @param alpha +#' Controls the width of the network. This is known as the width +#' multiplier in the MobileNet paper. +#' - If `alpha < 1.0`, proportionally decreases the number +#' of filters in each layer. +#' - If `alpha > 1.0`, proportionally increases the number +#' of filters in each layer. +#' - If `alpha == 1`, default number of filters from the paper +#' are used at each layer. Defaults to `1.0`. +#' +#' @param depth_multiplier +#' Depth multiplier for depthwise convolution. +#' This is called the resolution multiplier in the MobileNet paper. +#' Defaults to `1.0`. +#' +#' @param dropout +#' Dropout rate. Defaults to `0.001`. +#' +#' @param include_top +#' Boolean, whether to include the fully-connected layer +#' at the top of the network. Defaults to `TRUE`. +#' +#' @param weights +#' One of `NULL` (random initialization), `"imagenet"` +#' (pre-training on ImageNet), or the path to the weights file +#' to be loaded. Defaults to `"imagenet"`. +#' +#' @param input_tensor +#' Optional Keras tensor (i.e. output of `layers.Input()`) +#' to use as image input for the model. `input_tensor` is useful +#' for sharing inputs between multiple different networks. +#' Defaults to `NULL`. +#' +#' @param pooling +#' Optional pooling mode for feature extraction when `include_top` +#' is `FALSE`. +#' - `NULL` (default) means that the output of the model will be +#' the 4D tensor output of the last convolutional block. 
+#' - `avg` means that global average pooling +#' will be applied to the output of the +#' last convolutional block, and thus +#' the output of the model will be a 2D tensor. +#' - `max` means that global max pooling will be applied. +#' +#' @param classes +#' Optional number of classes to classify images into, +#' only to be specified if `include_top` is `TRUE`, and if +#' no `weights` argument is specified. Defaults to `1000`. +#' +#' @param classifier_activation +#' A `str` or callable. The activation function +#' to use on the "top" layer. Ignored unless `include_top=TRUE`. +#' Set `classifier_activation=NULL` to return the logits of the "top" +#' layer. When loading pretrained weights, `classifier_activation` +#' can only be `NULL` or `"softmax"`. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.MobileNet +application_mobilenet <- +function (input_shape = NULL, alpha = 1, depth_multiplier = 1L, + dropout = 0.001, include_top = TRUE, weights = "imagenet", + input_tensor = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax") +{ + args <- capture_args(list(depth_multiplier = as_integer, + classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$MobileNet, args) + set_preprocessing_attributes(model, keras$applications$mobilenet) +} + + +#' Instantiates the MobileNetV2 architecture. +#' +#' @description +#' MobileNetV2 is very similar to the original MobileNet, +#' except that it uses inverted residual blocks with +#' bottlenecking features. It has a drastically lower +#' parameter count than the original MobileNet. +#' MobileNets support any input size greater +#' than 32 x 32, with larger image sizes +#' offering better performance. +#' +#' # Reference +#' - [MobileNetV2: Inverted Residuals and Linear Bottlenecks]( +#' https://arxiv.org/abs/1801.04381) (CVPR 2018) +#' +#' This function returns a Keras image classification model, +#' optionally loaded with weights pre-trained on ImageNet. +#' +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For MobileNetV2, call +#' [`application_preprocess_inputs()`] +#' on your inputs before passing them to the model. +#' [`application_preprocess_inputs()`] will scale input pixels between `-1` and `1`. +#' +#' @returns +#' A model instance. +#' +#' @param input_shape +#' Optional shape tuple, only to be specified if `include_top` +#' is `FALSE` (otherwise the input shape has to be `(224, 224, 3)` +#' (with `"channels_last"` data format) or `(3, 224, 224)` +#' (with `"channels_first"` data format). +#' It should have exactly 3 inputs channels, and width and +#' height should be no smaller than 32. E.g. `(200, 200, 3)` would +#' be one valid value. Defaults to `NULL`. +#' `input_shape` will be ignored if the `input_tensor` is provided. +#' +#' @param alpha +#' Controls the width of the network. This is known as the width +#' multiplier in the MobileNet paper. +#' - If `alpha < 1.0`, proportionally decreases the number +#' of filters in each layer. +#' - If `alpha > 1.0`, proportionally increases the number +#' of filters in each layer. 
+#' - If `alpha == 1`, default number of filters from the paper +#' are used at each layer. Defaults to `1.0`. +#' +#' @param include_top +#' Boolean, whether to include the fully-connected layer +#' at the top of the network. Defaults to `TRUE`. +#' +#' @param weights +#' One of `NULL` (random initialization), `"imagenet"` +#' (pre-training on ImageNet), or the path to the weights file +#' to be loaded. Defaults to `"imagenet"`. +#' +#' @param input_tensor +#' Optional Keras tensor (i.e. output of `layers.Input()`) +#' to use as image input for the model. `input_tensor` is useful +#' for sharing inputs between multiple different networks. +#' Defaults to `NULL`. +#' +#' @param pooling +#' Optional pooling mode for feature extraction when `include_top` +#' is `FALSE`. +#' - `NULL` (default) means that the output of the model will be +#' the 4D tensor output of the last convolutional block. +#' - `avg` means that global average pooling +#' will be applied to the output of the +#' last convolutional block, and thus +#' the output of the model will be a 2D tensor. +#' - `max` means that global max pooling will be applied. +#' +#' @param classes +#' Optional number of classes to classify images into, +#' only to be specified if `include_top` is `TRUE`, and if +#' no `weights` argument is specified. Defaults to `1000`. +#' +#' @param classifier_activation +#' A `str` or callable. The activation function +#' to use on the "top" layer. Ignored unless `include_top=TRUE`. +#' Set `classifier_activation=NULL` to return the logits of the "top" +#' layer. When loading pretrained weights, `classifier_activation` +#' can only be `NULL` or `"softmax"`. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.MobileNetV2 +application_mobilenet_v2 <- +function (input_shape = NULL, alpha = 1, include_top = TRUE, + weights = "imagenet", input_tensor = NULL, pooling = NULL, + classes = 1000L, classifier_activation = "softmax") +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$MobileNetV2, args) + set_preprocessing_attributes(model, keras$applications$mobilenet_v2) +} + + +#' Instantiates the MobileNetV3Large architecture. +#' +#' @description +#' +#' # Reference +#' - [Searching for MobileNetV3]( +#' https://arxiv.org/pdf/1905.02244.pdf) (ICCV 2019) +#' +#' The following table describes the performance of MobileNets v3: +#' ------------------------------------------------------------------------ +#' MACs stands for Multiply Adds +#' +#' |Classification Checkpoint|MACs(M)|Parameters(M)|Top1 Accuracy|Pixel1 CPU(ms)| +#' |---|---|---|---|---| +#' | mobilenet_v3_large_1.0_224 | 217 | 5.4 | 75.6 | 51.2 | +#' | mobilenet_v3_large_0.75_224 | 155 | 4.0 | 73.3 | 39.8 | +#' | mobilenet_v3_large_minimalistic_1.0_224 | 209 | 3.9 | 72.3 | 44.1 | +#' | mobilenet_v3_small_1.0_224 | 66 | 2.9 | 68.1 | 15.8 | +#' | mobilenet_v3_small_0.75_224 | 44 | 2.4 | 65.4 | 12.8 | +#' | mobilenet_v3_small_minimalistic_1.0_224 | 65 | 2.0 | 61.9 | 12.2 | +#' +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. 
+#' For MobileNetV3, by default input preprocessing is included as a part of the
+#' model (as a `Rescaling` layer), and thus
+#' [`application_preprocess_inputs()`] is actually a
+#' pass-through function. In this use case, MobileNetV3 models expect their
+#' inputs to be float tensors of pixels with values in the `[0, 255]` range.
+#' At the same time, preprocessing as a part of the model (i.e. `Rescaling`
+#' layer) can be disabled by setting the `include_preprocessing` argument to `FALSE`.
+#' With preprocessing disabled, MobileNetV3 models expect their inputs to be float
+#' tensors of pixels with values in the `[-1, 1]` range.
+#'
+#' # Call Arguments
+#' - `inputs`: A floating point `numpy.array` or backend-native tensor,
+#'     4D with 3 color channels, with values in the range `[0, 255]`
+#'     if `include_preprocessing` is `TRUE` and in the range `[-1, 1]`
+#'     otherwise.
+#'
+#' @returns
+#' A model instance.
+#'
+#' @param input_shape
+#' Optional shape tuple, to be specified if you would
+#' like to use a model with an input image resolution that is not
+#' `(224, 224, 3)`.
+#' It should have exactly 3 input channels.
+#' You can also omit this option if you would like
+#' to infer `input_shape` from an `input_tensor`.
+#' If you choose to include both `input_tensor` and `input_shape`, then
+#' `input_shape` will be used if they match; if the shapes
+#' do not match, an error is thrown.
+#' E.g. `(160, 160, 3)` would be one valid value.
+#'
+#' @param alpha
+#' Controls the width of the network. This is known as the
+#' depth multiplier in the MobileNetV3 paper, but the name is kept for
+#' consistency with MobileNetV1 in Keras.
+#' - If `alpha < 1.0`, proportionally decreases the number
+#'     of filters in each layer.
+#' - If `alpha > 1.0`, proportionally increases the number
+#'     of filters in each layer.
+#' - If `alpha == 1`, default number of filters from the paper
+#'     are used at each layer.
+#'
+#' @param minimalistic
+#' In addition to large and small models, this module also
+#' contains so-called minimalistic models. These models have the same
+#' per-layer dimensions as MobileNetV3; however, they don't
+#' utilize any of the advanced blocks (squeeze-and-excite units,
+#' hard-swish, and 5x5 convolutions).
+#' While these models are less efficient on CPU, they
+#' are much more performant on GPU/DSP.
+#'
+#' @param include_top
+#' Boolean, whether to include the fully-connected
+#' layer at the top of the network. Defaults to `TRUE`.
+#'
+#' @param weights
+#' String, one of `NULL` (random initialization),
+#' `"imagenet"` (pre-training on ImageNet),
+#' or the path to the weights file to be loaded.
+#'
+#' @param input_tensor
+#' Optional Keras tensor (i.e. output of
+#' `layers.Input()`)
+#' to use as image input for the model.
+#'
+#' @param pooling
+#' String, optional pooling mode for feature extraction
+#' when `include_top` is `FALSE`.
+#' - `NULL` means that the output of the model
+#'     will be the 4D tensor output of the
+#'     last convolutional block.
+#' - `avg` means that global average pooling
+#'     will be applied to the output of the
+#'     last convolutional block, and thus
+#'     the output of the model will be a
+#'     2D tensor.
+#' - `max` means that global max pooling will
+#'     be applied.
+#'
+#' @param classes
+#' Integer, optional number of classes to classify images
+#' into, only to be specified if `include_top` is `TRUE`, and
+#' if no `weights` argument is specified.
+#'
+#' @param dropout_rate
+#' Fraction of the input units to drop on the last layer.
+#'
+#' @param classifier_activation
+#' A `str` or callable. The activation function to use
+#' on the "top" layer. Ignored unless `include_top=TRUE`. Set
+#' `classifier_activation=NULL` to return the logits of the "top" layer.
+#' When loading pretrained weights, `classifier_activation` can only
+#' be `NULL` or `"softmax"`.
+#'
+#' @param include_preprocessing
+#' Boolean, whether to include the preprocessing
+#' layer (`Rescaling`) at the bottom of the network. Defaults to `TRUE`.
+#'
+#' @export
+#' @seealso
+#' +
+# +
+#' @tether keras.applications.MobileNetV3Large
+application_mobilenet_v3_large <-
+function (input_shape = NULL, alpha = 1, minimalistic = FALSE,
+    include_top = TRUE, weights = "imagenet", input_tensor = NULL,
+    classes = 1000L, pooling = NULL, dropout_rate = 0.2, classifier_activation = "softmax",
+    include_preprocessing = TRUE)
+{
+    args <- capture_args(list(classes = as_integer, input_shape = normalize_shape))
+    model <- do.call(keras$applications$MobileNetV3Large, args)
+    set_preprocessing_attributes(model, keras$applications$mobilenet_v3)
+}
+
+
+#' Instantiates the MobileNetV3Small architecture.
+#'
+#' @description
+#'
+#' # Reference
+#' - [Searching for MobileNetV3](
+#' https://arxiv.org/pdf/1905.02244.pdf) (ICCV 2019)
+#'
+#' The following table describes the performance of MobileNets v3:
+#' ------------------------------------------------------------------------
+#' MACs stands for Multiply Adds
 #'
 #' |Classification Checkpoint|MACs(M)|Parameters(M)|Top1 Accuracy|Pixel1 CPU(ms)|
 #' |---|---|---|---|---|
@@ -608,359 +2805,983 @@ mobilenet_v2_load_model_hdf5 <- function(filepath) {
 #'
 #' For image classification use cases, see
 #' [this page for detailed examples](
+#' https://keras.io/api/applications/#usage-examples-for-image-classification-models).
+#'
+#' For transfer learning use cases, make sure to read the
+#' [guide to transfer learning & fine-tuning](
+#' https://keras.io/guides/transfer_learning/).
+#'
+#' # Note
+#' Each Keras Application expects a specific kind of input preprocessing.
+#' For MobileNetV3, by default input preprocessing is included as a part of the
+#' model (as a `Rescaling` layer), and thus
+#' [`application_preprocess_inputs()`] is actually a
+#' pass-through function. In this use case, MobileNetV3 models expect their
+#' inputs to be float tensors of pixels with values in the `[0, 255]` range.
+#' At the same time, preprocessing as a part of the model (i.e. `Rescaling`
+#' layer) can be disabled by setting the `include_preprocessing` argument to `FALSE`.
+#' With preprocessing disabled, MobileNetV3 models expect their inputs to be float
+#' tensors of pixels with values in the `[-1, 1]` range.
+#'
+#' # Call Arguments
+#' - `inputs`: A floating point `numpy.array` or backend-native tensor,
+#'     4D with 3 color channels, with values in the range `[0, 255]`
+#'     if `include_preprocessing` is `TRUE` and in the range `[-1, 1]`
+#'     otherwise.
+#'
+#' @returns
+#' A model instance.
+#'
+#' @param input_shape
+#' Optional shape tuple, to be specified if you would
+#' like to use a model with an input image resolution that is not
+#' `(224, 224, 3)`.
+#' It should have exactly 3 input channels.
+#' You can also omit this option if you would like
+#' to infer `input_shape` from an `input_tensor`.
+#' If you choose to include both `input_tensor` and `input_shape`, then
+#' `input_shape` will be used if they match; if the shapes
+#' do not match, an error is thrown.
+#' E.g. `(160, 160, 3)` would be one valid value.
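+#' For example (an illustrative sketch, not from the upstream docstring),
+#' a reduced-resolution feature extractor can be requested directly:
+#' ```r
+#' library(keras3)
+#' base <- application_mobilenet_v3_small(input_shape = c(160, 160, 3),
+#'                                        include_top = FALSE)
+#' ```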
+#'
+#' @param alpha
+#' Controls the width of the network. This is known as the
+#' depth multiplier in the MobileNetV3 paper, but the name is kept for
+#' consistency with MobileNetV1 in Keras.
+#' - If `alpha < 1.0`, proportionally decreases the number
+#'     of filters in each layer.
+#' - If `alpha > 1.0`, proportionally increases the number
+#'     of filters in each layer.
+#' - If `alpha == 1`, default number of filters from the paper
+#'     are used at each layer.
+#'
+#' @param minimalistic
+#' In addition to large and small models, this module also
+#' contains so-called minimalistic models. These models have the same
+#' per-layer dimensions as MobileNetV3; however, they don't
+#' utilize any of the advanced blocks (squeeze-and-excite units,
+#' hard-swish, and 5x5 convolutions).
+#' While these models are less efficient on CPU, they
+#' are much more performant on GPU/DSP.
+#'
+#' @param include_top
+#' Boolean, whether to include the fully-connected
+#' layer at the top of the network. Defaults to `TRUE`.
+#'
+#' @param weights
+#' String, one of `NULL` (random initialization),
+#' `"imagenet"` (pre-training on ImageNet),
+#' or the path to the weights file to be loaded.
+#'
+#' @param input_tensor
+#' Optional Keras tensor (i.e. output of
+#' `layers.Input()`)
+#' to use as image input for the model.
+#'
+#' @param pooling
+#' String, optional pooling mode for feature extraction
+#' when `include_top` is `FALSE`.
+#' - `NULL` means that the output of the model
+#'     will be the 4D tensor output of the
+#'     last convolutional block.
+#' - `avg` means that global average pooling
+#'     will be applied to the output of the
+#'     last convolutional block, and thus
+#'     the output of the model will be a
+#'     2D tensor.
+#' - `max` means that global max pooling will
+#'     be applied.
+#'
+#' @param classes
+#' Integer, optional number of classes to classify images
+#' into, only to be specified if `include_top` is `TRUE`, and
+#' if no `weights` argument is specified.
+#'
+#' @param dropout_rate
+#' Fraction of the input units to drop on the last layer.
+#'
+#' @param classifier_activation
+#' A `str` or callable. The activation function to use
+#' on the "top" layer. Ignored unless `include_top=TRUE`. Set
+#' `classifier_activation=NULL` to return the logits of the "top" layer.
+#' When loading pretrained weights, `classifier_activation` can only
+#' be `NULL` or `"softmax"`.
+#'
+#' @param include_preprocessing
+#' Boolean, whether to include the preprocessing
+#' layer (`Rescaling`) at the bottom of the network. Defaults to `TRUE`.
+#'
+#' @export
+#' @seealso
+#' +
+# +
+#' @tether keras.applications.MobileNetV3Small
+application_mobilenet_v3_small <-
+function (input_shape = NULL, alpha = 1, minimalistic = FALSE,
+    include_top = TRUE, weights = "imagenet", input_tensor = NULL,
+    classes = 1000L, pooling = NULL, dropout_rate = 0.2, classifier_activation = "softmax",
+    include_preprocessing = TRUE)
+{
+    args <- capture_args(list(classes = as_integer, input_shape = normalize_shape))
+    model <- do.call(keras$applications$MobileNetV3Small, args)
+    set_preprocessing_attributes(model, keras$applications$mobilenet_v3)
+}
+
+
+#' Instantiates a NASNet model in ImageNet mode.
+#'
+#' @description
+#'
+#' # Reference
+#' - [Learning Transferable Architectures for Scalable Image Recognition](
+#' https://arxiv.org/abs/1707.07012) (CVPR 2018)
+#'
+#' Optionally loads weights pre-trained on ImageNet.
+#' Note that the data format convention used by the model is
+#' the one specified in your Keras config at `~/.keras/keras.json`.
+#'
+#' # Note
+#' Each Keras Application expects a specific kind of input preprocessing.
+#' For NASNet, call [`application_preprocess_inputs()`] on your
+#' inputs before passing them to the model.
+#'
+#' @returns
+#' A Keras model instance.
+#'
+#' @param input_shape
+#' Optional shape tuple, only to be specified
+#' if `include_top` is `FALSE` (otherwise the input shape
+#' has to be `(331, 331, 3)` for NASNetLarge).
+#' It should have exactly 3 input channels,
+#' and width and height should be no smaller than 32.
+#' E.g. `(224, 224, 3)` would be one valid value.
+#'
+#' @param include_top
+#' Whether to include the fully-connected
+#' layer at the top of the network.
+#'
+#' @param weights
+#' `NULL` (random initialization) or
+#' `imagenet` (ImageNet weights). For loading `imagenet` weights,
+#' `input_shape` should be `(331, 331, 3)`.
+#'
+#' @param input_tensor
+#' Optional Keras tensor (i.e. output of
+#' `layers.Input()`)
+#' to use as image input for the model.
+#'
+#' @param pooling
+#' Optional pooling mode for feature extraction
+#' when `include_top` is `FALSE`.
+#' - `NULL` means that the output of the model
+#'     will be the 4D tensor output of the
+#'     last convolutional layer.
+#' - `avg` means that global average pooling
+#'     will be applied to the output of the
+#'     last convolutional layer, and thus
+#'     the output of the model will be a
+#'     2D tensor.
+#' - `max` means that global max pooling will
+#'     be applied.
+#'
+#' @param classes
+#' Optional number of classes to classify images
+#' into, only to be specified if `include_top` is `TRUE`, and
+#' if no `weights` argument is specified.
+#'
+#' @param classifier_activation
+#' A `str` or callable. The activation function to
+#' use on the "top" layer. Ignored unless `include_top=TRUE`. Set
+#' `classifier_activation=NULL` to return the logits of the "top"
+#' layer. When loading pretrained weights, `classifier_activation`
+#' can only be `NULL` or `"softmax"`.
+#'
+#' @export
+#' @seealso
+#' +
+# +
+#' @tether keras.applications.NASNetLarge
+application_nasnetlarge <-
+function (input_shape = NULL, include_top = TRUE, weights = "imagenet",
+    input_tensor = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax")
+{
+    args <- capture_args(list(classes = as_integer, input_shape = normalize_shape))
+    model <- do.call(keras$applications$NASNetLarge, args)
+    set_preprocessing_attributes(model, keras$applications$nasnet)
+}
+
+
+#' Instantiates a Mobile NASNet model in ImageNet mode.
+#'
+#' @description
+#'
+#' # Reference
+#' - [Learning Transferable Architectures for Scalable Image Recognition](
+#' https://arxiv.org/abs/1707.07012) (CVPR 2018)
+#'
+#' Optionally loads weights pre-trained on ImageNet.
+#' Note that the data format convention used by the model is
+#' the one specified in your Keras config at `~/.keras/keras.json`.
+#'
+#' # Note
+#' Each Keras Application expects a specific kind of input preprocessing.
+#' For NASNet, call [`application_preprocess_inputs()`] on your
+#' inputs before passing them to the model.
+#'
+#' @returns
+#' A Keras model instance.
+#'
+#' @param input_shape
+#' Optional shape tuple, only to be specified
+#' if `include_top` is `FALSE` (otherwise the input shape
+#' has to be `(224, 224, 3)` for NASNetMobile).
+#' It should have exactly 3 input channels,
+#' and width and height should be no smaller than 32.
+#' E.g. `(224, 224, 3)` would be one valid value.
+#'
+#' @param include_top
+#' Whether to include the fully-connected
+#' layer at the top of the network.
+#'
+#' @param weights
+#' `NULL` (random initialization) or
+#' `imagenet` (ImageNet weights). For loading `imagenet` weights,
+#' `input_shape` should be `(224, 224, 3)`.
+#'
+#' @param input_tensor
+#' Optional Keras tensor (i.e. output of
+#' `layers.Input()`)
+#' to use as image input for the model.
+#'
+#' @param pooling
+#' Optional pooling mode for feature extraction
+#' when `include_top` is `FALSE`.
+#' - `NULL` means that the output of the model
+#'     will be the 4D tensor output of the
+#'     last convolutional layer.
+#' - `avg` means that global average pooling
+#'     will be applied to the output of the
+#'     last convolutional layer, and thus
+#'     the output of the model will be a
+#'     2D tensor.
+#' - `max` means that global max pooling will
+#'     be applied.
+#'
+#' @param classes
+#' Optional number of classes to classify images
+#' into, only to be specified if `include_top` is `TRUE`, and
+#' if no `weights` argument is specified.
+#'
+#' @param classifier_activation
+#' A `str` or callable. The activation function to
+#' use on the "top" layer. Ignored unless `include_top=TRUE`. Set
+#' `classifier_activation=NULL` to return the logits of the "top"
+#' layer. When loading pretrained weights, `classifier_activation` can
+#' only be `NULL` or `"softmax"`.
+#'
+#' @export
+#' @seealso
+#' +
+# +
+#' @tether keras.applications.NASNetMobile
+application_nasnetmobile <-
+function (input_shape = NULL, include_top = TRUE, weights = "imagenet",
+    input_tensor = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax")
+{
+    args <- capture_args(list(classes = as_integer, input_shape = normalize_shape))
+    model <- do.call(keras$applications$NASNetMobile, args)
+    set_preprocessing_attributes(model, keras$applications$nasnet)
+}
+
+
+#' Instantiates the ResNet101 architecture.
+#'
+#' @description
+#'
+#' # Reference
+#' - [Deep Residual Learning for Image Recognition](
+#' https://arxiv.org/abs/1512.03385) (CVPR 2015)
+#'
+#' For image classification use cases, see [this page for detailed examples](
+#' https://keras.io/api/applications/#usage-examples-for-image-classification-models).
+#'
+#' For transfer learning use cases, make sure to read the
+#' [guide to transfer learning & fine-tuning](
+#' https://keras.io/guides/transfer_learning/).
+#'
+#' # Note
+#' Each Keras Application expects a specific kind of input preprocessing.
+#' For ResNet, call [`application_preprocess_inputs()`] on your
+#' inputs before passing them to the model. [`application_preprocess_inputs()`] will convert
+#' the input images from RGB to BGR, then will zero-center each color channel with
+#' respect to the ImageNet dataset, without scaling.
+#'
+#' @returns
+#' A Model instance.
+#'
+#' @param include_top
+#' whether to include the fully-connected
+#' layer at the top of the network.
+#'
+#' @param weights
+#' one of `NULL` (random initialization),
+#' `"imagenet"` (pre-training on ImageNet), or the path to the weights
+#' file to be loaded.
+#'
+#' @param input_tensor
+#' optional Keras tensor (i.e. output of `layers.Input()`)
+#' to use as image input for the model.
+#'
+#' @param input_shape
+#' optional shape tuple, only to be specified if `include_top`
+#' is `FALSE` (otherwise the input shape has to be `(224, 224, 3)`
+#' (with `"channels_last"` data format) or `(3, 224, 224)`
+#' (with `"channels_first"` data format)).
It should have exactly 3 +#' inputs channels, and width and height should be no smaller than 32. +#' E.g. `(200, 200, 3)` would be one valid value. +#' +#' @param pooling +#' Optional pooling mode for feature extraction when `include_top` +#' is `FALSE`. +#' - `NULL` means that the output of the model will be the 4D tensor +#' output of the last convolutional block. +#' - `avg` means that global average pooling will be applied to the output +#' of the last convolutional block, and thus the output of the +#' model will be a 2D tensor. +#' - `max` means that global max pooling will be applied. +#' +#' @param classes +#' optional number of classes to classify images into, only to be +#' specified if `include_top` is `TRUE`, and if no `weights` argument is +#' specified. +#' +#' @param classifier_activation +#' A `str` or callable. The activation function to +#' use on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.ResNet101 +application_resnet101 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax") +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$ResNet101, args) + set_preprocessing_attributes(model, keras$applications$resnet) +} + + +#' Instantiates the ResNet152 architecture. +#' +#' @description +#' +#' # Reference +#' - [Deep Residual Learning for Image Recognition]( +#' https://arxiv.org/abs/1512.03385) (CVPR 2015) +#' +#' For image classification use cases, see [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For ResNet, call [`application_preprocess_inputs()`] on your +#' inputs before passing them to the model. [`application_preprocess_inputs()`] will convert +#' the input images from RGB to BGR, then will zero-center each color channel with +#' respect to the ImageNet dataset, without scaling. +#' +#' @returns +#' A Model instance. +#' +#' @param include_top +#' whether to include the fully-connected +#' layer at the top of the network. +#' +#' @param weights +#' one of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), or the path to the weights +#' file to be loaded. +#' +#' @param input_tensor +#' optional Keras tensor (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' optional shape tuple, only to be specified if `include_top` +#' is `FALSE` (otherwise the input shape has to be `(224, 224, 3)` +#' (with `"channels_last"` data format) or `(3, 224, 224)` +#' (with `"channels_first"` data format). It should have exactly 3 +#' inputs channels, and width and height should be no smaller than 32. +#' E.g. `(200, 200, 3)` would be one valid value. +#' +#' @param pooling +#' Optional pooling mode for feature extraction when `include_top` +#' is `FALSE`. 
+#' - `NULL` means that the output of the model will be the 4D tensor +#' output of the last convolutional block. +#' - `avg` means that global average pooling will be applied to the output +#' of the last convolutional block, and thus the output of the +#' model will be a 2D tensor. +#' - `max` means that global max pooling will be applied. +#' +#' @param classes +#' optional number of classes to classify images into, only to be +#' specified if `include_top` is `TRUE`, and if no `weights` argument is +#' specified. +#' +#' @param classifier_activation +#' A `str` or callable. The activation function to +#' use on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.ResNet152 +application_resnet152 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax") +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$ResNet152, args) + set_preprocessing_attributes(model, keras$applications$resnet) +} + + +#' Instantiates the ResNet50 architecture. +#' +#' @description +#' +#' # Reference +#' - [Deep Residual Learning for Image Recognition]( +#' https://arxiv.org/abs/1512.03385) (CVPR 2015) +#' +#' For image classification use cases, see [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For ResNet, call [`application_preprocess_inputs()`] on your +#' inputs before passing them to the model. [`application_preprocess_inputs()`] will convert +#' the input images from RGB to BGR, then will zero-center each color channel with +#' respect to the ImageNet dataset, without scaling. +#' +#' @returns +#' A Model instance. +#' +#' @param include_top +#' whether to include the fully-connected +#' layer at the top of the network. +#' +#' @param weights +#' one of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), or the path to the weights +#' file to be loaded. +#' +#' @param input_tensor +#' optional Keras tensor (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' optional shape tuple, only to be specified if `include_top` +#' is `FALSE` (otherwise the input shape has to be `(224, 224, 3)` +#' (with `"channels_last"` data format) or `(3, 224, 224)` +#' (with `"channels_first"` data format). It should have exactly 3 +#' inputs channels, and width and height should be no smaller than 32. +#' E.g. `(200, 200, 3)` would be one valid value. +#' +#' @param pooling +#' Optional pooling mode for feature extraction when `include_top` +#' is `FALSE`. +#' - `NULL` means that the output of the model will be the 4D tensor +#' output of the last convolutional block. +#' - `avg` means that global average pooling will be applied to the output +#' of the last convolutional block, and thus the output of the +#' model will be a 2D tensor. +#' - `max` means that global max pooling will be applied. 
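+#' For instance (an illustrative sketch, not from the upstream docstring),
+#' `pooling = "avg"` turns the network into a 2D feature extractor:
+#' ```r
+#' library(keras3)
+#' base <- application_resnet50(weights = "imagenet", include_top = FALSE,
+#'                              pooling = "avg")
+#' x <- application_preprocess_inputs(base,
+#'   random_uniform(c(1, 224, 224, 3), maxval = 255))
+#' features <- predict(base, x)  # shape (1, 2048) for ResNet50
+#' ```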
+#' +#' @param classes +#' optional number of classes to classify images into, only to be +#' specified if `include_top` is `TRUE`, and if no `weights` argument is +#' specified. +#' +#' @param classifier_activation +#' A `str` or callable. The activation function to +#' use on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.ResNet50 +application_resnet50 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax") +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$ResNet50, args) + set_preprocessing_attributes(model, keras$applications$resnet) +} + + +#' Instantiates the ResNet101V2 architecture. +#' +#' @description +#' +#' # Reference +#' - [Identity Mappings in Deep Residual Networks]( +#' https://arxiv.org/abs/1603.05027) (CVPR 2016) +#' +#' For image classification use cases, see [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For ResNet, call [`application_preprocess_inputs()`] on your +#' inputs before passing them to the model. [`application_preprocess_inputs()`] will +#' scale input pixels between `-1` and `1`. +#' +#' @returns +#' A Model instance. +#' +#' @param include_top +#' whether to include the fully-connected +#' layer at the top of the network. +#' +#' @param weights +#' one of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), or the path to the weights +#' file to be loaded. +#' +#' @param input_tensor +#' optional Keras tensor (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' optional shape tuple, only to be specified if `include_top` +#' is `FALSE` (otherwise the input shape has to be `(224, 224, 3)` +#' (with `"channels_last"` data format) or `(3, 224, 224)` +#' (with `"channels_first"` data format)). It should have exactly 3 +#' input channels, and width and height should be no smaller than 32. +#' E.g. `(200, 200, 3)` would be one valid value. +#' +#' @param pooling +#' Optional pooling mode for feature extraction when `include_top` +#' is `FALSE`. +#' - `NULL` means that the output of the model will be the 4D tensor +#' output of the last convolutional block. +#' - `avg` means that global average pooling will be applied to the output +#' of the last convolutional block, and thus the output of the +#' model will be a 2D tensor. +#' - `max` means that global max pooling will be applied. +#' +#' @param classes +#' optional number of classes to classify images into, only to be +#' specified if `include_top` is `TRUE`, and if no `weights` argument is +#' specified. +#' +#' @param classifier_activation +#' A `str` or callable. The activation function to +#' use on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer.
+#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`. +#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.ResNet101V2 +application_resnet101_v2 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax") +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$ResNet101V2, args) + set_preprocessing_attributes(model, keras$applications$resnet_v2) +} + + +#' Instantiates the ResNet152V2 architecture. +#' +#' @description +#' +#' # Reference +#' - [Identity Mappings in Deep Residual Networks]( +#' https://arxiv.org/abs/1603.05027) (CVPR 2016) +#' +#' For image classification use cases, see [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For ResNet, call [`application_preprocess_inputs()`] on your +#' inputs before passing them to the model. [`application_preprocess_inputs()`] will +#' scale input pixels between `-1` and `1`. +#' +#' @returns +#' A Model instance. +#' +#' @param include_top +#' whether to include the fully-connected +#' layer at the top of the network. +#' +#' @param weights +#' one of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), or the path to the weights +#' file to be loaded. +#' +#' @param input_tensor +#' optional Keras tensor (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' optional shape tuple, only to be specified if `include_top` +#' is `FALSE` (otherwise the input shape has to be `(224, 224, 3)` +#' (with `"channels_last"` data format) or `(3, 224, 224)` +#' (with `"channels_first"` data format)). It should have exactly 3 +#' input channels, and width and height should be no smaller than 32. +#' E.g. `(200, 200, 3)` would be one valid value. +#' +#' @param pooling +#' Optional pooling mode for feature extraction when `include_top` +#' is `FALSE`. +#' - `NULL` means that the output of the model will be the 4D tensor +#' output of the last convolutional block. +#' - `avg` means that global average pooling will be applied to the output +#' of the last convolutional block, and thus the output of the +#' model will be a 2D tensor. +#' - `max` means that global max pooling will be applied. +#' +#' @param classes +#' optional number of classes to classify images into, only to be +#' specified if `include_top` is `TRUE`, and if no `weights` argument is +#' specified. +#' +#' @param classifier_activation +#' A `str` or callable. The activation function to +#' use on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`.
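+#' +#' @examples \dontrun{ +#' # A minimal sketch (not from the upstream docs) of feature extraction: +#' # with include_top = FALSE and pooling = "avg", each image is reduced +#' # to a single 2048-length feature vector. +#' model <- application_resnet152_v2(include_top = FALSE, pooling = "avg") +#' x <- array(runif(2 * 224 * 224 * 3, 0, 255), dim = c(2, 224, 224, 3)) +#' features <- predict(model, application_preprocess_inputs(model, x)) +#' dim(features) # 2 x 2048 +#' }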
+#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.ResNet152V2 +application_resnet152_v2 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax") +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$ResNet152V2, args) + set_preprocessing_attributes(model, keras$applications$resnet_v2) +} + + +#' Instantiates the ResNet50V2 architecture. +#' +#' @description +#' +#' # Reference +#' - [Identity Mappings in Deep Residual Networks]( +#' https://arxiv.org/abs/1603.05027) (CVPR 2016) +#' +#' For image classification use cases, see [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For ResNet, call [`application_preprocess_inputs()`] on your +#' inputs before passing them to the model. [`application_preprocess_inputs()`] will +#' scale input pixels between `-1` and `1`. +#' +#' @returns +#' A Model instance. +#' +#' @param include_top +#' whether to include the fully-connected +#' layer at the top of the network. +#' +#' @param weights +#' one of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), or the path to the weights +#' file to be loaded. +#' +#' @param input_tensor +#' optional Keras tensor (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' optional shape tuple, only to be specified if `include_top` +#' is `FALSE` (otherwise the input shape has to be `(224, 224, 3)` +#' (with `"channels_last"` data format) or `(3, 224, 224)` +#' (with `"channels_first"` data format)). It should have exactly 3 +#' input channels, and width and height should be no smaller than 32. +#' E.g. `(200, 200, 3)` would be one valid value. +#' +#' @param pooling +#' Optional pooling mode for feature extraction when `include_top` +#' is `FALSE`. +#' - `NULL` means that the output of the model will be the 4D tensor +#' output of the last convolutional block. +#' - `avg` means that global average pooling will be applied to the output +#' of the last convolutional block, and thus the output of the +#' model will be a 2D tensor. +#' - `max` means that global max pooling will be applied. +#' +#' @param classes +#' optional number of classes to classify images into, only to be +#' specified if `include_top` is `TRUE`, and if no `weights` argument is +#' specified. +#' +#' @param classifier_activation +#' A `str` or callable. The activation function to +#' use on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" layer. +#' When loading pretrained weights, `classifier_activation` can only +#' be `NULL` or `"softmax"`.
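+#' +#' @examples \dontrun{ +#' # A minimal sketch (not from the upstream docs): classifier_activation = +#' # NULL returns raw logits from the "top" layer, which is permitted even +#' # with pretrained weights. +#' model <- application_resnet50_v2(classifier_activation = NULL) +#' x <- array(runif(224 * 224 * 3, 0, 255), dim = c(1, 224, 224, 3)) +#' logits <- predict(model, application_preprocess_inputs(model, x)) +#' probs <- exp(logits) / sum(exp(logits)) # recover softmax probabilities +#' }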
+#' +#' @export +#' @seealso +#' + +# + +#' @tether keras.applications.ResNet50V2 +application_resnet50_v2 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax") +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$ResNet50V2, args) + set_preprocessing_attributes(model, keras$applications$resnet_v2) +} + + +#' Instantiates the VGG16 model. +#' +#' @description +#' +#' # Reference +#' - [Very Deep Convolutional Networks for Large-Scale Image Recognition]( +#' https://arxiv.org/abs/1409.1556) (ICLR 2015) +#' +#' For image classification use cases, see +#' [this page for detailed examples]( #' https://keras.io/api/applications/#usage-examples-for-image-classification-models). #' #' For transfer learning use cases, make sure to read the #' [guide to transfer learning & fine-tuning]( #' https://keras.io/guides/transfer_learning/). #' -#' @note -#' Each Keras application typically expects a specific kind of input preprocessing. -#' For ModelNetV3, by default input preprocessing is included as a part of the -#' model (as a `Rescaling` layer), and thus -#' a preprocessing function is not necessary. In this use case, ModelNetV3 models expect their inputs -#' to be float tensors of pixels with values in the `[0-255]` range. -#' At the same time, preprocessing as a part of the model (i.e. `Rescaling` -#' layer) can be disabled by setting `include_preprocessing` argument to FALSE. -#' With preprocessing disabled ModelNetV3 models expect their inputs to be float -#' tensors of pixels with values in the `[-1, 1]` range. -#' -#' @param input_shape Optional shape vector, to be specified if you would -#' like to use a model with an input image resolution that is not -#' `c(224, 224, 3)`. -#' It should have exactly 3 inputs channels `c(224, 224, 3)`. -#' You can also omit this option if you would like -#' to infer input_shape from an input_tensor. -#' If you choose to include both input_tensor and input_shape then -#' input_shape will be used if they match, if the shapes -#' do not match then we will throw an error. -#' E.g. `c(160, 160, 3)` would be one valid value. +#' The default input size for this model is 224x224. #' -#' @param alpha controls the width of the network. This is known as the -#' depth multiplier in the MobileNetV3 paper, but the name is kept for -#' consistency with MobileNetV1 in Keras. -#' - If `alpha` < 1.0, proportionally decreases the number -#' of filters in each layer. -#' - If `alpha` > 1.0, proportionally increases the number -#' of filters in each layer. -#' - If `alpha` = 1, default number of filters from the paper -#' are used at each layer. +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For VGG16, call [`application_preprocess_inputs()`] on your +#' inputs before passing them to the model. +#' [`application_preprocess_inputs()`] will convert the input images from RGB to BGR, +#' then will zero-center each color channel with respect to the ImageNet +#' dataset, without scaling. #' -#' @param minimalistic In addition to large and small models this module also -#' contains so-called minimalistic models, these models have the same -#' per-layer dimensions characteristic as MobilenetV3 however, they don't -#' utilize any of the advanced blocks (squeeze-and-excite units, hard-swish, -#' and 5x5 convolutions). 
While these models are less efficient on CPU, they -#' are much more performant on GPU/DSP. +#' @returns +#' A model instance. #' -#' @param include_top Boolean, whether to include the fully-connected -#' layer at the top of the network. Defaults to `TRUE`. +#' @param include_top +#' whether to include the 3 fully-connected +#' layers at the top of the network. #' -#' @param weights String, one of `NULL` (random initialization), -#' 'imagenet' (pre-training on ImageNet), +#' @param weights +#' one of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), #' or the path to the weights file to be loaded. #' -#' @param input_tensor Optional Keras tensor (i.e. output of -#' `layer_input()`) +#' @param input_tensor +#' optional Keras tensor +#' (i.e. output of `layers.Input()`) #' to use as image input for the model. #' -#' @param pooling String, optional pooling mode for feature extraction +#' @param input_shape +#' optional shape tuple, only to be specified +#' if `include_top` is `FALSE` (otherwise the input shape +#' has to be `(224, 224, 3)` +#' (with `"channels_last"` data format) or +#' `(3, 224, 224)` (with `"channels_first"` data format)). +#' It should have exactly 3 input channels, +#' and width and height should be no smaller than 32. +#' E.g. `(200, 200, 3)` would be one valid value. +#' +#' @param pooling +#' Optional pooling mode for feature extraction #' when `include_top` is `FALSE`. -#' - `NULL` means that the output of the model -#' will be the 4D tensor output of the +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the #' last convolutional block. #' - `avg` means that global average pooling #' will be applied to the output of the #' last convolutional block, and thus -#' the output of the model will be a -#' 2D tensor. +#' the output of the model will be a 2D tensor. #' - `max` means that global max pooling will #' be applied. #' -#' @param classes Integer, optional number of classes to classify images -#' into, only to be specified if `include_top` is TRUE, and +#' @param classes +#' optional number of classes to classify images +#' into, only to be specified if `include_top` is `TRUE`, and #' if no `weights` argument is specified. #' -#' @param dropout_rate fraction of the input units to drop on the last layer. -#' -#' @param classifier_activation A string or callable. The activation function to use -#' on the "top" layer. Ignored unless `include_top = TRUE`. Set -#' `classifier_activation = NULL` to return the logits of the "top" layer. -#' When loading pretrained weights, `classifier_activation` can only -#' be `NULL` or `"softmax"`. -#' -#' @param include_preprocessing Boolean, whether to include the preprocessing -#' layer (`Rescaling`) at the bottom of the network. Defaults to `TRUE`. -#' -#' @returns A keras `Model` instance -#' @name application_mobilenet_v3 -#' @rdname application_mobilenet_v3 +#' @param classifier_activation +#' A `str` or callable. The activation function to +#' use on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" +#' layer. When loading pretrained weights, `classifier_activation` +#' can only be `NULL` or `"softmax"`.
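+#' +#' @examples \dontrun{ +#' # A minimal sketch (not from the upstream docs) of the preprocessing +#' # contract: inputs are RGB in [0, 255]; preprocessing reorders channels +#' # to BGR and subtracts the ImageNet channel means, without scaling. +#' model <- application_vgg16() +#' x <- array(runif(224 * 224 * 3, 0, 255), dim = c(1, 224, 224, 3)) +#' x_pp <- application_preprocess_inputs(model, x) +#' preds <- predict(model, x_pp) +#' application_decode_predictions(model, preds, top = 5) +#' }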
#' -#' @seealso -#' + -#' + -#' + #' @export -application_mobilenet_v3_large <- -function(input_shape = NULL, - alpha = 1.0, - minimalistic = FALSE, - include_top = TRUE, - weights = "imagenet", - input_tensor = NULL, - classes = 1000L, - pooling = NULL, - dropout_rate = 0.2, - classifier_activation = "softmax", - include_preprocessing = TRUE) +#' @seealso +#' + +# + +#' @tether keras.applications.VGG16 +application_vgg16 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax") { - require_tf_version("2.4", "application_mobilenet_v3_large") - args <- capture_args(match.call(), list( - classes = as.integer, - input_shape = normalize_shape)) - do.call(keras$applications$MobileNetV3Large, args) + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$VGG16, args) + set_preprocessing_attributes(model, keras$applications$vgg16) } -#' @export -#' @rdname application_mobilenet_v3 -application_mobilenet_v3_small <- -function(input_shape = NULL, - alpha = 1.0, - minimalistic = FALSE, - include_top = TRUE, - weights = "imagenet", - input_tensor = NULL, - classes = 1000L, - pooling = NULL, - dropout_rate = 0.2, - classifier_activation = "softmax", - include_preprocessing = TRUE) -{ - require_tf_version("2.4", "application_mobilenet_v3_small") - args <- capture_args(match.call(), list( - classes = as.integer, - input_shape = normalize_shape)) - do.call(keras$applications$MobileNetV3Small, args) -} -#' Instantiates the DenseNet architecture. -#' -#' @details -#' -#' Optionally loads weights pre-trained -#' on ImageNet. Note that when using TensorFlow, -#' for best performance you should set -#' `image_data_format='channels_last'` in your Keras config -#' at ~/.keras/keras.json. -#' -#' The model and the weights are compatible with -#' TensorFlow, Theano, and CNTK. The data format -#' convention used by the model is the one -#' specified in your Keras config file. -#' -#' @param blocks numbers of building blocks for the four dense layers. -#' @param include_top whether to include the fully-connected layer at the top -#' of the network. -#' @param weights one of `NULL` (random initialization), 'imagenet' -#' (pre-training on ImageNet), or the path to the weights file to be loaded. -#' @param input_tensor optional Keras tensor (i.e. output of `layer_input()`) -#' to use as image input for the model. -#' @param input_shape optional shape list, only to be specified if `include_top` -#' is FALSE (otherwise the input shape has to be `(224, 224, 3)` -#' (with `channels_last` data format) or `(3, 224, 224)` (with -#' `channels_first` data format). It should have exactly 3 inputs channels. -#' @param pooling optional pooling mode for feature extraction when -#' `include_top` is `FALSE`. -#' - `NULL` means that the output of the model will be the 4D tensor output -#' of the last convolutional layer. -#' - `avg` means that global average pooling will be applied to the output -#' of the last convolutional layer, and thus the output of the model -#' will be a 2D tensor. -#' - `max` means that global max pooling will be applied. -#' @param classes optional number of classes to classify images into, only to be -#' specified if `include_top` is TRUE, and if no `weights` argument is -#' specified. -#' @param data_format data format of the image tensor. -#' @param x a 3D or 4D array consists of RGB values within `[0, 255]`. 
+#' Instantiates the VGG19 model. #' -#' @export -application_densenet <- function(blocks, include_top = TRUE, weights = "imagenet", - input_tensor = NULL, input_shape = NULL, - pooling = NULL, classes = 1000) { - - keras$applications$densenet$DenseNet( - blocks = as.integer(blocks), - include_top = include_top, - weights = weights, - input_tensor = input_tensor, - input_shape = normalize_shape(input_shape), - pooling = pooling, - classes = as.integer(classes) - ) - -} - -#' @rdname application_densenet -#' @export -application_densenet121 <- function(include_top = TRUE, weights = "imagenet", input_tensor = NULL, - input_shape = NULL, pooling = NULL, classes = 1000) { - keras$applications$DenseNet121( - include_top = include_top, - weights = weights, - input_tensor = input_tensor, - input_shape = normalize_shape(input_shape), - pooling = pooling, - classes = as.integer(classes) - ) -} - -#' @rdname application_densenet -#' @export -application_densenet169 <- function(include_top = TRUE, weights = "imagenet", input_tensor = NULL, - input_shape = NULL, pooling = NULL, classes = 1000) { - keras$applications$DenseNet169( - include_top = include_top, - weights = weights, - input_tensor = input_tensor, - input_shape = normalize_shape(input_shape), - pooling = pooling, - classes = as.integer(classes) - ) -} - -#' @rdname application_densenet -#' @export -application_densenet201 <- function(include_top = TRUE, weights = "imagenet", input_tensor = NULL, - input_shape = NULL, pooling = NULL, classes = 1000) { - keras$applications$DenseNet201( - include_top = include_top, - weights = weights, - input_tensor = input_tensor, - input_shape = normalize_shape(input_shape), - pooling = pooling, - classes = as.integer(classes) - ) -} - -#' @rdname application_densenet -#' @export -densenet_preprocess_input <- function(x, data_format = NULL) { - preprocess_input(x, keras$applications$densenet$preprocess_input) -} - -#' Instantiates a NASNet model. -#' -#' Note that only TensorFlow is supported for now, -#' therefore it only works with the data format -#' `image_data_format='channels_last'` in your Keras config -#' at `~/.keras/keras.json`. -#' -#' @param input_shape Optional shape list, the input shape is by default `(331, 331, 3)` -#' for NASNetLarge and `(224, 224, 3)` for NASNetMobile It should have exactly 3 -#' inputs channels, and width and height should be no smaller than 32. E.g. -#' `(224, 224, 3)` would be one valid value. -#' @param penultimate_filters Number of filters in the penultimate layer. -#' NASNet models use the notation `NASNet (N @ P)`, where: -#' - N is the number of blocks -#' - P is the number of penultimate filters -#' @param num_blocks Number of repeated blocks of the NASNet model. NASNet -#' models use the notation `NASNet (N @ P)`, where: -#' - N is the number of blocks -#' - P is the number of penultimate filters -#' @param stem_block_filters Number of filters in the initial stem block -#' @param skip_reduction Whether to skip the reduction step at the tail end -#' of the network. Set to `FALSE` for CIFAR models. -#' @param filter_multiplier Controls the width of the network. -#' - If `filter_multiplier` < 1.0, proportionally decreases the number of -#' filters in each layer. -#' - If `filter_multiplier` > 1.0, proportionally increases the number of -#' filters in each layer. - If `filter_multiplier` = 1, default number of -#' filters from the paper are used at each layer. -#' @param include_top Whether to include the fully-connected layer at the top -#' of the network. 
-#' @param weights `NULL` (random initialization) or `imagenet` (ImageNet weights) -#' @param input_tensor Optional Keras tensor (i.e. output of `layer_input()`) -#' to use as image input for the model. -#' @param pooling Optional pooling mode for feature extraction when -#' `include_top` is `FALSE`. -#' - `NULL` means that the output of the model will be the 4D tensor output -#' of the last convolutional layer. -#' - `avg` means that global average pooling will be applied to the output -#' of the last convolutional layer, and thus the output of the model will -#' be a 2D tensor. -#' - `max` means that global max pooling will be applied. -#' @param classes Optional number of classes to classify images into, only to be -#' specified if `include_top` is TRUE, and if no `weights` argument is -#' specified. -#' @param default_size Specifies the default image size of the model -#' @param x a 4D array consists of RGB values within `[0, 255]`. +#' @description +#' +#' # Reference +#' - [Very Deep Convolutional Networks for Large-Scale Image Recognition]( +#' https://arxiv.org/abs/1409.1556) (ICLR 2015) +#' +#' For image classification use cases, see +#' [this page for detailed examples]( +#' https://keras.io/api/applications/#usage-examples-for-image-classification-models). +#' +#' For transfer learning use cases, make sure to read the +#' [guide to transfer learning & fine-tuning]( +#' https://keras.io/guides/transfer_learning/). +#' +#' The default input size for this model is 224x224. +#' +#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For VGG19, call [`application_preprocess_inputs()`] on your +#' inputs before passing them to the model. +#' [`application_preprocess_inputs()`] will convert the input images from RGB to BGR, +#' then will zero-center each color channel with respect to the ImageNet +#' dataset, without scaling. +#' +#' @returns +#' A model instance. +#' +#' @param include_top +#' whether to include the 3 fully-connected +#' layers at the top of the network. +#' +#' @param weights +#' one of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), +#' or the path to the weights file to be loaded. +#' +#' @param input_tensor +#' optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' optional shape tuple, only to be specified +#' if `include_top` is `FALSE` (otherwise the input shape +#' has to be `(224, 224, 3)` +#' (with `"channels_last"` data format) or +#' `(3, 224, 224)` (with `"channels_first"` data format)). +#' It should have exactly 3 input channels, +#' and width and height should be no smaller than 32. +#' E.g. `(200, 200, 3)` would be one valid value. +#' +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the +#' last convolutional block. +#' - `avg` means that global average pooling +#' will be applied to the output of the +#' last convolutional block, and thus +#' the output of the model will be a 2D tensor. +#' - `max` means that global max pooling will +#' be applied. +#' +#' @param classes +#' optional number of classes to classify images +#' into, only to be specified if `include_top` is `TRUE`, and +#' if no `weights` argument is specified. +#' +#' @param classifier_activation +#' A `str` or callable. The activation function to +#' use on the "top" layer. Ignored unless `include_top=TRUE`.
Set +#' `classifier_activation=NULL` to return the logits of the "top" +#' layer. When loading pretrained weights, `classifier_activation` can +#' only be `NULL` or `"softmax"`. #' #' @export -application_nasnet <- function(input_shape = NULL, penultimate_filters = 4032L, - num_blocks = 6L, stem_block_filters = 96L, - skip_reduction = TRUE, filter_multiplier = 2L, - include_top = TRUE, weights = NULL, - input_tensor = NULL, pooling = NULL, - classes = 1000, default_size = NULL) { - - keras$applications$nasnet$NASNet( - input_shape = normalize_shape(input_shape), - penultimate_filters = as.integer(penultimate_filters), - num_blocks = as.integer(num_blocks), - stem_block_filters = as.integer(stem_block_filters), - skip_reduction = skip_reduction, - filter_multiplier = filter_multiplier, - include_top = include_top, - weights = weights, - input_tensor = input_tensor, - pooling = pooling, - classes = as.integer(classes), - default_size = default_size - ) - -} - -#' @rdname application_nasnet -#' @export -application_nasnetlarge <- function(input_shape = NULL, include_top = TRUE, weights = NULL, - input_tensor = NULL, pooling = NULL, classes = 1000) { - - keras$applications$NASNetLarge( - input_shape = normalize_shape(input_shape), - include_top = include_top, - weights = weights, - input_tensor = input_tensor, - pooling = pooling, - classes = as.integer(classes) - ) - +#' @seealso +#' + +# + +#' @tether keras.applications.VGG19 +application_vgg19 <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax") +{ + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$VGG19, args) + set_preprocessing_attributes(model, keras$applications$vgg19) } -#' @rdname application_nasnet -#' @export -application_nasnetmobile <- function(input_shape = NULL, include_top = TRUE, weights = NULL, - input_tensor = NULL, pooling = NULL, classes = 1000) { - - keras$applications$NASNetMobile( - input_shape = normalize_shape(input_shape), - include_top = include_top, - weights = weights, - input_tensor = input_tensor, - pooling = pooling, - classes = as.integer(classes) - ) - -} -#' Instantiates the EfficientNetB0 architecture +#' Instantiates the Xception architecture. #' -#' @details -#' Reference: -#' - [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks]( -#' https://arxiv.org/abs/1905.11946) (ICML 2019) +#' @description #' -#' This function returns a Keras image classification model, -#' optionally loaded with weights pre-trained on ImageNet. +#' # Reference +#' - [Xception: Deep Learning with Depthwise Separable Convolutions]( +#' https://arxiv.org/abs/1610.02357) (CVPR 2017) #' #' For image classification use cases, see #' [this page for detailed examples]( @@ -970,151 +3791,250 @@ application_nasnetmobile <- function(input_shape = NULL, include_top = TRUE, wei #' [guide to transfer learning & fine-tuning]( #' https://keras.io/guides/transfer_learning/). #' -#' EfficientNet models expect their inputs to be float tensors of pixels with values in the `[0-255]` range. +#' The default input image size for this model is 299x299. #' -#' @note -#' Each Keras Application typically expects a specific kind of input preprocessing. -#' For EfficientNet, input preprocessing is included as part of the model -#' (as a `Rescaling` layer), and thus a calling a preprocessing function is not necessary.
+#' # Note +#' Each Keras Application expects a specific kind of input preprocessing. +#' For Xception, call [`application_preprocess_inputs()`] +#' on your inputs before passing them to the model. +#' [`application_preprocess_inputs()`] will scale input pixels between `-1` and `1`. #' -#' @inheritParams application_xception +#' @returns +#' A model instance. #' -#' @param input_shape Optional shape list, only to be specified -#' if `include_top` is FALSE. -#' It should have exactly 3 inputs channels. +#' @param include_top +#' whether to include the fully-connected +#' layer at the top of the network. #' +#' @param weights +#' one of `NULL` (random initialization), +#' `"imagenet"` (pre-training on ImageNet), +#' or the path to the weights file to be loaded. #' -#' @name application_efficientnet -#' @rdname application_efficientnet +#' @param input_tensor +#' optional Keras tensor +#' (i.e. output of `layers.Input()`) +#' to use as image input for the model. +#' +#' @param input_shape +#' optional shape tuple, only to be specified +#' if `include_top` is `FALSE` (otherwise the input shape +#' has to be `(299, 299, 3)`). +#' It should have exactly 3 input channels, +#' and width and height should be no smaller than 71. +#' E.g. `(150, 150, 3)` would be one valid value. +#' +#' @param pooling +#' Optional pooling mode for feature extraction +#' when `include_top` is `FALSE`. +#' - `NULL` means that the output of the model will be +#' the 4D tensor output of the +#' last convolutional block. +#' - `avg` means that global average pooling +#' will be applied to the output of the +#' last convolutional block, and thus +#' the output of the model will be a 2D tensor. +#' - `max` means that global max pooling will +#' be applied. +#' +#' @param classes +#' optional number of classes to classify images +#' into, only to be specified if `include_top` is `TRUE`, and +#' if no `weights` argument is specified. +#' +#' @param classifier_activation +#' A `str` or callable. The activation function to +#' use on the "top" layer. Ignored unless `include_top=TRUE`. Set +#' `classifier_activation=NULL` to return the logits of the "top" +#' layer. When loading pretrained weights, `classifier_activation` can +#' only be `NULL` or `"softmax"`. #' -#' @seealso -#' + -#' + #' @export -application_efficientnet_b0 <- -function(include_top = TRUE, weights = "imagenet", - input_tensor = NULL, input_shape = NULL, - pooling = NULL, classes = 1000L, - classifier_activation = "softmax", - ...) +#' @seealso +#' + +# + +#' @tether keras.applications.Xception +application_xception <- +function (include_top = TRUE, weights = "imagenet", input_tensor = NULL, + input_shape = NULL, pooling = NULL, classes = 1000L, classifier_activation = "softmax") { - require_tf_version("2.3", "application_efficientnet_b0") - args <- capture_args(match.call(), list(classes = as.integer, input_shape = normalize_shape)) - do.call(keras$applications$EfficientNetB0, args) + args <- capture_args(list(classes = as_integer, input_shape = normalize_shape)) + model <- do.call(keras$applications$Xception, args) + set_preprocessing_attributes(model, keras$applications$xception) } -#' @export -#' @rdname application_efficientnet -application_efficientnet_b1 <- -function(include_top = TRUE, weights = "imagenet", - input_tensor = NULL, input_shape = NULL, - pooling = NULL, classes = 1000L, - classifier_activation = "softmax", - ...)
-{ - require_tf_version("2.3", "application_efficientnet_b1") - args <- capture_args(match.call(), list(classes = as.integer, input_shape = normalize_shape)) - do.call(keras$applications$EfficientNetB1, args) -} +#' Preprocessing and postprocessing utilities +#' +#' @description +#' These functions are used to preprocess and postprocess +#' inputs and outputs of Keras applications. +#' +#' @param model A Keras model initialized using any `application_` function. +#' @param x A batch of inputs to the model. +#' @param preds A batch of outputs from the model. +#' @param ... Additional arguments passed to the preprocessing or decoding function. +#' @param top The number of top predictions to return. +#' @param data_format +#' Optional data format of the image tensor/array. +#' `NULL` means the global setting +#' `config_image_data_format()` is used +#' (unless you changed it, it uses `"channels_last"`). +#' Defaults to `NULL`. +#' +#' @returns +#' - A list of decoded predictions in case of `application_decode_predictions()`. +#' - A batch of preprocessed inputs in case of `application_preprocess_inputs()`. +#' +#' @examples \dontrun{ +#' model <- application_convnext_tiny() +#' +#' inputs <- random_normal(c(32, 224, 224, 3)) +#' processed_inputs <- application_preprocess_inputs(model, inputs) +#' +#' preds <- random_normal(c(32, 1000)) +#' decoded_preds <- application_decode_predictions(model, preds) +#' +#' } +#' @name process_utils +NULL +#' @describeIn process_utils Pre-process inputs to be used in the model #' @export -#' @rdname application_efficientnet -application_efficientnet_b2 <- -function(include_top = TRUE, weights = "imagenet", - input_tensor = NULL, input_shape = NULL, - pooling = NULL, classes = 1000L, - classifier_activation = "softmax", - ...) -{ - require_tf_version("2.3", "application_efficientnet_b2") - args <- capture_args(match.call(), list(classes = as.integer, input_shape = normalize_shape)) - do.call(keras$applications$EfficientNetB2, args) +application_preprocess_inputs <- function(model, x, ..., data_format = NULL) { + preprocess_input <- attr(model, "preprocess_input") + if (is.null(preprocess_input)) not_found_errors(model) + preprocess_input(x, data_format = data_format, ...) } +#' @describeIn process_utils Decode predictions from the model #' @export -#' @rdname application_efficientnet -application_efficientnet_b3 <- -function(include_top = TRUE, weights = "imagenet", - input_tensor = NULL, input_shape = NULL, - pooling = NULL, classes = 1000L, - classifier_activation = "softmax", - ...) -{ - require_tf_version("2.3", "application_efficientnet_b3") - args <- capture_args(match.call(), list(classes = as.integer, input_shape = normalize_shape)) - do.call(keras$applications$EfficientNetB3, args) +application_decode_predictions <- function(model, preds, top = 5L, ...) { + decode_predictions <- attr(model, "decode_predictions") + if (is.null(decode_predictions)) not_found_errors(model) + decode_predictions(preds, top = as_integer(top), ...) } -#' @export -#' @rdname application_efficientnet -application_efficientnet_b4 <- -function(include_top = TRUE, weights = "imagenet", - input_tensor = NULL, input_shape = NULL, - pooling = NULL, classes = 1000L, - classifier_activation = "softmax", - ...)
-{ - require_tf_version("2.3", "application_efficientnet_b4") - args <- capture_args(match.call(), list(classes = as.integer, input_shape = normalize_shape)) - do.call(keras$applications$EfficientNetB4, args) -} +not_found_errors <- function(model) { + if (!inherits(model, "keras.src.models.model.Model")) { + cli::cli_abort(c( + x = "The {.arg model} argument must be a Keras model, got {.cls {head(class(model))}}" + )) + } -#' @export -#' @rdname application_efficientnet -application_efficientnet_b5 <- -function(include_top = TRUE, weights = "imagenet", - input_tensor = NULL, input_shape = NULL, - pooling = NULL, classes = 1000L, - classifier_activation = "softmax", - ...) -{ - require_tf_version("2.3", "application_efficientnet_b5") - args <- capture_args(match.call(), list(classes = as.integer, input_shape = normalize_shape)) - do.call(keras$applications$EfficientNetB5, args) + if (model$name %in% list_model_names()) { + cli::cli_abort(c( + x = "The {.arg model} argument must be created using the `application_` functions.", + i = "This model appears to have been created by calling the Keras API directly, so it is missing the preprocessing attributes." + )) + } + + rlang::abort(c(x = "No preprocessing/decoding utilities found for this model.")) } -#' @export -#' @rdname application_efficientnet -application_efficientnet_b6 <- -function(include_top = TRUE, weights = "imagenet", - input_tensor = NULL, input_shape = NULL, - pooling = NULL, classes = 1000L, - classifier_activation = "softmax", - ...) -{ - require_tf_version("2.3", "application_efficientnet_b6") - args <- capture_args(match.call(), list(classes = as.integer, input_shape = normalize_shape)) - do.call(keras$applications$EfficientNetB6, args) +list_model_names <- function() { + # this list is used to produce a nicer error message when a user initialized + # the model using the raw interface instead of using the `application_` functions + # it can be updated with something like: + # model_names <- ls(envir = asNamespace("keras")) %>% + # purrr::keep(\(name) stringr::str_detect(name, "^application_")) %>% + # purrr::map_chr(\(name) do.call(name, list(weights = NULL))$name) + # dput(model_names) + c("convnext_base", "convnext_large", "convnext_small", "convnext_tiny", + "convnext_xlarge", "densenet121", "densenet169", "densenet201", + "efficientnetb0", "efficientnetb1", "efficientnetb2", "efficientnetb3", + "efficientnetb4", "efficientnetb5", "efficientnetb6", "efficientnetb7", + "efficientnetv2-b0", "efficientnetv2-b1", "efficientnetv2-b2", + "efficientnetv2-b3", "efficientnetv2-l", "efficientnetv2-m", + "efficientnetv2-s", "inception_resnet_v2", "inception_v3", "mobilenet_1.00_224", + "mobilenetv2_1.00_224", "MobilenetV3large", "MobilenetV3small", + "NASNet", "NASNet", "resnet101", "resnet101v2", "resnet152", + "resnet152v2", "resnet50", "resnet50v2", "vgg16", "vgg19", "xception" + ) } -#' @export -#' @rdname application_efficientnet -application_efficientnet_b7 <- -function(include_top = TRUE, weights = "imagenet", - input_tensor = NULL, input_shape = NULL, - pooling = NULL, classes = 1000L, - classifier_activation = "softmax", - ...)
-{ - require_tf_version("2.3", "application_efficientnet_b7") - args <- capture_args(match.call(), list(classes = as.integer, input_shape = normalize_shape)) - do.call(keras$applications$EfficientNetB7, args) +set_preprocessing_attributes <- function(object, module) { + .preprocess_input <- r_to_py(module)$preprocess_input + + attr(object, "preprocess_input") <- + as.function.default(c(formals(.preprocess_input), bquote({ + args <- capture_args(list( + x = function(x) { + if (!is_py_object(x)) + x <- np_array(x) + if (inherits(x, "numpy.ndarray") && + !py_bool(x$flags$writeable)) + x <- x$copy() + x + } + )) + do.call(.(.preprocess_input), args) + })), envir = parent.env(environment())) + + attr(object, "decode_predictions") <- module$decode_predictions + object } -#' @rdname application_nasnet +#' Decodes the prediction of an ImageNet model. +#' +#' @param preds Tensor encoding a batch of predictions. +#' @param top integer, how many top-guesses to return. +#' +#' @return List of data frames with variables `class_name`, `class_description`, +#' and `score` (one data frame per sample in batch input). +#' #' @export -nasnet_preprocess_input <- function(x) { - preprocess_input(x, keras$applications$nasnet$preprocess_input) -} +#' @keywords internal +imagenet_decode_predictions <- function(preds, top = 5) { -preprocess_input <- function(x, preprocessor, ...) { - preprocessor(keras_array(x), ...) -} + # decode predictions + decoded <- keras$applications$imagenet_utils$decode_predictions( + preds = preds, + top = as.integer(top) + ) -verify_application_prerequistes <- function() { + # convert to a list of data frames + lapply(decoded, function(x) { + m <- t(sapply(1:length(x), function(n) x[[n]])) + data.frame(class_name = as.character(m[,1]), + class_description = as.character(m[,2]), + score = as.numeric(m[,3]), + stringsAsFactors = FALSE) + }) +} - if (!have_h5py()) - stop("The h5py Python package is required to use pre-built Keras models", call. = FALSE) +#' Preprocesses a tensor or array encoding a batch of images. +#' +#' @param x Input Numpy or symbolic tensor, 3D or 4D. +#' @param data_format Data format of the image tensor/array. +#' @param mode One of "caffe", "tf", or "torch" +#' - caffe: will convert the images from RGB to BGR, +#' then will zero-center each color channel with +#' respect to the ImageNet dataset, +#' without scaling. +#' - tf: will scale pixels between -1 and 1, sample-wise. +#' - torch: will scale pixels between 0 and 1 and then +#' will normalize each channel with respect to the +#' ImageNet dataset. +#' +#' @return Preprocessed tensor or array. +#' +#' @export +#' @keywords internal +imagenet_preprocess_input <- function(x, data_format = NULL, mode = "caffe") { + args <- capture_args(list( + x = function(x) { + if (!is_py_object(x)) + x <- np_array(x) + if (inherits(x, "numpy.ndarray") && + !py_bool(x$flags$writeable)) + x <- x$copy() + x + } + )) + + preprocess_input <- r_to_py(keras$applications$imagenet_utils)$preprocess_input + do.call(preprocess_input, args) } + diff --git a/R/backend.R b/R/backend.R deleted file mode 100644 index 38c6eefa3e..0000000000 --- a/R/backend.R +++ /dev/null @@ -1,3171 +0,0 @@ - - -#' Keras backend tensor engine -#' -#' Obtain a reference to the `keras.backend` Python module used to implement -#' tensor operations. -#' -#' @inheritParams reticulate::import -#' -#' @note See the documentation here for -#' additional details on the available functions. -#' -#' @return Reference to Keras backend python module. 
-#' -#' @export -backend <- function(convert = TRUE) { - if (convert) - keras$backend - else - r_to_py(keras$backend) -} - - -#' Element-wise absolute value. -#' -#' @param x Tensor or variable. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_abs <- function(x) { - keras$backend$abs( - x = x - ) -} - - -#' Bitwise reduction (logical AND). -#' -#' @param x Tensor or variable. -#' @param axis Axis along which to perform the reduction (axis indexes are -#' 1-based). -#' @param keepdims whether the drop or broadcast the reduction axes. -#' -#' @return A uint8 tensor (0s and 1s). -#' -#' @template roxlate-keras-backend -#' -#' @export -k_all <- function(x, axis = NULL, keepdims = FALSE) { - keras$backend$all( - x = x, - axis = as_axis(axis), - keepdims = keepdims - ) -} - - -#' Bitwise reduction (logical OR). -#' -#' @param x Tensor or variable. -#' @param axis Axis along which to perform the reduction (axis indexes -#' are 1-based). -#' @param keepdims whether the drop or broadcast the reduction axes. -#' -#' @return A uint8 tensor (0s and 1s). -#' -#' @template roxlate-keras-backend -#' -#' @export -k_any <- function(x, axis = NULL, keepdims = FALSE) { - keras$backend$any( - x = x, - axis = as_axis(axis), - keepdims = keepdims - ) -} - - -#' Creates a 1D tensor containing a sequence of integers. -#' -#' The function arguments use the same convention as Theano's arange: if only -#' one argument is provided, it is in fact the "stop" argument. The default -#' type of the returned tensor is `'int32'` to match TensorFlow's default. -#' -#' @param start Start value. -#' @param stop Stop value. -#' @param step Difference between two successive values. -#' @param dtype Integer dtype to use. -#' -#' @return An integer tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_arange <- function(start, stop = NULL, step = 1, dtype = "int32") { - keras$backend$arange( - start = as.integer(start), - stop = as_nullable_integer(stop), - step = as.integer(step), - dtype = dtype - ) -} - - -#' Returns the index of the maximum value along an axis. -#' -#' @param x Tensor or variable. -#' @param axis Axis along which to perform the reduction (axis indexes are -#' 1-based). Pass -1 (the default) to select the last axis. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_argmax <- function(x, axis = -1) { - keras$backend$argmax( - x = x, - axis = as_axis(axis) - ) -} - - -#' Returns the index of the minimum value along an axis. -#' -#' @param x Tensor or variable. -#' @param axis Axis along which to perform the reduction (axis indexes are -#' 1-based). Pass -1 (the default) to select the last axis. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_argmin <- function(x, axis = -1) { - keras$backend$argmin( - x = x, - axis = as_axis(axis) - ) -} - - -#' Active Keras backend -#' -#' @return The name of the backend Keras is currently using. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_backend <- function() { - keras$backend$backend( - ) -} - - -#' Batchwise dot product. -#' -#' `batch_dot` is used to compute dot product of `x` and `y` when `x` and `y` -#' are data in batch, i.e. in a shape of `(batch_size)`. `batch_dot` results in -#' a tensor or variable with less dimensions than the input. If the number of -#' dimensions is reduced to 1, we use `expand_dims` to make sure that ndim is -#' at least 2. -#' -#' @param x Keras tensor or variable with 2 more more axes. 
-#' @param y Keras tensor or variable with 2 or more axes -#' @param axes List of (or single) integer with target dimensions (axis indexes -#' are 1-based). The lengths of `axes[[1]]` and `axes[[2]]` should be the -#' same. -#' -#' @return A tensor with shape equal to the concatenation of `x`'s shape (less -#' the dimension that was summed over) and `y`'s shape (less the batch -#' dimension and the dimension that was summed over). If the final rank is 1, -#' we reshape it to `(batch_size, 1)`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_batch_dot <- function(x, y, axes) { - keras$backend$batch_dot( - x = x, - y = y, - axes = as_axis(axes) - ) -} - - -#' Turn a nD tensor into a 2D tensor with same 1st dimension. -#' -#' In other words, it flattens each data samples of a batch. -#' -#' @param x A tensor or variable. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_batch_flatten <- function(x) { - keras$backend$batch_flatten( - x = x - ) -} - - -#' Returns the value of more than one tensor variable. -#' -#' @param ops List of ops to evaluate. -#' -#' @return A list of arrays. -#' -#' @seealso [k_batch_set_value()] -#' -#' @template roxlate-keras-backend -#' -#' @export -k_batch_get_value <- function(ops) { - keras$backend$batch_get_value(ops) -} - - -#' Applies batch normalization on x given mean, var, beta and gamma. -#' -#' i.e. returns -#' `output <- (x - mean) / (sqrt(var) + epsilon) * gamma + beta` -#' -#' @param x Input tensor or variable. -#' @param mean Mean of batch. -#' @param var Variance of batch. -#' @param beta Tensor with which to center the input. -#' @param gamma Tensor by which to scale the input. -#' @param axis Axis (axis indexes are 1-based). Pass -1 (the -#' default) to select the last axis. -#' @param epsilon Fuzz factor. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_batch_normalization <- function(x, mean, var, beta, gamma, axis = -1, epsilon = 0.001) { - - args <- list( - x = x, - mean = mean, - var = var, - beta = beta, - gamma = gamma, - epsilon = epsilon - ) - - if (keras_version() >= "2.2.0") - args$axis <- as_axis(axis) - - do.call(keras$backend$batch_normalization, args) -} - - -#' Sets the values of many tensor variables at once. -#' -#' @param lists a list of lists `(tensor, value)`. `value` should be an R array. -#' -#' @seealso [k_batch_get_value()] -#' -#' @template roxlate-keras-backend -#' -#' @export -k_batch_set_value <- function(lists) { - keras$backend$batch_set_value( - tuples = lists - ) -} - - -#' Adds a bias vector to a tensor. -#' -#' @param x Tensor or variable. -#' @param bias Bias tensor to add. -#' @param data_format string, `"channels_last"` or `"channels_first"`. -#' -#' @return Output tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_bias_add <- function(x, bias, data_format = NULL) { - keras$backend$bias_add( - x = x, - bias = bias, - data_format = data_format - ) -} - - -#' Binary crossentropy between an output tensor and a target tensor. -#' -#' @param target A tensor with the same shape as `output`. -#' @param output A tensor. -#' @param from_logits Whether `output` is expected to be a logits tensor. By -#' default, we consider that `output` encodes a probability distribution. -#' -#' @return A tensor. 
-#' -#' @template roxlate-keras-backend -#' -#' @export -k_binary_crossentropy <- function(target, output, from_logits = FALSE) { - keras$backend$binary_crossentropy( - target = target, - output = output, - from_logits = from_logits - ) -} - - -#' Casts a tensor to a different dtype and returns it. -#' -#' You can cast a Keras variable but it still returns a Keras tensor. -#' -#' @param x Keras tensor (or variable). -#' @param dtype String, either (`'float16'`, `'float32'`, or `'float64'`). -#' -#' @return Keras tensor with dtype `dtype`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_cast <- function(x, dtype) { - keras$backend$cast( - x = x, - dtype = dtype - ) -} - - -#' Cast an array to the default Keras float type. -#' -#' @param x Array. -#' -#' @return The same array, cast to its new type. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_cast_to_floatx <- function(x) { - r_to_py(keras$backend)$cast_to_floatx( - x = x - ) -} - - -#' Categorical crossentropy between an output tensor and a target tensor. -#' -#' @inheritParams k_batch_normalization -#' -#' @param target A tensor of the same shape as `output`. -#' @param output A tensor resulting from a softmax (unless `from_logits` is -#' TRUE, in which case `output` is expected to be the logits). -#' @param from_logits Logical, whether `output` is the result of a softmax, or -#' is a tensor of logits. -#' -#' @return Output tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_categorical_crossentropy <- function(target, output, from_logits = FALSE, axis = -1) { - - args <- list( - target = target, - output = output, - from_logits = from_logits - ) - - if (keras_version() >= "2.2.0") - args$axis <- as_axis(axis) - - do.call(keras$backend$categorical_crossentropy, args) -} - - -#' Destroys the current TF graph and creates a new one. -#' -#' Useful to avoid clutter from old models / layers. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_clear_session <- function() { - keras$backend$clear_session() -} - - -#' Element-wise value clipping. -#' -#' @param x Tensor or variable. -#' @param min_value Float or integer. -#' @param max_value Float or integer. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_clip <- function(x, min_value = NULL, max_value = NULL) { - keras$backend$clip( - x = x, - min_value = min_value, - max_value = max_value - ) -} - - -#' Concatenates a list of tensors alongside the specified axis. -#' -#' @param tensors list of tensors to concatenate. -#' @param axis concatenation axis (axis indexes are 1-based). Pass -1 (the -#' default) to select the last axis. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_concatenate <- function(tensors, axis = -1) { - keras$backend$concatenate( - tensors = tensors, - axis = as_axis(axis) - ) -} - - -#' Creates a constant tensor. -#' -#' @param value A constant value -#' @param dtype The type of the elements of the resulting tensor. -#' @param shape Optional dimensions of resulting tensor. -#' @param name Optional name for the tensor. -#' -#' @return A Constant Tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_constant <- function(value, dtype = NULL, shape = NULL, name = NULL) { - keras$backend$constant( - value = value, - dtype = dtype, - shape = backend_normalize_shape(shape), - name = name - ) -} - - - -#' 1D convolution. -#' -#' @param x Tensor or variable. -#' @param kernel kernel tensor. -#' @param strides stride integer. 
-#' @param padding string, `"same"`, `"causal"` or `"valid"`. -#' @param data_format string, `"channels_last"` or `"channels_first"`. -#' @param dilation_rate integer dilate rate. -#' -#' @return A tensor, result of 1D convolution. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_conv1d <- function(x, kernel, strides = 1, padding = "valid", data_format = NULL, dilation_rate = 1) { - keras$backend$conv1d( - x = x, - kernel = kernel, - strides = as.integer(strides), - padding = padding, - data_format = data_format, - dilation_rate = as.integer(dilation_rate) - ) -} - - -#' 2D convolution. -#' -#' @param x Tensor or variable. -#' @param kernel kernel tensor. -#' @param strides strides -#' @param padding string, `"same"` or `"valid"`. -#' @param data_format string, `"channels_last"` or `"channels_first"`. Whether -#' to use Theano or TensorFlow/CNTK data format for inputs/kernels/outputs. -#' @param dilation_rate vector of 2 integers. -#' -#' @return A tensor, result of 2D convolution. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_conv2d <- function(x, kernel, strides = c(1, 1), padding = "valid", - data_format = NULL, - dilation_rate = c(1, 1)) { - keras$backend$conv2d( - x = x, - kernel = kernel, - strides = as.integer(strides), - padding = padding, - data_format = data_format, - dilation_rate = as.integer(dilation_rate) - ) -} - - -#' 2D deconvolution (i.e. transposed convolution). -#' -#' @param x Tensor or variable. -#' @param kernel kernel tensor. -#' @param output_shape 1D int tensor for the output shape. -#' @param strides strides list. -#' @param padding string, `"same"` or `"valid"`. -#' @param data_format string, `"channels_last"` or `"channels_first"`. Whether -#' to use Theano or TensorFlow/CNTK data format for inputs/kernels/outputs. -#' -#' @return A tensor, result of transposed 2D convolution. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_conv2d_transpose <- function(x, kernel, output_shape, strides = c(1, 1), - padding = "valid", data_format = NULL) { - keras$backend$conv2d_transpose( - x = x, - kernel = kernel, - output_shape = output_shape, - strides = as.integer(strides), - padding = padding, - data_format = data_format - ) -} - - -#' 3D convolution. -#' -#' @param x Tensor or variable. -#' @param kernel kernel tensor. -#' @param strides strides -#' @param padding string, `"same"` or `"valid"`. -#' @param data_format string, `"channels_last"` or `"channels_first"`. Whether -#' to use Theano or TensorFlow/CNTK data format for inputs/kernels/outputs. -#' @param dilation_rate list of 3 integers. -#' -#' @return A tensor, result of 3D convolution. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_conv3d <- function(x, kernel, strides = c(1, 1, 1), padding = "valid", - data_format = NULL, dilation_rate = c(1, 1, 1)) { - keras$backend$conv3d( - x = x, - kernel = kernel, - strides = as.integer(strides), - padding = padding, - data_format = data_format, - dilation_rate = as.integer(dilation_rate) - ) -} - - -#' 3D deconvolution (i.e. transposed convolution). -#' -#' @param x input tensor. -#' @param kernel kernel tensor. -#' @param output_shape 1D int tensor for the output shape. -#' @param strides strides -#' @param padding string, "same" or "valid". -#' @param data_format string, `"channels_last"` or `"channels_first"`. Whether -#' to use Theano or TensorFlow/CNTK data format for inputs/kernels/outputs. -#' -#' @return A tensor, result of transposed 3D convolution. 
-#' -#' @template roxlate-keras-backend -#' -#' @export -k_conv3d_transpose <- function(x, kernel, output_shape, strides = c(1, 1, 1), - padding = "valid", data_format = NULL) { - keras$backend$conv3d_transpose( - x = x, - kernel = kernel, - output_shape = output_shape, - strides = as.integer(strides), - padding = padding, - data_format = data_format - ) -} - - -#' Computes cos of x element-wise. -#' -#' @param x Tensor or variable. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_cos <- function(x) { - keras$backend$cos( - x = x - ) -} - - -#' Returns the static number of elements in a Keras variable or tensor. -#' -#' @param x Keras variable or tensor. -#' -#' @return Integer, the number of elements in `x`, i.e., the product of the array's static dimensions. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_count_params <- function(x) { - keras$backend$count_params( - x = x - ) -} - - -#' Runs CTC loss algorithm on each batch element. -#' -#' @param y_true tensor `(samples, max_string_length)` containing the truth -#' labels. -#' @param y_pred tensor `(samples, time_steps, num_categories)` containing the -#' prediction, or output of the softmax. -#' @param input_length tensor `(samples, 1)` containing the sequence length for -#' each batch item in `y_pred`. -#' @param label_length tensor `(samples, 1)` containing the sequence length for -#' each batch item in `y_true`. -#' -#' @return Tensor with shape (samples,1) containing the CTC loss of each -#' element. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_ctc_batch_cost <- function(y_true, y_pred, input_length, label_length) { - keras$backend$ctc_batch_cost( - y_true = y_true, - y_pred = y_pred, - input_length = input_length, - label_length = label_length - ) -} - - -#' Decodes the output of a softmax. -#' -#' Can use either greedy search (also known as best path) or a constrained -#' dictionary search. -#' -#' @param y_pred tensor `(samples, time_steps, num_categories)` containing the -#' prediction, or output of the softmax. -#' @param input_length tensor `(samples, )` containing the sequence length for -#' each batch item in `y_pred`. -#' @param greedy perform much faster best-path search if `TRUE`. This does not -#' use a dictionary. -#' @param beam_width if `greedy` is `FALSE`: a beam search decoder will be used -#' with a beam of this width. -#' @param top_paths if `greedy` is `FALSE`, how many of the most probable paths -#' will be returned. -#' -#' @return If `greedy` is `TRUE`, returns a list of one element -#' that contains the decoded sequence. If `FALSE`, returns the `top_paths` -#' most probable decoded sequences. Important: blank labels are returned as -#' `-1`. Tensor `(top_paths)` that contains the log probability of each -#' decoded sequence. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_ctc_decode <- function(y_pred, input_length, greedy = TRUE, beam_width = 100L, top_paths = 1) { - keras$backend$ctc_decode( - y_pred = y_pred, - input_length = input_length, - greedy = greedy, - beam_width = as.integer(beam_width), - top_paths = as.integer(top_paths) - ) -} - - -#' Converts CTC labels from dense to sparse. -#' -#' @param labels dense CTC labels. -#' @param label_lengths length of the labels. -#' -#' @return A sparse tensor representation of the labels. 
-#' -#' @template roxlate-keras-backend -#' -#' @export -k_ctc_label_dense_to_sparse <- function(labels, label_lengths) { - keras$backend$ctc_label_dense_to_sparse( - labels = labels, - label_lengths = label_lengths - ) -} - - -#' Cumulative product of the values in a tensor, alongside the specified axis. -#' -#' @param x A tensor or variable. -#' @param axis An integer, the axis to compute the product (axis indexes are -#' 1-based). -#' -#' @return A tensor of the cumulative product of values of `x` along `axis`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_cumprod <- function(x, axis = 1) { - keras$backend$cumprod( - x = x, - axis = as_axis(axis) - ) -} - - -#' Cumulative sum of the values in a tensor, alongside the specified axis. -#' -#' @param x A tensor or variable. -#' @param axis An integer, the axis to compute the sum (axis indexes are -#' 1-based). -#' -#' @return A tensor of the cumulative sum of values of `x` along `axis`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_cumsum <- function(x, axis = 1) { - keras$backend$cumsum( - x = x, - axis = as_axis(axis) - ) -} - - -#' Depthwise 2D convolution with separable filters. -#' -#' @param x input tensor -#' @param depthwise_kernel convolution kernel for the depthwise convolution. -#' @param strides strides (length 2). -#' @param padding string, `"same"` or `"valid"`. -#' @param data_format string, `"channels_last"` or `"channels_first"`. -#' @param dilation_rate vector of integers, dilation rates for the separable -#' convolution. -#' -#' @return Output tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_depthwise_conv2d <- function(x, depthwise_kernel, strides = c(1, 1), padding = "valid", - data_format = NULL, dilation_rate = c(1, 1)) { - keras$backend$depthwise_conv2d( - x = x, - depthwise_kernel = depthwise_kernel, - strides = as.integer(strides), - padding = padding, - data_format = data_format, - dilation_rate = as.integer(dilation_rate) - ) -} - - -#' Multiplies 2 tensors (and/or variables) and returns a *tensor*. -#' -#' When attempting to multiply an nD tensor -#' with an nD tensor, it reproduces the Theano behavior. -#' (e.g. `(2, 3) * (4, 3, 5) -> (2, 4, 5)`) -#' -#' @param x Tensor or variable. -#' @param y Tensor or variable. -#' -#' @return A tensor, dot product of `x` and `y`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_dot <- function(x, y) { - keras$backend$dot( - x = x, - y = y - ) -} - - -#' Sets entries in `x` to zero at random, while scaling the entire tensor. -#' -#' @param x tensor -#' @param level fraction of the entries in the tensor that will be set to 0. -#' @param noise_shape shape for randomly generated keep/drop flags, must be -#' broadcastable to the shape of `x` -#' @param seed random seed to ensure determinism. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_dropout <- function(x, level, noise_shape = NULL, seed = NULL) { - keras$backend$dropout( - x = x, - level = level, - noise_shape = noise_shape, - seed = seed - ) -} - - -#' Returns the dtype of a Keras tensor or variable, as a string. -#' -#' @param x Tensor or variable. -#' -#' @return String, dtype of `x`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_dtype <- function(x) { - keras$backend$dtype( - x = x - ) -} - - -#' Exponential linear unit. -#' -#' @param x A tensor or variable to compute the activation function for. -#' @param alpha A scalar, slope of negative section. -#' -#' @return A tensor.
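The Theano-style shape rule quoted in the `k_dot()` docs above is easiest to check on concrete shapes; a small sketch under the same assumptions as the earlier one:

```r
library(keras)

x <- k_random_uniform(shape = c(2, 3))
y <- k_random_uniform(shape = c(4, 3, 5))

# (2, 3) . (4, 3, 5) -> (2, 4, 5), matching the docstring example
k_int_shape(k_dot(x, y))
```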
-#' -#' @template roxlate-keras-backend -#' -#' @export -k_elu <- function(x, alpha = 1.0) { - keras$backend$elu( - x = x, - alpha = alpha - ) -} - - -#' Fuzz factor used in numeric expressions. -#' -#' @param e float. New value of epsilon. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_epsilon <- function() { - keras$backend$epsilon( - ) -} - -#' @rdname k_epsilon -#' @export -k_set_epsilon <- function(e) { - keras$backend$set_epsilon(e) -} - - - -#' Element-wise equality between two tensors. -#' -#' @param x Tensor or variable. -#' @param y Tensor or variable. -#' -#' @return A bool tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_equal <- function(x, y) { - keras$backend$equal( - x = x, - y = y - ) -} - - -#' Evaluates the value of a variable. -#' -#' @param x A variable. -#' -#' @return An R array. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_eval <- function(x) { - keras$backend$eval( - x = x - ) -} - - -#' Element-wise exponential. -#' -#' @param x Tensor or variable. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_exp <- function(x) { - keras$backend$exp( - x = x - ) -} - - -#' Adds a 1-sized dimension at index `axis`. -#' -#' @param x A tensor or variable. -#' @param axis Position where to add a new axis (axis indexes are 1-based). -#' Pass -1 (the default) to select the last axis. -#' -#' @return A tensor with expanded dimensions. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_expand_dims <- function(x, axis = -1) { - keras$backend$expand_dims( - x = x, - axis = as_axis(axis) - ) -} - - -#' Instantiates an identity matrix and returns it. -#' -#' @param size Integer, number of rows/columns. -#' @param dtype String, data type of returned Keras variable. -#' @param name String, name of returned Keras variable. -#' -#' @return A Keras variable, an identity matrix. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_eye <- function(size, dtype = NULL, name = NULL) { - keras$backend$eye( - size = as.integer(size), - dtype = dtype, - name = name - ) -} - - -#' Flatten a tensor. -#' -#' @param x A tensor or variable. -#' -#' @return A tensor, reshaped into 1-D. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_flatten <- function(x) { - keras$backend$flatten( - x = x - ) -} - - -#' Default float type -#' -#' @param floatx String, 'float16', 'float32', or 'float64'. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_floatx <- function() { - keras$backend$floatx( - ) -} - -#' @rdname k_floatx -#' @export -k_set_floatx <- function(floatx) { - keras$backend$set_floatx(floatx) -} - - -#' Reduce elems using fn to combine them from left to right. -#' -#' @param fn Function that will be called upon each element in elems and an -#' accumulator -#' @param elems tensor -#' @param initializer The first value used (first element of `elems` in case of -#' `NULL`) -#' @param name A string name for the foldl node in the graph -#' -#' @return Tensor with same type and shape as `initializer`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_foldl <- function(fn, elems, initializer = NULL, name = NULL) { - keras$backend$foldl( - fn = fn, - elems = elems, - initializer = initializer, - name = name - ) -} - - -#' Reduce elems using fn to combine them from right to left.
-#' -#' @param fn Function that will be called upon each element in elems and an -#' accumulator -#' @param elems tensor -#' @param initializer The first value used (last element of `elems` in case of -#' NULL) -#' @param name A string name for the foldr node in the graph -#' -#' @return Tensor with same type and shape as `initializer`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_foldr <- function(fn, elems, initializer = NULL, name = NULL) { - keras$backend$foldr( - fn = fn, - elems = elems, - initializer = initializer, - name = name - ) -} - -#' Instantiates a Keras function -#' -#' @param inputs List of placeholder tensors. -#' @param outputs List of output tensors. -#' @param updates List of update ops. -#' @param ... Named arguments passed to `tf$Session$run`. -#' -#' @return Output values as R arrays. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_function <- function(inputs, outputs, updates = NULL, ...) { - keras$backend$`function`( - inputs = inputs, - outputs = outputs, - updates = updates, - ... - ) -} - - -#' Retrieves the elements of indices `indices` in the tensor `reference`. -#' -#' @param reference A tensor. -#' @param indices Indices. Dimension indices are 1-based. Note however that if you pass a -#' tensor for `indices` they will be passed as-is, in which case indices will be 0 based -#' because no normalizing of R 1-based axes to Python 0-based axes is performed. -#' -#' @return A tensor of same type as `reference`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_gather <- function(reference, indices) { - - # offset indices if it's an R object - if (!inherits(indices, "python.builtin.object")) - indices <- as_axis(indices) - - keras$backend$gather( - reference = reference, - indices = indices - ) -} - - -#' TF session to be used by the backend. -#' -#' If a default TensorFlow session is available, we will return it. Else, we -#' will return the global Keras session. If no global Keras session exists at -#' this point: we will create a new global session. Note that you can manually -#' set the global session via `k_set_session()`. -#' -#' @param session A TensorFlow Session. -#' -#' @return A TensorFlow session -#' -#' @template roxlate-keras-backend -#' -#' @export -k_get_session <- function() { - - if(tensorflow::tf_version() >= '2.0'){ - warning("Tensorflow 2.0 does not expose the 'k_get_session()' directly any more. Instead use 'tf$compat$v1$keras$backend$get_session()'", call. = FALSE) - tensorflow::tf$compat$v1$keras$backend$get_session() - } - else - keras$backend$get_session() -} - -#' @rdname k_get_session -#' @export -k_set_session <- function(session) { - - if(tensorflow::tf_version() >= '2.0'){ - warning("Tensorflow 2.0 does not expose the 'k_set_session()' directly any more. Instead use 'tf$compat$v1$keras$backend$set_session()'", call. = FALSE) - tensorflow::tf$compat$v1$keras$backend$set_session(session = session) - } - else - keras$backend$set_session( - session = session - ) -} - - -#' Get the uid for the default graph. -#' -#' @param prefix An optional prefix of the graph. -#' -#' @return A unique identifier for the graph. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_get_uid <- function(prefix = "") { - keras$backend$get_uid( - prefix = prefix - ) -} - - -#' Returns the value of a variable. -#' -#' @param x input variable. -#' -#' @return An R array. 
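`k_gather()` (above) is one of the few wrappers where the 1-based offset is conditional on the index type; a sketch, again assuming an eager TF2 backend:

```r
library(keras)

ref <- k_constant(matrix(c(10, 20, 30, 40), nrow = 2))

# plain R indices are 1-based and offset internally via as_axis() ...
k_eval(k_gather(ref, c(1L, 2L)))  # first and second rows

# ... but tensor indices are forwarded as-is, so they must be 0-based
k_eval(k_gather(ref, k_constant(c(0L, 1L), dtype = "int32")))
```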
-#' -#' @template roxlate-keras-backend -#' -#' @export -k_get_value <- function(x) { - keras$backend$get_value( - x = x - ) -} - - -#' Returns the shape of a variable. -#' -#' @param x A variable. -#' -#' @return A vector of integers. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_get_variable_shape <- function(x) { - keras$backend$get_variable_shape( - x = x - ) -} - - -#' Returns the gradients of `variables` w.r.t. `loss`. -#' -#' @param loss Scalar tensor to minimize. -#' @param variables List of variables. -#' -#' @return A gradients tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_gradients <- function(loss, variables) { - keras$backend$gradients( - loss = loss, - variables = variables - ) -} - - -#' Element-wise truth value of (x > y). -#' -#' @param x Tensor or variable. -#' @param y Tensor or variable. -#' -#' @return A bool tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_greater <- function(x, y) { - keras$backend$greater( - x = x, - y = y - ) -} - - -#' Element-wise truth value of (x >= y). -#' -#' @param x Tensor or variable. -#' @param y Tensor or variable. -#' -#' @return A bool tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_greater_equal <- function(x, y) { - keras$backend$greater_equal( - x = x, - y = y - ) -} - - -#' Segment-wise linear approximation of sigmoid. -#' -#' Faster than sigmoid. -#' Returns `0.` if `x < -2.5`, `1.` if `x > 2.5`. -#' In `-2.5 <= x <= 2.5`, returns `0.2 * x + 0.5`. -#' -#' @param x A tensor or variable. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_hard_sigmoid <- function(x) { - keras$backend$hard_sigmoid( - x = x - ) -} - - -#' Returns a tensor with the same content as the input tensor. -#' -#' @param x The input tensor. -#' @param name String, name for the variable to create. -#' -#' @return A tensor of the same shape, type and content. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_identity <- function(x, name = NULL) { - keras$backend$identity( - x = x, - name = name - ) -} - - -#' Default image data format convention ('channels_first' or 'channels_last'). -#' -#' @param data_format string. `'channels_first'` or `'channels_last'`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_image_data_format <- function() { - keras$backend$image_data_format( - ) -} - -#' @rdname k_image_data_format -#' @export -k_set_image_data_format <- function(data_format) { - keras$backend$set_image_data_format( - data_format = data_format - ) -} - - -#' Selects `x` in test phase, and `alt` otherwise. -#' -#' Note that `alt` should have the *same shape* as `x`. -#' -#' @param x What to return in test phase (tensor or function that returns a -#' tensor). -#' @param alt What to return otherwise (tensor or function that returns a -#' tensor). -#' @param training Optional scalar tensor (or R logical or integer) specifying -#' the learning phase. -#' -#' @return Either `x` or `alt` based on `k_learning_phase()`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_in_test_phase <- function(x, alt, training = NULL) { - keras$backend$in_test_phase( - x = x, - alt = alt, - training = training - ) -} - - -#' Returns whether the `targets` are in the top `k` `predictions`. -#' -#' @param predictions A tensor of shape `(batch_size, classes)` and type -#' `float32`. -#' @param targets A 1D tensor of length `batch_size` and type `int32` or -#' `int64`. -#' @param k An `int`, number of top elements to consider. 
-#' -#' @return A 1D tensor of length `batch_size` and type `bool`. `output[[i]]` is -#' `TRUE` if `predictions[i, targets[[i]]]` is within top-`k` values of -#' `predictions[[i]]`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_in_top_k <- function(predictions, targets, k) { - keras$backend$in_top_k( - predictions = predictions, - targets = targets, - k = as.integer(k) - ) -} - - -#' Selects `x` in train phase, and `alt` otherwise. -#' -#' Note that `alt` should have the *same shape* as `x`. -#' -#' @param x What to return in train phase (tensor or function that returns a -#' tensor). -#' @param alt What to return otherwise (tensor or function that returns a -#' tensor). -#' @param training Optional scalar tensor (or R logical or integer) specifying -#' the learning phase. -#' -#' @return Either `x` or `alt` based on the `training` flag. The `training` -#' flag defaults to `k_learning_phase()`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_in_train_phase <- function(x, alt, training = NULL) { - keras$backend$in_train_phase( - x = x, - alt = alt, - training = training - ) -} - - -#' Returns the shape of tensor or variable as a list of int or NULL entries. -#' -#' @param x Tensor or variable. -#' -#' @return A list of integers (or NULL entries). -#' -#' @template roxlate-keras-backend -#' -#' @export -k_int_shape <- function(x) { - keras$backend$int_shape( - x = x - ) -} - -#' Returns whether `x` is a symbolic tensor. -#' -#' @param x A candidate tensor. -#' -#' @return A logical: Whether the argument is a symbolic tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_is_tensor <- function(x) { - keras$backend$is_tensor( - x = x - ) -} - - - -#' Returns whether `x` is a Keras tensor. -#' -#' A "Keras tensor" is a tensor that was returned by a Keras layer. -#' -#' @param x A candidate tensor. -#' -#' @return A logical: Whether the argument is a Keras tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_is_keras_tensor <- function(x) { - keras$backend$is_keras_tensor( - x = x - ) -} - - -#' Returns whether `x` is a placeholder. -#' -#' @param x A candidate placeholder. -#' -#' @return A logical -#' -#' @template roxlate-keras-backend -#' -#' @export -k_is_placeholder <- function(x) { - keras$backend$is_placeholder( - x = x - ) -} - - -#' Returns whether a tensor is a sparse tensor. -#' -#' @param tensor A tensor instance. -#' -#' @return A logical -#' -#' @template roxlate-keras-backend -#' -#' @export -k_is_sparse <- function(tensor) { - keras$backend$is_sparse( - tensor = tensor - ) -} - - -#' Normalizes a tensor wrt the L2 norm alongside the specified axis. -#' -#' @param x Tensor or variable. -#' @param axis Axis along which to perform normalization (axis indexes -#' are 1-based) -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_l2_normalize <- function(x, axis = NULL) { - keras$backend$l2_normalize( - x = x, - axis = as_axis(axis) - ) -} - - -#' Returns the learning phase flag. -#' -#' The learning phase flag is a bool tensor (0 = test, 1 = train) to be passed -#' as input to any Keras function that uses a different behavior at train time -#' and test time. -#' -#' @return Learning phase (scalar integer tensor or R integer). -#' -#' @template roxlate-keras-backend -#' -#' @export -k_learning_phase <- function() { - keras$backend$learning_phase() -} - - -#' Element-wise truth value of (x < y). -#' -#' @param x Tensor or variable. -#' @param y Tensor or variable. -#' -#' @return A bool tensor.
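Note that `k_in_top_k()` (above) forwards `targets` untranslated, so class ids stay 0-based; a sketch under the same assumptions as earlier:

```r
library(keras)

preds <- k_constant(matrix(c(0.1, 0.7, 0.2,
                             0.6, 0.3, 0.1),
                           nrow = 2, byrow = TRUE))
targets <- k_constant(c(1L, 2L), dtype = "int32")  # 0-based class ids

# row 1: class 1 has the largest score -> TRUE
# row 2: class 2 is not the top-1 score -> FALSE
k_eval(k_in_top_k(preds, targets, k = 1))
```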
-#' -#' @template roxlate-keras-backend -#' -#' @export -k_less <- function(x, y) { - keras$backend$less( - x = x, - y = y - ) -} - - -#' Element-wise truth value of (x <= y). -#' -#' @param x Tensor or variable. -#' @param y Tensor or variable. -#' -#' @return A bool tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_less_equal <- function(x, y) { - keras$backend$less_equal( - x = x, - y = y - ) -} - - -#' Apply 1D conv with un-shared weights. -#' -#' @param inputs 3D tensor with shape: (batch_size, steps, input_dim) -#' @param kernel the unshared weight for convolution, with shape -#' (output_length, feature_dim, filters) -#' @param kernel_size a list of a single integer, specifying the length of the -#' 1D convolution window -#' @param strides a list of a single integer, specifying the stride length of -#' the convolution -#' @param data_format the data format, channels_first or channels_last -#' -#' @return the tensor after 1d conv with un-shared weights, with shape -#' (batch_size, output_length, filters) -#' -#' @template roxlate-keras-backend -#' -#' @export -k_local_conv1d <- function(inputs, kernel, kernel_size, strides, data_format = NULL) { - keras$backend$local_conv1d( - inputs = inputs, - kernel = kernel, - kernel_size = list(as.integer(kernel_size)), - strides = list(as.integer(strides)), - data_format = data_format - ) -} - - -#' Apply 2D conv with un-shared weights. -#' -#' @param inputs 4D tensor with shape: (batch_size, filters, new_rows, -#' new_cols) if data_format='channels_first' or 4D tensor with shape: -#' (batch_size, new_rows, new_cols, filters) if data_format='channels_last'. -#' @param kernel the unshared weight for convolution, with shape (output_items, -#' feature_dim, filters) -#' @param kernel_size a list of 2 integers, specifying the width and height of -#' the 2D convolution window. -#' @param strides a list of 2 integers, specifying the strides of the -#' convolution along the width and height. -#' @param output_shape a list with (output_row, output_col) -#' @param data_format the data format, channels_first or channels_last -#' -#' @return A 4d tensor with shape: (batch_size, filters, new_rows, new_cols) if -#' data_format='channels_first' or 4D tensor with shape: (batch_size, -#' new_rows, new_cols, filters) if data_format='channels_last'. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_local_conv2d <- function(inputs, kernel, kernel_size, strides, output_shape, data_format = NULL) { - keras$backend$local_conv2d( - inputs = inputs, - kernel = kernel, - kernel_size = as.integer(kernel_size), - strides = as.integer(strides), - output_shape = as.integer(output_shape), - data_format = data_format - ) -} - - -#' Element-wise log. -#' -#' @param x Tensor or variable. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_log <- function(x) { - keras$backend$log( - x = x - ) -} - - -#' (Deprecated) Computes log(sum(exp(elements across dimensions of a tensor))). -#' -#' This function is deprecated. Please use `tensorflow::tf$reduce_logsumexp()`. -#' -#' This function is more numerically stable than log(sum(exp(x))). It avoids -#' overflows caused by taking the exp of large inputs and underflows caused by -#' taking the log of small inputs. -#' -#' @param x A tensor or variable. -#' @param axis An integer, the axis to reduce over (axis indexes are 1-based). -#' @param keepdims A boolean, whether to keep the dimensions or not. If -#' `keepdims` is `FALSE`, the rank of the tensor is reduced by 1.
If -#' `keepdims` is `TRUE`, the reduced dimension is retained with length 1. -#' -#' @return The reduced tensor. -#' -#' @template roxlate-keras-backend -#' -#' @keywords internal -#' @export -k_logsumexp <- function(x, axis = NULL, keepdims = FALSE) { - - if (tensorflow::tf_version() >= "2.2") - stop("k_logsumexp is deprecated. use tensorflow::tf$reduce_logsumexp ") - - keras$backend$logsumexp( - x = x, - axis = as_axis(axis), - keepdims = keepdims - ) -} - - -#' Sets the manual variable initialization flag. -#' -#' This boolean flag determines whether variables should be initialized as they -#' are instantiated (default), or if the user should handle the initialization -#' (e.g. via `tf$initialize_all_variables()`). -#' -#' @param value Logical -#' -#' @template roxlate-keras-backend -#' -#' @export -k_manual_variable_initialization <- function(value) { - keras$backend$manual_variable_initialization( - value = value - ) -} - - -#' Map the function fn over the elements elems and return the outputs. -#' -#' @param fn Function that will be called upon each element in elems -#' @param elems tensor -#' @param name A string name for the map node in the graph -#' @param dtype Output data type. -#' -#' @return Tensor with dtype `dtype`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_map_fn <- function(fn, elems, name = NULL, dtype = NULL) { - keras$backend$map_fn( - fn = fn, - elems = elems, - name = name, - dtype = dtype - ) -} - - -#' Maximum value in a tensor. -#' -#' @param x A tensor or variable. -#' @param axis An integer, the axis to find maximum values (axis indexes are -#' 1-based). -#' @param keepdims A boolean, whether to keep the dimensions or not. If -#' `keepdims` is `FALSE`, the rank of the tensor is reduced by 1. If -#' `keepdims` is `TRUE`, the reduced dimension is retained with length 1. -#' -#' @return A tensor with maximum values of `x`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_max <- function(x, axis = NULL, keepdims = FALSE) { - keras$backend$max( - x = x, - axis = as_axis(axis), - keepdims = keepdims - ) -} - - -#' Element-wise maximum of two tensors. -#' -#' @param x Tensor or variable. -#' @param y Tensor or variable. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_maximum <- function(x, y) { - keras$backend$maximum( - x = x, - y = y - ) -} - - -#' Mean of a tensor, alongside the specified axis. -#' -#' @param x A tensor or variable. -#' @param axis A list of axes to compute the mean over (axis indexes are -#' 1-based). -#' @param keepdims A boolean, whether to keep the dimensions or not. If -#' `keepdims` is `FALSE`, the rank of the tensor is reduced by 1 for each -#' entry in `axis`. If `keepdims` is `TRUE`, the reduced dimensions are -#' retained with length 1. -#' -#' @return A tensor with the mean of elements of `x`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_mean <- function(x, axis = NULL, keepdims = FALSE) { - keras$backend$mean( - x = x, - axis = as_axis(axis), - keepdims = keepdims - ) -} - - -#' Minimum value in a tensor. -#' -#' @param x A tensor or variable. -#' @param axis An integer, axis to find minimum values (axis indexes are -#' 1-based). -#' @param keepdims A boolean, whether to keep the dimensions or not. If -#' `keepdims` is `FALSE`, the rank of the tensor is reduced by 1. If -#' `keepdims` is `TRUE`, the reduced dimension is retained with length 1. -#' -#' @return A tensor with minimum values of `x`.
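The `keepdims` flag documented for the reductions above is easiest to see on a concrete shape; a sketch with `k_mean()`, under the same assumptions as the earlier sketches:

```r
library(keras)

x <- k_ones(shape = c(2, 3))

k_int_shape(k_mean(x, axis = 2))                   # list(2): rank reduced
k_int_shape(k_mean(x, axis = 2, keepdims = TRUE))  # list(2, 1): axis kept, length 1
```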
-#' -#' @template roxlate-keras-backend -#' -#' @export -k_min <- function(x, axis = NULL, keepdims = FALSE) { - keras$backend$min( - x = x, - axis = as_axis(axis), - keepdims = keepdims - ) -} - - -#' Element-wise minimum of two tensors. -#' -#' @param x Tensor or variable. -#' @param y Tensor or variable. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_minimum <- function(x, y) { - keras$backend$minimum( - x = x, - y = y - ) -} - - -#' Compute the moving average of a variable. -#' -#' @param x A `Variable`. -#' @param value A tensor with the same shape as `x`. -#' @param momentum The moving average momentum. -#' -#' @return An operation to update the variable. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_moving_average_update <- function(x, value, momentum) { - keras$backend$moving_average_update( - x = x, - value = value, - momentum = momentum - ) -} - - - -#' Returns the number of axes in a tensor, as an integer. -#' -#' @param x Tensor or variable. -#' -#' @return Integer (scalar), number of axes. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_ndim <- function(x) { - keras$backend$ndim( - x = x - ) -} - - -#' Computes mean and std for batch then applies batch_normalization on batch. -#' -#' @param x Input tensor or variable. -#' @param gamma Tensor by which to scale the input. -#' @param beta Tensor with which to center the input. -#' @param reduction_axes iterable of integers, axes over which to normalize. -#' @param epsilon Fuzz factor. -#' -#' @return A list of length 3, `(normalized_tensor, mean, variance)`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_normalize_batch_in_training <- function(x, gamma, beta, reduction_axes, epsilon = 0.001) { - keras$backend$normalize_batch_in_training( - x = x, - gamma = gamma, - beta = beta, - reduction_axes = as_integer_tuple(reduction_axes), - epsilon = epsilon - ) -} - - -#' Element-wise inequality between two tensors. -#' -#' @param x Tensor or variable. -#' @param y Tensor or variable. -#' -#' @return A bool tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_not_equal <- function(x, y) { - keras$backend$not_equal( - x = x, - y = y - ) -} - - -#' Computes the one-hot representation of an integer tensor. -#' -#' @param indices nD integer tensor of shape `(batch_size, dim1, dim2, ... -#' dim(n-1))` -#' @param num_classes Integer, number of classes to consider. -#' -#' @return (n + 1)D one hot representation of the input with shape -#' `(batch_size, dim1, dim2, ... dim(n-1), num_classes)` -#' -#' @template roxlate-keras-backend -#' -#' @export -k_one_hot <- function(indices, num_classes) { - keras$backend$one_hot( - indices = indices, - num_classes = as.integer(num_classes) - ) -} - - -#' Instantiates an all-ones tensor variable and returns it. -#' -#' @param shape Tuple of integers, shape of returned Keras variable. -#' @param dtype String, data type of returned Keras variable. -#' @param name String, name of returned Keras variable. -#' -#' @return A Keras variable, filled with `1.0`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_ones <- function(shape, dtype = NULL, name = NULL) { - keras$backend$ones( - shape = as_integer_tuple(shape), - dtype = dtype, - name = name - ) -} - - -#' Instantiates an all-ones variable of the same shape as another tensor. -#' -#' @param x Keras variable or tensor. -#' @param dtype String, dtype of returned Keras variable. NULL uses the dtype -#' of x. -#' @param name String, name for the variable to create.
-#' -#' @return A Keras variable with the shape of x filled with ones. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_ones_like <- function(x, dtype = NULL, name = NULL) { - keras$backend$ones_like( - x = x, - dtype = dtype, - name = name - ) -} - - -#' Permutes axes in a tensor. -#' -#' @param x Tensor or variable. -#' @param pattern A list of dimension indices, e.g. `(1, 3, 2)`. Dimension -#' indices are 1-based. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_permute_dimensions <- function(x, pattern) { - keras$backend$permute_dimensions( - x = x, - pattern = as_axis(pattern) - ) -} - - -#' Instantiates a placeholder tensor and returns it. -#' -#' @param shape Shape of the placeholder (integer list, may include `NULL` -#' entries). -#' @param ndim Number of axes of the tensor. At least one of {`shape`, `ndim`} -#' must be specified. If both are specified, `shape` is used. -#' @param dtype Placeholder type. -#' @param sparse Logical, whether the placeholder should have a sparse type. -#' @param name Optional name string for the placeholder. -#' -#' @return Tensor instance (with Keras metadata included). -#' -#' @template roxlate-keras-backend -#' -#' @export -k_placeholder <- function(shape = NULL, ndim = NULL, dtype = NULL, sparse = FALSE, name = NULL) { - keras$backend$placeholder( - shape = backend_normalize_shape(shape), - ndim = as.integer(ndim), - dtype = dtype, - sparse = sparse, - name = name - ) -} - - -#' 2D Pooling. -#' -#' @param x Tensor or variable. -#' @param pool_size list of 2 integers. -#' @param strides list of 2 integers. -#' @param padding string, `"same"` or `"valid"`. -#' @param data_format string, `"channels_last"` or `"channels_first"`. -#' @param pool_mode string, `"max"` or `"avg"`. -#' -#' @return A tensor, result of 2D pooling. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_pool2d <- function(x, pool_size, strides = c(1, 1), padding = "valid", data_format = NULL, pool_mode = "max") { - keras$backend$pool2d( - x = x, - pool_size = as.integer(pool_size), - strides = as.integer(strides), - padding = padding, - data_format = data_format, - pool_mode = pool_mode - ) -} - - -#' 3D Pooling. -#' -#' @param x Tensor or variable. -#' @param pool_size list of 3 integers. -#' @param strides list of 3 integers. -#' @param padding string, `"same"` or `"valid"`. -#' @param data_format string, `"channels_last"` or `"channels_first"`. -#' @param pool_mode string, `"max"` or `"avg"`. -#' -#' @return A tensor, result of 3D pooling. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_pool3d <- function(x, pool_size, strides = c(1, 1, 1), padding = "valid", - data_format = NULL, pool_mode = "max") { - keras$backend$pool3d( - x = x, - pool_size = as.integer(pool_size), - strides = as.integer(strides), - padding = padding, - data_format = data_format, - pool_mode = pool_mode - ) -} - - -#' Element-wise exponentiation. -#' -#' @param x Tensor or variable. -#' @param a R integer. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_pow <- function(x, a) { - keras$backend$pow( - x = x, - a = as_integer(a) - ) -} - - - -#' Prints `message` and the tensor value when evaluated. -#' -#' Note that `print_tensor` returns a new tensor identical to `x` which should -#' be used in the following code. Otherwise the print operation is not taken -#' into account during evaluation. -#' -#' @param x Tensor to print. -#' @param message Message to print jointly with the tensor. 
-#' -#' @return The same tensor `x`, unchanged. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_print_tensor <- function(x, message = "") { - keras$backend$print_tensor( - x = x, - message = message - ) -} - - -#' Multiplies the values in a tensor, alongside the specified axis. -#' -#' @param x A tensor or variable. -#' @param axis An integer, axis to compute the product over (axis indexes are -#' 1-based). -#' @param keepdims A boolean, whether to keep the dimensions or not. If -#' `keepdims` is `FALSE`, the rank of the tensor is reduced by 1. If -#' `keepdims` is `TRUE`, the reduced dimension is retained with length 1. -#' -#' @return A tensor with the product of elements of `x`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_prod <- function(x, axis = NULL, keepdims = FALSE) { - keras$backend$prod( - x = x, - axis = as_axis(axis), - keepdims = keepdims - ) -} - - -#' Returns a tensor with random binomial distribution of values. -#' -#' `k_random_binomial()` and `k_random_bernoulli()` are aliases for the same -#' function. Both are maintained for backwards compatibility. New code -#' should prefer `k_random_bernoulli()`. -#' -#' @param shape A list of integers, the shape of tensor to create. -#' @param p A float, `0. <= p <= 1`, probability of binomial distribution. -#' @param dtype String, dtype of returned tensor. -#' @param seed Integer, random seed. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @rdname k_random_bernoulli -#' @export -k_random_binomial <- - function(shape, - p = 0.0, - dtype = NULL, - seed = NULL) { - args <- capture_args(match.call(), - list(shape = backend_normalize_shape, - seed = as_nullable_integer)) - - fn <- if (tf_version() >= "2.3") - keras$backend$random_bernoulli - else - keras$backend$random_binomial - - - { - # workaround around exception raised for non-float dtypes - # https://github.com/keras-team/keras/issues/15659 - - if (!is.null(args$dtype)) - args$dtype <- tensorflow::tf$as_dtype(args$dtype) - - if (isFALSE(args$dtype$is_floating)) { - out_dtype <- args$dtype - args$dtype <- k_floatx() - res <- do.call(fn, args) - return(k_cast(res, out_dtype)) - } - } - - do.call(fn, args) - } - -#' @export -#' @rdname k_random_bernoulli -k_random_bernoulli <- k_random_binomial - - -#' Returns a tensor with normal distribution of values. -#' -#' @param shape A list of integers, the shape of tensor to create. -#' @param mean A float, mean of the normal distribution to draw samples. -#' @param stddev A float, standard deviation of the normal distribution to draw -#' samples. -#' @param dtype String, dtype of returned tensor. -#' @param seed Integer, random seed. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_random_normal <- function(shape, mean = 0.0, stddev = 1.0, dtype = NULL, seed = NULL) { - keras$backend$random_normal( - shape = backend_normalize_shape(shape), - mean = mean, - stddev = stddev, - dtype = dtype, - seed = as_nullable_integer(seed) - ) -} - - -#' Instantiates a variable with values drawn from a normal distribution. -#' -#' @param shape Tuple of integers, shape of returned Keras variable. -#' @param mean Float, mean of the normal distribution. -#' @param scale Float, standard deviation of the normal distribution. -#' @param dtype String, dtype of returned Keras variable. -#' @param name String, name of returned Keras variable. -#' @param seed Integer, random seed. -#' -#' @return A Keras variable, filled with drawn samples. 
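The branching in `k_random_binomial()` above works around a keras issue (keras-team/keras#15659) by drawing in `k_floatx()` and casting back when a non-float `dtype` is requested. The user-visible effect, sketched under the same assumptions as the earlier examples:

```r
library(keras)

# draws in floatx internally, then casts, so an integer dtype works
z <- k_random_bernoulli(shape = c(2, 3), p = 0.5, dtype = "int32", seed = 42)
k_dtype(z)  # "int32"
```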
-#' -#' @template roxlate-keras-backend -#' -#' @export -k_random_normal_variable <- function(shape, mean, scale, dtype = NULL, name = NULL, seed = NULL) { - keras$backend$random_normal_variable( - shape = backend_normalize_shape(shape), - mean = mean, - scale = scale, - dtype = dtype, - name = name, - seed = as_nullable_integer(seed) - ) -} - - -#' Returns a tensor with uniform distribution of values. -#' -#' @param shape A list of integers, the shape of tensor to create. -#' @param minval A float, lower boundary of the uniform distribution to draw samples. -#' @param maxval A float, upper boundary of the uniform distribution to draw samples. -#' @param dtype String, dtype of returned tensor. -#' @param seed Integer, random seed. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_random_uniform <- function(shape, minval = 0.0, maxval = 1.0, dtype = NULL, seed = NULL) { - if(!is.null(dtype)) { - minval <- as_tensor(minval, dtype = dtype) - maxval <- as_tensor(maxval, dtype = dtype) - } - keras$backend$random_uniform( - shape = backend_normalize_shape(shape), - minval = minval, - maxval = maxval, - dtype = dtype, - seed = as_nullable_integer(seed) - ) -} - - -#' Instantiates a variable with values drawn from a uniform distribution. -#' -#' @param shape Tuple of integers, shape of returned Keras variable. -#' @param low Float, lower boundary of the output interval. -#' @param high Float, upper boundary of the output interval. -#' @param dtype String, dtype of returned Keras variable. -#' @param name String, name of returned Keras variable. -#' @param seed Integer, random seed. -#' -#' @return A Keras variable, filled with drawn samples. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_random_uniform_variable <- function(shape, low, high, dtype = NULL, name = NULL, seed = NULL) { - keras$backend$random_uniform_variable( - shape = backend_normalize_shape(shape), - low = low, - high = high, - dtype = dtype, - name = name, - seed = as_nullable_integer(seed) - ) -} - - -#' Rectified linear unit. -#' -#' With default values, it returns element-wise `max(x, 0)`. -#' -#' @param x A tensor or variable. -#' @param alpha A scalar, slope of negative section (default=`0.`). -#' @param max_value Saturation threshold. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_relu <- function(x, alpha = 0.0, max_value = NULL) { - keras$backend$relu( - x = x, - alpha = alpha, - max_value = max_value - ) -} - - -#' Repeats a 2D tensor. -#' -#' If x has shape (samples, dim) and n is 2, the output will have shape -#' (samples, 2, dim). -#' -#' @param x Tensor or variable. -#' @param n Integer, number of times to repeat. -#' -#' @return A tensor -#' -#' @template roxlate-keras-backend -#' -#' @export -k_repeat <- function(x, n) { - keras$backend$`repeat`( - x = x, - n = as.integer(n) - ) -} - - -#' Repeats the elements of a tensor along an axis. -#' -#' If `x` has shape `(s1, s2, s3)` and `axis` is `2`, the output -#' will have shape `(s1, s2 * rep, s3)`. -#' -#' @param x Tensor or variable. -#' @param rep Integer, number of times to repeat. -#' @param axis Axis along which to repeat (axis indexes are 1-based) -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_repeat_elements <- function(x, rep, axis) { - keras$backend$repeat_elements( - x = x, - rep = as.integer(rep), - axis = as_axis(axis) - ) -} - - -#' Reset graph identifiers. 
-#' -#' @template roxlate-keras-backend -#' -#' @export -k_reset_uids <- function() { - keras$backend$reset_uids( - ) -} - - -#' Reshapes a tensor to the specified shape. -#' -#' @param x Tensor or variable. -#' @param shape Target shape list. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_reshape <- function(x, shape) { - keras$backend$reshape( - x = x, - shape = backend_normalize_shape(shape) - ) -} - - -#' Resizes the images contained in a 4D tensor. -#' -#' @param x Tensor or variable to resize. -#' @param height_factor Positive integer. -#' @param width_factor Positive integer. -#' @param data_format string, `"channels_last"` or `"channels_first"`. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_resize_images <- function(x, height_factor, width_factor, data_format) { - keras$backend$resize_images( - x = x, - height_factor = as.integer(height_factor), - width_factor = as.integer(width_factor), - data_format = data_format - ) -} - - -#' Resizes the volume contained in a 5D tensor. -#' -#' @param x Tensor or variable to resize. -#' @param depth_factor Positive integer. -#' @param height_factor Positive integer. -#' @param width_factor Positive integer. -#' @param data_format string, `"channels_last"` or `"channels_first"`. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_resize_volumes <- function(x, depth_factor, height_factor, width_factor, data_format) { - keras$backend$resize_volumes( - x = x, - depth_factor = as.integer(depth_factor), - height_factor = as.integer(height_factor), - width_factor = as.integer(width_factor), - data_format = data_format - ) -} - - -#' Reverse a tensor along the specified axes. -#' -#' @param x Tensor to reverse. -#' @param axes Integer or list of integers of axes to reverse (axis indexes are -#' 1-based). -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_reverse <- function(x, axes) { - keras$backend$reverse( - x = x, - axes = as_axis(axes) - ) -} - -#' Iterates over the time dimension of a tensor -#' -#' @param step_function RNN step function. -#' @param inputs Tensor with shape (samples, ...) (no time dimension), -#' representing input for the batch of samples at a certain time step. -#' @param initial_states Tensor with shape (samples, output_dim) (no time -#' dimension), containing the initial values for the states used in the step -#' function. -#' @param go_backwards Logical. If `TRUE`, do the iteration over the time -#' dimension in reverse order and return the reversed sequence. -#' @param mask Binary tensor with shape (samples, time, 1), with a zero for -#' every element that is masked. -#' @param constants A list of constant values passed at each step. -#' @param unroll Whether to unroll the RNN or to use a symbolic loop -#' (while_loop or scan depending on backend). -#' @param input_length Not relevant in the TensorFlow implementation. Must be -#' specified if using unrolling with Theano. -#' -#' @return A list with: -#' -#' - `last_output`: the latest output of the rnn, of shape (samples, ...) -#' - `outputs`: tensor with shape (samples, time, ...) where each entry -#' `outputs[s, t]` is the output of the step function at time t for sample s. -#' - `new_states`: list of tensors, latest states returned by the step -#' function, of shape (samples, ...).
-#' -#' @template roxlate-keras-backend -#' -#' @export -k_rnn <- function(step_function, inputs, initial_states, go_backwards = FALSE, - mask = NULL, constants = NULL, unroll = FALSE, - input_length = NULL) { - keras$backend$rnn( - step_function = step_function, - inputs = inputs, - initial_states = initial_states, - go_backwards = go_backwards, - mask = mask, - constants = constants, - unroll = unroll, - input_length = as.integer(input_length) - ) -} - - -#' Element-wise rounding to the closest integer. -#' -#' In case of tie, the rounding mode used is "half to even". -#' -#' @param x Tensor or variable. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_round <- function(x) { - keras$backend$round( - x = x - ) -} - - -#' 2D convolution with separable filters. -#' -#' @param x input tensor -#' @param depthwise_kernel convolution kernel for the depthwise convolution. -#' @param pointwise_kernel kernel for the 1x1 convolution. -#' @param strides strides list (length 2). -#' @param padding string, `"same"` or `"valid"`. -#' @param data_format string, `"channels_last"` or `"channels_first"`. -#' @param dilation_rate list of integers, dilation rates for the separable convolution. -#' -#' @return Output tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_separable_conv2d <- function(x, depthwise_kernel, pointwise_kernel, strides = c(1, 1), - padding = "valid", data_format = NULL, dilation_rate = c(1, 1)) { - keras$backend$separable_conv2d( - x = x, - depthwise_kernel = depthwise_kernel, - pointwise_kernel = pointwise_kernel, - strides = as.integer(strides), - padding = padding, - data_format = data_format, - dilation_rate = as.integer(dilation_rate) - ) -} - - -#' Sets the learning phase to a fixed value. -#' -#' @param value Learning phase value, either 0 or 1 (integers). -#' -#' @template roxlate-keras-backend -#' -#' @export -k_set_learning_phase <- function(value) { - keras$backend$set_learning_phase( - value = as.integer(value) - ) -} - - -#' Sets the value of a variable, from an R array. -#' -#' @param x Tensor to set to a new value. -#' @param value Value to set the tensor to, as an R array (of the same shape). -#' -#' @template roxlate-keras-backend -#' -#' @export -k_set_value <- function(x, value) { - keras$backend$set_value( - x = x, - value = value - ) -} - - -#' Returns the symbolic shape of a tensor or variable. -#' -#' @param x A tensor or variable. -#' -#' @return A symbolic shape (which is itself a tensor). -#' -#' @template roxlate-keras-backend -#' -#' @export -k_shape <- function(x) { - keras$backend$shape( - x = x - ) -} - - -#' Element-wise sigmoid. -#' -#' @param x A tensor or variable. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_sigmoid <- function(x) { - keras$backend$sigmoid( - x = x - ) -} - - -#' Element-wise sign. -#' -#' @param x Tensor or variable. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_sign <- function(x) { - keras$backend$sign( - x = x - ) -} - - -#' Computes sin of x element-wise. -#' -#' @param x Tensor or variable. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_sin <- function(x) { - keras$backend$sin( - x = x - ) -} - - -#' Softmax of a tensor. -#' -#' @param x A tensor or variable. -#' @param axis The dimension softmax would be performed on. -#' The default is -1 which indicates the last dimension. -#' -#' @return A tensor. 
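A sketch of the default last-axis behavior of `k_softmax()` (the `axis` argument is only forwarded on Keras >= 2.1.6, per the version guard that follows), under the same assumptions as the earlier sketches:

```r
library(keras)

logits <- k_constant(matrix(c(1, 2, 3,
                              1, 1, 1),
                            nrow = 2, byrow = TRUE))

# softmax over the last axis; each row sums to 1
k_eval(k_softmax(logits))
```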
-#' -#' @template roxlate-keras-backend -#' -#' @export -k_softmax <- function(x, axis = -1) { - - args <- list( - x = x - ) - - if (keras_version() >= "2.1.6") - args$axis <- as_axis(axis) - - do.call(keras$backend$softmax, args) -} - - -#' Softplus of a tensor. -#' -#' @param x A tensor or variable. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_softplus <- function(x) { - keras$backend$softplus( - x = x - ) -} - - -#' Softsign of a tensor. -#' -#' @param x A tensor or variable. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_softsign <- function(x) { - keras$backend$softsign( - x = x - ) -} - - -#' Categorical crossentropy with integer targets. -#' -#' @inheritParams k_batch_normalization -#' -#' @param target An integer tensor. -#' @param output A tensor resulting from a softmax (unless `from_logits` is TRUE, in which case `output` is expected to be the logits). -#' @param from_logits Boolean, whether `output` is the result of a softmax, or is a tensor of logits. -#' -#' @return Output tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_sparse_categorical_crossentropy <- function(target, output, from_logits = FALSE, axis = -1) { - - args <- list( - target = target, - output = output, - from_logits = from_logits - ) - - if (keras_version() >= "2.2.0") - args$axis <- as_axis(axis) - - do.call(keras$backend$sparse_categorical_crossentropy, args) -} - - -#' Pads the 2nd and 3rd dimensions of a 4D tensor. -#' -#' @param x Tensor or variable. -#' @param padding Tuple of 2 lists, padding pattern. -#' @param data_format string, `"channels_last"` or `"channels_first"`. -#' -#' @return A padded 4D tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_spatial_2d_padding <- function(x, padding = list(list(1, 1), list(1, 1)), data_format = NULL) { - keras$backend$spatial_2d_padding( - x = x, - padding = padding, - data_format = data_format - ) -} - - -#' Pads 5D tensor with zeros along the depth, height, width dimensions. -#' -#' Pads these dimensions with respectively `padding[[1]]`, `padding[[2]]`, and -#' `padding[[3]]` zeros left and right. For 'channels_last' data_format, the -#' 2nd, 3rd and 4th dimension will be padded. For 'channels_first' data_format, -#' the 3rd, 4th and 5th dimension will be padded. -#' -#' @param x Tensor or variable. -#' @param padding List of 3 lists, padding pattern. -#' @param data_format string, `"channels_last"` or `"channels_first"`. -#' -#' @return A padded 5D tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_spatial_3d_padding <- function(x, - padding = list(list(1, 1), list(1, 1), list(1, 1)), - data_format = NULL) { - keras$backend$spatial_3d_padding( - x = x, - padding = padding, - data_format = data_format - ) -} - - -#' Element-wise square root. -#' -#' @param x Tensor or variable. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_sqrt <- function(x) { - keras$backend$sqrt( - x = x - ) -} - - -#' Element-wise square. -#' -#' @param x Tensor or variable. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_square <- function(x) { - keras$backend$square( - x = x - ) -} - - -#' Removes a 1-dimension from the tensor at index `axis`. -#' -#' @param x A tensor or variable. -#' @param axis Axis to drop (axis indexes are 1-based). -#' -#' @return A tensor with the same data as `x` but reduced dimensions. 
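`k_squeeze()` (above) and `k_expand_dims()` (earlier in this file) are inverses on a length-1 axis, both taking 1-based axis numbers; a sketch:

```r
library(keras)

x <- k_ones(shape = c(2, 3))

y <- k_expand_dims(x, axis = 2)      # insert a new axis at position 2
k_int_shape(y)                       # list(2, 1, 3)
k_int_shape(k_squeeze(y, axis = 2))  # back to list(2, 3)
```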
-#' -#' @template roxlate-keras-backend -#' -#' @export -k_squeeze <- function(x, axis = NULL) { - if(is.null(axis)) - return(tensorflow::tf$squeeze(x)) - keras$backend$squeeze( - x = x, - axis = as_axis(axis) - ) -} - - -#' Stacks a list of rank `R` tensors into a rank `R+1` tensor. -#' -#' @param x List of tensors. -#' @param axis Axis along which to perform stacking (axis indexes are 1-based). -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_stack <- function(x, axis = 1) { - keras$backend$stack( - x = x, - axis = as_axis(axis) - ) -} - - -#' Unstack rank `R` tensor into a list of rank `R-1` tensors. -#' -#' @param x a tensor. -#' @param axis Axis along which to perform unstacking (axis indexes are 1-based). -#' Negative values wrap around, so the valid range is `[-R, R]`. -#' @param num An int. The length of the dimension axis. Automatically inferred -#' if NULL (the default). -#' @param name A name for the operation (optional). -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_unstack <- function(x, axis = 1L, num = NULL, name = NULL) { - tensorflow::tf$unstack(x, - num = as_nullable_integer(num), - axis = as_axis(axis), - name = name) -} - - -#' Standard deviation of a tensor, alongside the specified axis. -#' -#' @param x A tensor or variable. -#' @param axis An integer, the axis to compute the standard deviation over -#' (axis indexes are 1-based). -#' @param keepdims A boolean, whether to keep the dimensions or not. If -#' `keepdims` is `FALSE`, the rank of the tensor is reduced by 1. If -#' `keepdims` is `TRUE`, the reduced dimension is retained with length 1. -#' -#' @return A tensor with the standard deviation of elements of `x`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_std <- function(x, axis = NULL, keepdims = FALSE) { - keras$backend$std( - x = x, - axis = as_axis(axis), - keepdims = keepdims - ) -} - - -#' Returns `variables` but with zero gradient w.r.t. every other variable. -#' -#' @param variables tensor or list of tensors to consider constant with respect -#' to any other variable. -#' -#' @return A single tensor or a list of tensors (depending on the passed -#' argument) that has constant gradient with respect to any other variable. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_stop_gradient <- function(variables) { - keras$backend$stop_gradient( - variables = variables - ) -} - - -#' Sum of the values in a tensor, alongside the specified axis. -#' -#' @param x A tensor or variable. -#' @param axis An integer, the axis to sum over (axis indexes are 1-based). -#' @param keepdims A boolean, whether to keep the dimensions or not. If -#' `keepdims` is `FALSE`, the rank of the tensor is reduced by 1. If -#' `keepdims` is `TRUE`, the reduced dimension is retained with length 1. -#' -#' @return A tensor with sum of `x`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_sum <- function(x, axis = NULL, keepdims = FALSE) { - keras$backend$sum( - x = x, - axis = as_axis(axis), - keepdims = keepdims - ) -} - - -#' Switches between two operations depending on a scalar value. -#' -#' Note that both `then_expression` and `else_expression` -#' should be symbolic tensors of the *same shape*. -#' -#' @param condition tensor (`int` or `bool`). -#' @param then_expression either a tensor, or a function that returns a tensor. -#' @param else_expression either a tensor, or a function that returns a tensor. -#' -#' @return The selected tensor.
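A sketch of `k_switch()` selecting between two same-shaped tensors on a scalar condition, under the same assumptions as the sketches above:

```r
library(keras)

cond <- k_constant(TRUE, dtype = "bool")
a <- k_zeros(shape = c(2, 2))
b <- k_ones(shape = c(2, 2))

k_eval(k_switch(cond, a, b))  # all zeros, since cond is TRUE
```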
-#' -#' @template roxlate-keras-backend -#' -#' @export -k_switch <- function(condition, then_expression, else_expression) { - keras$backend$switch( - condition = condition, - then_expression = then_expression, - else_expression = else_expression - ) -} - - - -#' Element-wise tanh. -#' -#' @param x A tensor or variable. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_tanh <- function(x) { - keras$backend$tanh( - x = x - ) -} - - -#' Pads the middle dimension of a 3D tensor. -#' -#' @param x Tensor or variable. -#' @param padding List of 2 integers, how many zeros to add at the start and end of dim 1. -#' -#' @return A padded 3D tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_temporal_padding <- function(x, padding = c(1, 1)) { - keras$backend$temporal_padding( - x = x, - padding = as_integer_tuple(padding, force_tuple = TRUE) - ) -} - -#' Creates a tensor by tiling `x` by `n`. -#' -#' @param x A tensor or variable -#' @param n A list of integers. The length must be the same as the number of dimensions in `x`. -#' -#' @return A tiled tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_tile <- function(x, n) { - if(!k_is_tensor(n)) - n <- as.integer(n) - keras$backend$tile( - x = x, - n = n - ) -} - - -#' Converts a sparse tensor into a dense tensor and returns it. -#' -#' @param tensor A tensor instance (potentially sparse). -#' -#' @return A dense tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_to_dense <- function(tensor) { - keras$backend$to_dense( - tensor = tensor - ) -} - - -#' Transposes a tensor and returns it. -#' -#' @param x Tensor or variable. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_transpose <- function(x) { - keras$backend$transpose( - x = x - ) -} - - -#' Returns a tensor with truncated random normal distribution of values. -#' -#' The generated values follow a normal distribution -#' with specified mean and standard deviation, -#' except that values whose magnitude is more than -#' two standard deviations from the mean are dropped and re-picked. -#' -#' @param shape A list of integers, the shape of tensor to create. -#' @param mean Mean of the values. -#' @param stddev Standard deviation of the values. -#' @param dtype String, dtype of returned tensor. -#' @param seed Integer, random seed. -#' -#' @return A tensor. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_truncated_normal <- function(shape, mean = 0.0, stddev = 1.0, dtype = NULL, seed = NULL) { - keras$backend$truncated_normal( - shape = backend_normalize_shape(shape), - mean = mean, - stddev = stddev, - dtype = dtype, - seed = as_nullable_integer(seed) - ) -} - - -#' Update the value of `x` to `new_x`. -#' -#' @param x A `Variable`. -#' @param new_x A tensor of same shape as `x`. -#' -#' @return The variable `x` updated. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_update <- function(x, new_x) { - keras$backend$update( - x = x, - new_x = new_x - ) -} - - -#' Update the value of `x` by adding `increment`. -#' -#' @param x A `Variable`. -#' @param increment A tensor of same shape as `x`. -#' -#' @return The variable `x` updated. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_update_add <- function(x, increment) { - keras$backend$update_add( - x = x, - increment = increment - ) -} - - -#' Update the value of `x` by subtracting `decrement`. -#' -#' @param x A `Variable`. -#' @param decrement A tensor of same shape as `x`. 
-#' -#' @return The variable `x` updated. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_update_sub <- function(x, decrement) { - keras$backend$update_sub( - x = x, - decrement = decrement - ) -} - - -#' Variance of a tensor, alongside the specified axis. -#' -#' @param x A tensor or variable. -#' @param axis An integer, the axis to compute the variance over (axis indexes -#' are 1-based). -#' @param keepdims A boolean, whether to keep the dimensions or not. If -#' `keepdims` is `FALSE`, the rank of the tensor is reduced by 1. If -#' `keepdims` is `TRUE`, the reduced dimension is retained with length 1. -#' -#' @return A tensor with the variance of elements of `x`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_var <- function(x, axis = NULL, keepdims = FALSE) { - keras$backend$var( - x = x, - axis = as_axis(axis), - keepdims = keepdims - ) -} - - -#' Instantiates a variable and returns it. -#' -#' @param value Numpy array, initial value of the tensor. -#' @param dtype Tensor type. -#' @param name Optional name string for the tensor. -#' @param constraint Optional projection function to be applied to the variable after an optimizer update. -#' -#' @return A variable instance (with Keras metadata included). -#' -#' @template roxlate-keras-backend -#' -#' @export -k_variable <- function(value, dtype = NULL, name = NULL, constraint = NULL) { - keras$backend$variable( - value = value, - dtype = dtype, - name = name, - constraint = constraint - ) -} - - -#' Instantiates an all-zeros variable and returns it. -#' -#' @param shape Tuple of integers, shape of returned Keras variable -#' @param dtype String, data type of returned Keras variable -#' @param name String, name of returned Keras variable -#' -#' @return A variable (including Keras metadata), filled with `0.0`. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_zeros <- function(shape, dtype = NULL, name = NULL) { - keras$backend$zeros( - shape = backend_normalize_shape(shape), - dtype = dtype, - name = name - ) -} - - -#' Instantiates an all-zeros variable of the same shape as another tensor. -#' -#' @param x Keras variable or Keras tensor. -#' @param dtype String, dtype of returned Keras variable. NULL uses the dtype -#' of x. -#' @param name String, name for the variable to create. -#' -#' @return A Keras variable with the shape of x filled with zeros. -#' -#' @template roxlate-keras-backend -#' -#' @export -k_zeros_like <- function(x, dtype = NULL, name = NULL) { - keras$backend$zeros_like( - x = x, - dtype = dtype, - name = name - ) -} - -as_axis <- function(axis) { - if (is.null(axis)) - return(NULL) - - if (length(axis) > 1) - return(lapply(axis, as_axis)) - - axis <- as.integer(axis) - - if (axis == 0L) - stop("`axis` argument is 1 based, received 0") - - if (axis > 0L) axis - 1L - else axis -} - - -backend_normalize_shape <- function(shape) { - - # if it's a Python object or a list with python objects then leave it alone - if (inherits(shape, "python.builtin.object")) - return(shape) - - normalize_shape(shape) -} diff --git a/R/callbacks.R b/R/callbacks.R index 908d67bc25..e593f296fd 100644 --- a/R/callbacks.R +++ b/R/callbacks.R @@ -1,811 +1,990 @@ -#' Callback that prints metrics to stdout. + +#' Callback to back up and restore the training state. #' -#' @param count_mode One of "steps" or "samples". Whether the progress bar -#' should count samples seens or steps (batches) seen. -#' @param stateful_metrics List of metric names that should *not* -#' be averaged onver an epoch. 
Metrics in this list will be logged
-#'   as-is in `on_epoch_end`. All others will be averaged in
-#'   `on_epoch_end`.
+#' @description
+#' The `callback_backup_and_restore()` callback is intended to recover training from an
+#' interruption that has happened in the middle of a `fit` execution, by
+#' backing up the training states in a temporary checkpoint file, at the end of
+#' each epoch. Each backup overwrites the previously written checkpoint file,
+#' so at any given time there is at most one such checkpoint file for
+#' backup/restore purposes.
#'
-#' @family callbacks
+#' If training restarts before completion, the training state (which includes
+#' the model weights and epoch number) is restored to the most recently saved
+#' state at the beginning of a new `fit` run. At the completion of a
+#' `fit` run, the temporary checkpoint file is deleted.
#'
+#' Note that the user is responsible for bringing jobs back after the interruption.
+#' This callback is important for the backup and restore mechanism for fault
+#' tolerance purposes, and the model to be restored from a previous checkpoint
+#' is expected to be the same as the one used to back up. If the user changes
+#' arguments passed to `compile` or `fit`, the checkpoint saved for fault tolerance
+#' can become invalid.
+#'
+#' # Examples
+#'
+#' ```{r}
+#' callback_interrupting <- new_callback_class(
+#'   "InterruptingCallback",
+#'   on_epoch_begin = function(epoch, logs = NULL) {
+#'     if (epoch == 4) {
+#'       stop('Interrupting!')
+#'     }
+#'   }
+#' )
#'
+#' backup_dir <- tempfile()
+#' callback <- callback_backup_and_restore(backup_dir = backup_dir)
+#' model <- keras_model_sequential() %>%
+#'   layer_dense(10)
+#' model %>% compile(optimizer = optimizer_sgd(), loss = 'mse')
+#'
+#' # ensure model is built (i.e., weights are initialized) for
+#' # callback_backup_and_restore()
+#' model(op_ones(c(5, 20))) |> invisible()
+#'
+#' tryCatch({
+#'   model %>% fit(x = op_ones(c(5, 20)),
+#'                 y = op_zeros(5),
+#'                 epochs = 10, batch_size = 1,
+#'                 callbacks = list(callback, callback_interrupting()),
+#'                 verbose = 0)
+#' }, python.builtin.RuntimeError = function(e) message("Interrupted!"))
+#'
+#' model$history$epoch
+#' # model$history %>% keras3:::to_keras_training_history() %>% as.data.frame() %>% print()
+#'
+#' history <- model %>% fit(x = op_ones(c(5, 20)),
+#'                          y = op_zeros(5),
+#'                          epochs = 10, batch_size = 1,
+#'                          callbacks = list(callback),
+#'                          verbose = 0)
+#'
+#' # Only 6 more epochs are run, since the first training run was interrupted
+#' # at zero-indexed epoch 4; the second run continues from 4 to 9.
+#' nrow(as.data.frame(history))
+#' ```
+#'
+#' @param backup_dir
+#' String, path of the directory in which to store the data
+#' needed to restore the model. The directory
+#' cannot be reused elsewhere to store other files, e.g. by the
+#' `backup_and_restore` callback of another training run,
+#' or by another callback (e.g. `callback_model_checkpoint`)
+#' of the same training run.
+#'
+#' @param save_freq
+#' `"epoch"`, integer, or `FALSE`. When set to `"epoch"`,
+#' the callback saves the checkpoint at the end of each epoch.
+#' When set to an integer, the callback saves the checkpoint every
+#' `save_freq` batches. Set `save_freq = FALSE` only if using
+#' preemption checkpointing (i.e. with `save_before_preemption = TRUE`).
+#'
+#' @param delete_checkpoint
+#' Boolean, defaults to `TRUE`. This `backup_and_restore`
+#' callback works by saving a checkpoint to back up the training state.
+#' If `delete_checkpoint = TRUE`, the checkpoint will be deleted after +#' training is finished. Use `FALSE` if you'd like to keep the checkpoint +#' for future usage. +#' +#' @returns A `Callback` instance that can be passed to [`fit.keras.src.models.model.Model()`]. #' @export -callback_progbar_logger <- function(count_mode = "samples", stateful_metrics = NULL) { - args <- list( - count_mode = count_mode - ) - if (keras_version() >= "2.1.4") - args$stateful_metrics <- stateful_metrics - - do.call(keras$callbacks$ProgbarLogger, args) +#' @family callbacks +#' @seealso +#' + +# + +#' @tether keras.callbacks.BackupAndRestore +callback_backup_and_restore <- +function (backup_dir, save_freq = "epoch", delete_checkpoint = TRUE) +{ + args <- capture_args(list(save_freq = as_integer)) + do.call(keras$callbacks$BackupAndRestore, args) } - -#' Save the model after every epoch. -#' -#' `filepath` can contain named formatting options, which will be filled the -#' value of `epoch` and keys in `logs` (passed in `on_epoch_end`). For example: -#' if `filepath` is `weights.{epoch:02d}-{val_loss:.2f}.hdf5`, then the model -#' checkpoints will be saved with the epoch number and the validation loss in -#' the filename. -#' -#' @param filepath string, path to save the model file. -#' @param monitor quantity to monitor. -#' @param verbose verbosity mode, 0 or 1. -#' @param save_best_only if `save_best_only=TRUE`, the latest best model -#' according to the quantity monitored will not be overwritten. -#' @param save_weights_only if `TRUE`, then only the model's weights will be -#' saved (`save_model_weights_hdf5(filepath)`), else the full model is saved -#' (`save_model_hdf5(filepath)`). -#' @param mode one of "auto", "min", "max". If `save_best_only=TRUE`, the decision to -#' overwrite the current save file is made based on either the maximization or -#' the minimization of the monitored quantity. For val_acc, this should be -#' max, for val_loss this should be min, etc. In auto mode, the direction is -#' automatically inferred from the name of the monitored quantity. -#' @param period Interval (number of epochs) between checkpoints. -#' @param save_freq `'epoch'` or integer. When using 'epoch', the callback saves -#' the model after each epoch. When using integer, the callback saves the model -#' at end of a batch at which this many samples have been seen since last saving. -#' Note that if the saving isn't aligned to epochs, the monitored metric may -#' potentially be less reliable (it could reflect as little as 1 batch, since -#' the metrics get reset every epoch). Defaults to `'epoch'` -#' -#' @section For example: if `filepath` is -#' `weights.{epoch:02d}-{val_loss:.2f}.hdf5`,: then the model checkpoints will -#' be saved with the epoch number and the validation loss in the filename. +#' Callback that streams epoch results to a CSV file. #' -#' @family callbacks +#' @description +#' Supports all values that can be represented as a string, +#' including 1D iterables such as atomic vectors. +#' +#' # Examples +#' ```r +#' csv_logger <- callback_csv_logger('training.log') +#' model %>% fit(X_train, Y_train, callbacks = list(csv_logger)) +#' ``` #' +#' @param filename +#' Filename of the CSV file, e.g. `'run/log.csv'`. +#' +#' @param separator +#' String used to separate elements in the CSV file. +#' +#' @param append +#' Boolean. `TRUE`: append if file exists (useful for continuing +#' training). `FALSE`: overwrite existing file. 
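+#'
+#' For example, a minimal sketch that keeps one log file across two
+#' successive `fit()` calls when resuming training:
+#' ```r
+#' csv_logger <- callback_csv_logger('training.log', append = TRUE)
+#' model %>% fit(X_train, Y_train, epochs = 5, callbacks = list(csv_logger))
+#' # later, resume training and keep appending to the same log
+#' model %>% fit(X_train, Y_train, epochs = 5, callbacks = list(csv_logger))
+#' ```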
+#'
+#' @inherit callback_backup_and_restore return
#' @export
-callback_model_checkpoint <- function(filepath, monitor = "val_loss", verbose = 0,
-                                      save_best_only = FALSE, save_weights_only = FALSE,
-                                      mode = c("auto", "min", "max"), period = NULL,
-                                      save_freq = "epoch") {
-
-  if (!save_weights_only && !have_h5py())
-    stop("The h5py Python package is required to save model checkpoints")
-
-  args <- list(
-    filepath = normalize_path(filepath),
-    monitor = monitor,
-    verbose = as.integer(verbose),
-    save_best_only = save_best_only,
-    save_weights_only = save_weights_only,
-    mode = match.arg(mode)
-  )
-
-  if (is_tensorflow_implementation()) {
-    if (tensorflow::tf_version() < "1.14") {
-
-      if (!is.null(save_freq))
-        warning(
-          "The save_freq argument is only used by TensorFlow >= 1.14. ",
-          "Update TensorFlow or use save_freq = NULL"
-        )
-
-      if (is.null(period))
-        period <- 1L
-
-      args$period <- as.integer(period)
-    } else {
-
-      if (!is.null(period))
-        warning(
-          "The period argument is deprecated since TF v1.14 and will be ignored. ",
-          "Use save_freq instead."
-        )
-
-      # save_freq can be a string or an integer
-      if (is.character(save_freq))
-        args$save_freq <- save_freq
-      else
-        args$save_freq <- as_nullable_integer(save_freq)
-    }
-  } else if (is_backend("plaidml")) {
-
-    if (!is.null(save_freq))
-      warning("`save_freq` is ignored in plaidml. Use the `period` argument.")
-
-    if (is.null(save_freq) && is.null(period))
-      period <- 1L
-
-    args$period <- as.integer(period)
-  }
+#' @family callbacks
+#' @seealso
+#' +
# +
+#' @tether keras.callbacks.CSVLogger
callback_csv_logger <-
+function (filename, separator = ",", append = FALSE)
+{
+    args <- capture_args()
+    do.call(keras$callbacks$CSVLogger, args)
+}
+

-    do.call(keras$callbacks$ModelCheckpoint, args)
+#' Stop training when a monitored metric has stopped improving.
#'
+#' @description
+#' Assuming the goal of training is to minimize the loss, the
+#' metric to be monitored would be `'loss'`, and the mode would be `'min'`. A
+#' `model$fit()` training loop will check at the end of every epoch whether
+#' the loss is still decreasing, considering the `min_delta` and
+#' `patience` if applicable. Once the loss is found to be no longer decreasing,
+#' `model$stop_training` is marked `TRUE` and the training terminates.
+#'
+#' The quantity to be monitored needs to be available in the `logs` list.
+#' To make it so, pass the loss or metrics at `model$compile()`.
+#'
+#' # Examples
+#' ```{r}
+#' callback <- callback_early_stopping(monitor = 'loss',
+#'                                     patience = 3)
+#' # This callback will stop the training when there is no improvement in
+#' # the loss for three consecutive epochs.
+#' model <- keras_model_sequential() %>%
+#'   layer_dense(10)
+#' model %>% compile(optimizer = optimizer_sgd(), loss = 'mse')
+#' history <- model %>% fit(x = op_ones(c(5, 20)),
+#'                          y = op_zeros(5),
+#'                          epochs = 10, batch_size = 1,
+#'                          callbacks = list(callback),
+#'                          verbose = 0)
+#' nrow(as.data.frame(history))  # Only 4 epochs are run.
+#' ```
+#'
+#' @param monitor
+#' Quantity to be monitored. Defaults to `"val_loss"`.
+#'
+#' @param min_delta
+#' Minimum change in the monitored quantity to qualify as an
+#' improvement, i.e. an absolute change of less than `min_delta` will
+#' count as no improvement. Defaults to `0`.
+#'
+#' @param patience
+#' Number of epochs with no improvement after which training will
+#' be stopped. Defaults to `0`.
+#'
+#' @param verbose
+#' Verbosity mode, 0 or 1.
Mode 0 is silent, and mode 1 displays
+#' messages when the callback takes an action. Defaults to `0`.
+#'
+#' @param mode
+#' One of `{"auto", "min", "max"}`. In `"min"` mode, training will stop
+#' when the quantity monitored has stopped decreasing; in `"max"` mode
+#' it will stop when the quantity monitored has stopped increasing; in
+#' `"auto"` mode, the direction is automatically inferred from the name
+#' of the monitored quantity. Defaults to `"auto"`.
+#'
+#' @param baseline
+#' Baseline value for the monitored quantity. If not `NULL`,
+#' training will stop if the model doesn't show improvement over the
+#' baseline. Defaults to `NULL`.
+#'
+#' @param restore_best_weights
+#' Whether to restore model weights from the epoch
+#' with the best value of the monitored quantity. If `FALSE`, the model
+#' weights obtained at the last step of training are used. An epoch
+#' will be restored regardless of the performance relative to the
+#' `baseline`. If no epoch improves on `baseline`, training will run
+#' for `patience` epochs and restore weights from the best epoch in
+#' that set. Defaults to `FALSE`.
+#'
+#' @param start_from_epoch
+#' Number of epochs to wait before starting to monitor
+#' improvement. This allows for a warm-up period in which no
+#' improvement is expected and thus training will not be stopped.
+#' Defaults to `0`.
+#'
+#' @inherit callback_backup_and_restore return
+#' @export
+#' @family callbacks
+#' @seealso
+#' +
# +
+#' @tether keras.callbacks.EarlyStopping
callback_early_stopping <-
function (monitor = "val_loss", min_delta = 0L, patience = 0L,
    verbose = 0L, mode = "auto", baseline = NULL, restore_best_weights = FALSE,
    start_from_epoch = 0L)
{
    args <- capture_args(list(min_delta = as_integer, patience = as_integer,
        verbose = as_integer, start_from_epoch = as_integer))
    do.call(keras$callbacks$EarlyStopping, args)
}


-#' Callback to back up and restore the training state
+#' Callback for creating simple, custom callbacks on-the-fly.
#'
-#' @details
-#' `BackupAndRestore` callback is intended to recover training from an
-#' interruption that has happened in the middle of a `fit(model)` execution, by
-#' backing up the training states in a temporary checkpoint file (with the help
-#' of a `tf.train.CheckpointManager`), at the end of each epoch. Each backup
-#' overwrites the previously written checkpoint file, so at any given time there
-#' is at most one such checkpoint file for backup/restoring purpose.
+#' @description
+#' This callback is constructed with anonymous functions that will be called
+#' at the appropriate time (during `Model.{fit | evaluate | predict}`).
+#' Note that the callback expects positional arguments, as:
#'
-#' If training restarts before completion, the training state (which includes the
-#' `Model` weights and epoch number) is restored to the most recently saved state
-#' at the beginning of a new `fit()` run. At the completion of a `fit()`
-#' run, the temporary checkpoint file is deleted.
+#' - `on_epoch_begin` and `on_epoch_end` expect two positional arguments:
+#'   `epoch`, `logs`
+#' - `on_train_begin` and `on_train_end` expect one positional argument:
+#'   `logs`
+#' - `on_train_batch_begin` and `on_train_batch_end` expect two positional
+#'   arguments: `batch`, `logs`
+#' - See `Callback` class definition for the full list of functions and their
+#'   expected arguments.
#'
-#' Note that the user is responsible to bring jobs back after the interruption.
-#' This callback is important for the backup and restore mechanism for fault -#' tolerance purpose, and the model to be restored from an previous checkpoint is -#' expected to be the same as the one used to back up. If user changes arguments -#' passed to compile or fit, the checkpoint saved for fault tolerance can become -#' invalid. -#' -#' Note: -#' -#' 1. This callback is not compatible with eager execution disabled. -#' -#' 2. A checkpoint is saved at the end of each epoch. After restoring, -#' `fit()` redoes any partial work during the unfinished epoch in which the -#' training got restarted (so the work done before the interruption doesn't -#' affect the final model state). -#' -#' 3. This works for both single worker and multi-worker modes. When `fit()` -#' is used with `tf.distribute`, it supports `tf.distribute.MirroredStrategy`, -#' `tf.distribute.MultiWorkerMirroredStrategy`, `tf.distribute.TPUStrategy`, and -#' `tf.distribute.experimental.ParameterServerStrategy`. -#' -#' @param backup_dir String, path to store the checkpoint. -#' e.g. `backup_dir = normalizePath('./backup')` -#' This is the directory in which the system stores temporary files to -#' recover the model from jobs terminated unexpectedly. The directory -#' cannot be reused elsewhere to store other files, e.g. by -#' `BackupAndRestore` callback of another training, or by another callback -#' (`ModelCheckpoint`) of the same training. -#' @param ... For backwards and forwards compatibility +#' # Examples #' -#' @seealso -#' + +#' ```r +#' # Print the batch number at the beginning of every batch. +#' batch_print_callback <- callback_lambda( +#' on_train_batch_begin = function(batch, logs) { +#' print(batch) +#' } +#' ) #' -#' @export -callback_backup_and_restore <- -function(backup_dir) { - args <- capture_args(match.call(), NULL) - require_tf_version("2.8", "callback_backup_and_restore") - do.call(keras$callbacks$BackupAndRestore, args) -} - - -#' Stop training when a monitored quantity has stopped improving. +#' # Stream the epoch loss to a file in new-line delimited JSON format +#' # (one valid JSON object per line) +#' json_log <- file('loss_log.json', open = 'wt') +#' json_logging_callback <- callback_lambda( +#' on_epoch_end = function(epoch, logs) { +#' jsonlite::write_json( +#' list(epoch = epoch, loss = logs$loss), +#' json_log, +#' append = TRUE +#' ) +#' }, +#' on_train_end = function(logs) { +#' close(json_log) +#' } +#' ) #' -#' @inheritParams callback_model_checkpoint +#' # Terminate some processes after having finished model training. +#' processes <- ... +#' cleanup_callback <- callback_lambda( +#' on_train_end = function(logs) { +#' for (p in processes) { +#' if (is_alive(p)) { +#' terminate(p) +#' } +#' } +#' } +#' ) #' -#' @param monitor quantity to be monitored. -#' @param min_delta minimum change in the monitored quantity to qualify as an -#' improvement, i.e. an absolute change of less than min_delta, will count as -#' no improvement. -#' @param patience number of epochs with no improvement after which training -#' will be stopped. -#' @param mode one of "auto", "min", "max". In `min` mode, training will stop when -#' the quantity monitored has stopped decreasing; in `max` mode it will stop -#' when the quantity monitored has stopped increasing; in `auto` mode, the -#' direction is automatically inferred from the name of the monitored -#' quantity. -#' @param baseline Baseline value for the monitored quantity to reach. 
-#' Training will stop if the model doesn't show improvement
-#'   over the baseline.
-#' @param restore_best_weights Whether to restore model weights from
-#'   the epoch with the best value of the monitored quantity.
-#'   If `FALSE`, the model weights obtained at the last step of
-#'   training are used.
+#' model %>% fit(
+#'   ...,
+#'   callbacks = list(
+#'     batch_print_callback,
+#'     json_logging_callback,
+#'     cleanup_callback
+#'   )
+#' )
+#' ```
#'
+#' @param on_epoch_begin
+#' called at the beginning of every epoch.
#'
-#' @family callbacks
+#' @param on_epoch_end
+#' called at the end of every epoch.
#'
-#' @export
-callback_early_stopping <- function(monitor = "val_loss", min_delta = 0, patience = 0,
-                                    verbose = 0, mode = c("auto", "min", "max"),
-                                    baseline = NULL, restore_best_weights = FALSE) {
-
-  args <- list(
-    monitor = monitor,
-    min_delta = min_delta,
-    patience = as.integer(patience),
-    verbose = as.integer(verbose),
-    mode = match.arg(mode)
-  )
-
-  if (keras_version() >= "2.2")
-    args$baseline <- baseline
-
-  if (keras_version() >= "2.2.3")
-    args$restore_best_weights <- restore_best_weights
-
-  do.call(keras$callbacks$EarlyStopping, args)
-}
-
-
-#' Callback used to stream events to a server.
+#' @param on_train_begin
+#' called at the beginning of model training.
#'
-#' @param root root url of the target server.
-#' @param path path relative to root to which the events will be sent.
-#' @param field JSON field under which the data will be stored.
-#' @param headers Optional named list of custom HTTP headers. Defaults to:
-#'   `list(Accept = "application/json", `Content-Type` = "application/json")`
-#' @param send_as_json Whether the request should be sent as application/json.
+#' @param on_train_end
+#' called at the end of model training.
#'
-#' @details Events are sent to `root + '/publish/epoch/end/'` by default. Calls
-#'   are HTTP POST, with a `data` argument which is a JSON-encoded dictionary
-#'   of event data. If send_as_json is set to True, the content type of the
-#'   request will be application/json. Otherwise the serialized JSON will be
-#'   send within a form
+#' @param on_train_batch_begin
+#' called at the beginning of every train batch.
#'
-#' @family callbacks
+#' @param on_train_batch_end
+#' called at the end of every train batch.
#'
+#' @param ...
+#' Any function in [`Callback()`] that you want to override by
+#' passing `function_name = function`. For example,
+#' `callback_lambda(..., on_train_end = train_end_fn)`. The custom function
+#' needs to have the same arguments as the ones defined in [`Callback()`].
+#'
+#' @inherit callback_backup_and_restore return
#' @export
-callback_remote_monitor <- function(root = "https://localhost:9000", path = "/publish/epoch/end/",
-                                    field = "data", headers = NULL, send_as_json = FALSE) {
-
-  if (!have_requests())
-    stop("The requests Python package is required for remote monitoring.",
-         "Please run `reticulate::py_install('requests')`")
-
-  args <- list(
-    root = root,
-    path = path,
-    field = field,
-    headers = headers
-  )
-
-  if (keras_version() >= "2.1.6")
-    args$send_as_json <- send_as_json
+#' @family callbacks
+#' @seealso
+#' +
# +
+#' @tether keras.callbacks.LambdaCallback
callback_lambda <-
function (on_epoch_begin = NULL, on_epoch_end = NULL, on_train_begin = NULL,
    on_train_end = NULL, on_train_batch_begin = NULL, on_train_batch_end = NULL,
    ...)
+{
+    args <- capture_args(list(
+        on_epoch_begin = as_callback_lambda_fn_sig_idx_logs,
+        on_epoch_end = as_callback_lambda_fn_sig_idx_logs,
+        on_train_begin = as_callback_lambda_fn_sig_logs,
+        on_train_end = as_callback_lambda_fn_sig_logs,
+        on_train_batch_begin = as_callback_lambda_fn_sig_idx_logs,
+        on_train_batch_end = as_callback_lambda_fn_sig_idx_logs,
+        on_test_begin = as_callback_lambda_fn_sig_logs,
+        on_test_end = as_callback_lambda_fn_sig_logs,
+        on_test_batch_begin = as_callback_lambda_fn_sig_idx_logs,
+        on_test_batch_end = as_callback_lambda_fn_sig_idx_logs,
+        on_predict_begin = as_callback_lambda_fn_sig_logs,
+        on_predict_end = as_callback_lambda_fn_sig_logs,
+        on_predict_batch_begin = as_callback_lambda_fn_sig_idx_logs,
+        on_predict_batch_end = as_callback_lambda_fn_sig_idx_logs
+    ))
+    do.call(keras$callbacks$LambdaCallback, args)
+}

+as_callback_lambda_fn_sig_idx_logs <- function(fn) {
+  tools <- import_callback_tools()
+  tools$wrap_sig_idx_logs(fn)
+}

+as_callback_lambda_fn_sig_logs <- function(fn) {
+  tools <- import_callback_tools()
+  tools$wrap_sig_logs(fn)
+}

#' Learning rate scheduler.
#'
-#' @param schedule a function that takes an epoch index as input (integer,
-#'   indexed from 0) and current learning rate and returns a new learning rate
-#'   as output (float).
+#' @description
+#' At the beginning of every epoch, this callback gets the updated learning
+#' rate value from the `schedule` function provided, with the current
+#' epoch and current learning rate, and applies the updated learning rate
+#' to the optimizer.
+#'
+#' # Examples
+#' ```{r}
+#' # This function keeps the initial learning rate steady for the first ten epochs
+#' # and decreases it exponentially after that.
+#' scheduler <- function(epoch, lr) {
+#'   if (epoch < 10)
+#'     return(lr)
+#'   else
+#'     return(lr * exp(-0.1))
+#' }
#'
-#' @family callbacks
+#' model <- keras_model_sequential() |> layer_dense(units = 10)
+#' model |> compile(optimizer = optimizer_sgd(), loss = 'mse')
+#' model$optimizer$learning_rate |> as.array() |> round(5)
+#' ```
#'
-#' @export
-callback_learning_rate_scheduler <- function(schedule) {
-  keras$callbacks$LearningRateScheduler(
-    schedule = schedule
-  )
-}
-
-
-#' Callback that terminates training when a NaN loss is encountered.
+#' ```{r}
+#' callback <- callback_learning_rate_scheduler(schedule = scheduler)
+#' history <- model |> fit(x = array(runif(100), c(5, 20)),
+#'                         y = array(0, c(5, 1)),
+#'                         epochs = 15, callbacks = list(callback), verbose = 0)
+#' model$optimizer$learning_rate |> as.array() |> round(5)
+#' ```
#'
-#' @family callbacks
+#' @param schedule
+#' A function that takes an epoch index (integer, indexed from 0)
+#' and current learning rate (float) as inputs and returns a new
+#' learning rate as output (float).
#'
+#' @param verbose
+#' Integer. 0: quiet, 1: log update messages.
+#' +#' @inherit callback_backup_and_restore return #' @export -callback_terminate_on_naan <- function() { - keras$callbacks$TerminateOnNaN() +#' @family callbacks +#' @seealso +#' + +# + +#' @tether keras.callbacks.LearningRateScheduler +callback_learning_rate_scheduler <- +function (schedule, verbose = 0L) +{ + args <- capture_args(list(verbose = as_integer)) + do.call(keras$callbacks$LearningRateScheduler, args) } -#' TensorBoard basic visualizations -#' -#' This callback writes a log for TensorBoard, which allows you to visualize -#' dynamic graphs of your training and test metrics, as well as activation -#' histograms for the different layers in your model. -#' -#' @param log_dir The path of the directory where to save the log files to be -#' parsed by Tensorboard. The default is `NULL`, which will use the active -#' run directory (if available) and otherwise will use "logs". -#' @param histogram_freq frequency (in epochs) at which to compute activation -#' histograms for the layers of the model. If set to 0, histograms won't be -#' computed. -#' @param batch_size size of batch of inputs to feed to the network -#' for histograms computation. No longer needed, ignored since TF 1.14. -#' @param write_graph whether to visualize the graph in Tensorboard. The log -#' file can become quite large when write_graph is set to `TRUE` -#' @param write_grads whether to visualize gradient histograms in TensorBoard. -#' `histogram_freq` must be greater than 0. -#' @param write_images whether to write model weights to visualize as image in -#' Tensorboard. -#' @param embeddings_freq frequency (in epochs) at which selected embedding -#' layers will be saved. -#' @param embeddings_layer_names a list of names of layers to keep eye on. If -#' `NULL` or empty list all the embedding layers will be watched. -#' @param embeddings_metadata a named list which maps layer name to a file name in -#' which metadata for this embedding layer is saved. See the -#' [details](https://www.tensorflow.org/tensorboard/tensorboard_projector_plugin#saving_data_for_tensorboard) -#' about the metadata file format. In case if the same metadata file is used -#' for all embedding layers, string can be passed. -#' @param embeddings_data Data to be embedded at layers specified in -#' `embeddings_layer_names`. Array (if the model has a single input) or list -#' of arrays (if the model has multiple inputs). Learn [more about embeddings](https://www.tensorflow.org/text/guide/word_embeddings) -#' @param update_freq `'batch'` or `'epoch'` or integer. When using `'batch'`, writes -#' the losses and metrics to TensorBoard after each batch. The same -#' applies for `'epoch'`. If using an integer, let's say `10000`, -#' the callback will write the metrics and losses to TensorBoard every -#' 10000 samples. Note that writing too frequently to TensorBoard -#' can slow down your training. -#' @param profile_batch Profile the batch to sample compute characteristics. By -#' default, it will disbale profiling. Set profile_batch=2 profile the second -#' batch. Must run in TensorFlow eager mode. (TF >= 1.14) -#' -#' @details TensorBoard is a visualization tool provided with TensorFlow. -#' -#' You can find more information about TensorBoard -#' [here](https://www.tensorflow.org/tensorboard/get_started). -#' -#' When using a backend other than TensorFlow, TensorBoard will still work -#' (if you have TensorFlow installed), but the only feature available will -#' be the display of the losses and metrics plots. 
+#' Callback to save the Keras model or model weights at some frequency.
+#'
+#' @description
+#' `callback_model_checkpoint()` is used in conjunction with training using
+#' `model |> fit()` to save a model or weights (in a checkpoint file) at some
+#' interval, so the model or weights can be loaded later to continue the
+#' training from the state saved.
+#'
+#' A few options this callback provides include:
+#'
+#' - Whether to only keep the model that has achieved the "best performance" so
+#'   far, or whether to save the model at the end of every epoch regardless of
+#'   performance.
+#' - Definition of "best"; which quantity to monitor and whether it should be
+#'   maximized or minimized.
+#' - The frequency it should save at. Currently, the callback supports saving
+#'   at the end of every epoch, or after a fixed number of training batches.
+#' - Whether only weights are saved, or the whole model is saved.
+#'
+#' # Examples
+#' ```{r}
+#' model <- keras_model_sequential(input_shape = c(10)) |>
+#'   layer_dense(1, activation = "sigmoid") |>
+#'   compile(loss = "binary_crossentropy", optimizer = "adam",
+#'           metrics = c('accuracy'))
+#'
+#' EPOCHS <- 10
+#' checkpoint_filepath <- tempfile('checkpoint-model-', fileext = ".keras")
+#' model_checkpoint_callback <- callback_model_checkpoint(
+#'   filepath = checkpoint_filepath,
+#'   monitor = 'val_accuracy',
+#'   mode = 'max',
+#'   save_best_only = TRUE
+#' )
#'
+#' # Model is saved at the end of every epoch, if it's the best seen so far.
+#' model |> fit(x = random_uniform(c(2, 10)), y = op_ones(2, 1),
+#'              epochs = EPOCHS, validation_split = .5, verbose = 0,
+#'              callbacks = list(model_checkpoint_callback))
+#'
+#' # The model (that is considered the best) can be loaded:
+#' load_model(checkpoint_filepath)
+#'
+#' # Alternatively, one could checkpoint just the model weights:
+#' checkpoint_filepath <- tempfile('checkpoint-', fileext = ".weights.h5")
+#' model_checkpoint_callback <- callback_model_checkpoint(
+#'   filepath = checkpoint_filepath,
+#'   save_weights_only = TRUE,
+#'   monitor = 'val_accuracy',
+#'   mode = 'max',
+#'   save_best_only = TRUE
+#' )
#'
+#' # Model weights are saved at the end of every epoch, if it's the best seen
+#' # so far.
+#' # same as above
+#' model |> fit(x = random_uniform(c(2, 10)), y = op_ones(2, 1),
+#'              epochs = EPOCHS, validation_split = .5, verbose = 0,
+#'              callbacks = list(model_checkpoint_callback))
+#'
+#' # The model weights (that are considered the best) can be loaded
+#' model |> load_model_weights(checkpoint_filepath)
+#' ```
+#'
+#' @param filepath
+#' string, path to save the model file.
+#' `filepath` can contain named formatting options,
+#' which will be filled with the value of `epoch` and keys in `logs`
+#' (passed in `on_epoch_end`).
+#' The `filepath` name needs to end with `".weights.h5"` when
+#' `save_weights_only = TRUE` or should end with `".keras"` when
+#' checkpointing the whole model (the default).
+#' For example:
+#' if `filepath` is `"{epoch:02d}-{val_loss:.2f}.keras"`, then the
+#' model checkpoints will be saved with the epoch number and the
+#' validation loss in the filename. The directory of the filepath
+#' should not be reused by any other callbacks to avoid conflicts.
+#'
+#' @param monitor
+#' The metric name to monitor. Typically the metrics are set by
+#' the `model |> compile()` method. Note:
+#' * Prefix the name with `"val_"` to monitor validation metrics.
+#' * Use `"loss"` or `"val_loss"` to monitor the model's total loss.
+#' * If you specify metrics as strings, like `"accuracy"`, pass the
+#'   same string (with or without the `"val_"` prefix).
+#' * If you pass `Metric` objects (created by one of `metric_*()`), `monitor` should be set to
+#'   `metric$name`.
+#' * If you're not sure about the metric names, you can check the
+#'   contents of the `history$metrics` list returned by
+#'   `history <- model |> fit()`
+#' * Multi-output models set additional prefixes on the metric names.
+#'
+#' @param verbose
+#' Verbosity mode, 0 or 1. Mode 0 is silent, and mode 1
+#' displays messages when the callback takes an action.
+#'
+#' @param save_best_only
+#' if `save_best_only = TRUE`, it only saves when the model
+#' is considered the "best" and the latest best model according to the
+#' quantity monitored will not be overwritten. If `filepath` doesn't
+#' contain formatting options like `{epoch}` then `filepath` will be
+#' overwritten by each new better model.
+#'
+#' @param mode
+#' one of \{`"auto"`, `"min"`, `"max"`\}. If `save_best_only = TRUE`, the
+#' decision to overwrite the current save file is made based on either
+#' the maximization or the minimization of the monitored quantity.
+#' For `val_acc`, this should be `"max"`, for `val_loss` this should be
+#' `"min"`, etc. In `"auto"` mode, the mode is set to `"max"` if the
+#' quantities monitored are `"acc"` or start with `"fmeasure"` and are
+#' set to `"min"` for the rest of the quantities.
+#'
+#' @param save_weights_only
+#' if `TRUE`, then only the model's weights will be saved
+#' (`model |> save_model_weights(filepath)`), else the full model is saved
+#' (`model |> save_model(filepath)`).
+#'
+#' @param save_freq
+#' `"epoch"` or integer. When using `"epoch"`, the callback
+#' saves the model after each epoch. When using an integer, the callback
+#' saves the model at the end of this many batches. If the `Model` is
+#' compiled with `steps_per_execution = N`, then the saving criteria will
+#' be checked every Nth batch. Note that if the saving isn't aligned to
+#' epochs, the monitored metric may potentially be less reliable (it
+#' could reflect as little as 1 batch, since the metrics get reset
+#' every epoch). Defaults to `"epoch"`.
+#'
+#' @param initial_value_threshold
+#' Floating point initial "best" value of the
+#' metric to be monitored. Only applies if `save_best_only = TRUE`. Only
+#' overwrites the model weights already saved if the performance of
+#' the current model is better than this value.
+#'
+#' @inherit callback_backup_and_restore return
#' @export
-callback_tensorboard <- function(log_dir = NULL, histogram_freq = 0,
-                                 batch_size = NULL,
-                                 write_graph = TRUE,
-                                 write_grads = FALSE,
-                                 write_images = FALSE,
-                                 embeddings_freq = 0,
-                                 embeddings_layer_names = NULL,
-                                 embeddings_metadata = NULL,
-                                 embeddings_data = NULL,
-                                 update_freq = "epoch",
-                                 profile_batch = 0) {
-
-  # establish the log_dir
-  if (is.null(log_dir)) {
-    if (tfruns::is_run_active())
-      log_dir <- file.path(tfruns::run_dir(), "logs")
-    else
-      log_dir <- "logs"
-  }
-
-  args <- list(
-    log_dir = normalize_path(log_dir),
-    histogram_freq = as.integer(histogram_freq),
-    write_graph = write_graph,
-    write_images = write_images
-  )
-
-  if (tensorflow::tf_version() >= 1.14) {
-    args[["profile_batch"]] = as.integer(profile_batch)
-  } else if (profile_batch > 0) {
-    warning("profile_batch can only be used with TensorFlow >= 1.14", call. = FALSE)
-  }
-
-  if (!missing(embeddings_data) && keras_version() < "2.2.0")
-    stop("embeddings_data requires keras >= 2.2.
Please update with install_keras()") - - # embeddings arguments seem to have been excluded in the TF implementation - # (even though they are stil part of the docs there) - if (!is_tensorflow_implementation()) { - args$embeddings_freq <- as.integer(embeddings_freq) - args$embeddings_layer_names <- embeddings_layer_names - args$embeddings_metadata <- embeddings_metadata - args$embeddings_data <- embeddings_data - } - - if (keras_version() >= "2.0.5" & tensorflow::tf_version() < "1.14") { - - if (is.null(batch_size)) - batch_size <- 32L - - args$batch_size <- as.integer(batch_size) - args$write_grads <- write_grads - } else if (!is.null(batch_size)) { - warning("Batch size is ignored since TensorFlow 1.14.0") - } +#' @family callbacks +#' @seealso +#' + +# + +#' @tether keras.callbacks.ModelCheckpoint +callback_model_checkpoint <- +function (filepath, monitor = "val_loss", verbose = 0L, save_best_only = FALSE, + save_weights_only = FALSE, mode = "auto", save_freq = "epoch", + initial_value_threshold = NULL) +{ + args <- capture_args(list(verbose = as_integer, save_freq = as_integer)) + do.call(keras$callbacks$ModelCheckpoint, args) +} - if (keras_version() >= "2.2.3") - args$update_freq <- update_freq - do.call(keras$callbacks$TensorBoard, args) +# Callback that prints metrics to stdout. +# +# @description +# +# # Raises +# ValueError: In case of invalid `count_mode`. +# +# @param count_mode +# One of `"steps"` or `"samples"`. +# Whether the progress bar should +# count samples seen or steps (batches) seen. +# +# @export +# @noRd +# @family callbacks +# @seealso +# + +# + +# @tether keras.callbacks.ProgbarLogger +# callback_progbar_logger <- +function (count_mode = NULL) +{ + args <- capture_args() + do.call(keras$callbacks$ProgbarLogger, args) } #' Reduce learning rate when a metric has stopped improving. #' -#' Models often benefit from reducing the learning rate by a factor of 2-10 once -#' learning stagnates. This callback monitors a quantity and if no improvement -#' is seen for a 'patience' number of epochs, the learning rate is reduced. -#' -#' @param monitor quantity to be monitored. -#' @param factor factor by which the learning rate will be reduced. new_lr = lr -#' * factor -#' @param patience number of epochs with no improvement after which learning -#' rate will be reduced. -#' @param verbose int. 0: quiet, 1: update messages. -#' @param mode one of "auto", "min", "max". In min mode, lr will be reduced when -#' the quantity monitored has stopped decreasing; in max mode it will be -#' reduced when the quantity monitored has stopped increasing; in auto mode, -#' the direction is automatically inferred from the name of the monitored -#' quantity. -#' @param min_delta threshold for measuring the new optimum, to only focus on -#' significant changes. -#' @param cooldown number of epochs to wait before resuming normal operation -#' after lr has been reduced. -#' @param min_lr lower bound on the learning rate. +#' @description +#' Models often benefit from reducing the learning rate by a factor +#' of 2-10 once learning stagnates. This callback monitors a +#' quantity and if no improvement is seen for a 'patience' number +#' of epochs, the learning rate is reduced. 
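+#'
+#' For example, with the default `factor = 0.1`, a plateau in the monitored
+#' quantity reduces a learning rate of `0.01` to `0.001` (`0.01 * 0.1`), and
+#' the rate is never reduced below `min_lr`.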
#'
-#' @family callbacks
+#' # Examples
+#' ```{r, eval = FALSE}
+#' reduce_lr <- callback_reduce_lr_on_plateau(monitor = 'val_loss', factor = 0.2,
+#'                                            patience = 5, min_lr = 0.001)
+#' model %>% fit(x_train, y_train, callbacks = list(reduce_lr))
+#' ```
#'
-#' @export
-callback_reduce_lr_on_plateau <- function(monitor = "val_loss", factor = 0.1, patience = 10,
-                                          verbose = 0, mode = c("auto", "min", "max"),
-                                          min_delta = 0.0001, cooldown = 0, min_lr = 0.0) {
-
-  args <- list(
-    monitor = monitor,
-    factor = factor,
-    patience = as.integer(patience),
-    verbose = as.integer(verbose),
-    mode = match.arg(mode),
-    cooldown = as.integer(cooldown),
-    min_lr = min_lr
-  )
-
-  if (keras_version() >= "2.1.6")
-    args$min_delta <- min_delta
-  else
-    args$epsilon <- min_delta
-
-  do.call(keras$callbacks$ReduceLROnPlateau, args)
-}
-
-#' Callback that streams epoch results to a csv file
+#' @param monitor
+#' String. Quantity to be monitored.
#'
-#' Supports all values that can be represented as a string
+#' @param factor
+#' Float. Factor by which the learning rate will be reduced.
+#' `new_lr = lr * factor`.
#'
-#' @param filename filename of the csv file, e.g. 'run/log.csv'.
-#' @param separator string used to separate elements in the csv file.
-#' @param append `TRUE`: append if file exists (useful for continuing training).
-#'   `FALSE`: overwrite existing file,
+#' @param patience
+#' Integer. Number of epochs with no improvement after which
+#' learning rate will be reduced.
#'
-#' @family callbacks
+#' @param verbose
+#' Integer. 0: quiet, 1: update messages.
#'
+#' @param mode
+#' String. One of `{'auto', 'min', 'max'}`. In `'min'` mode,
+#' the learning rate will be reduced when the
+#' quantity monitored has stopped decreasing; in `'max'` mode it will
+#' be reduced when the quantity monitored has stopped increasing; in
+#' `'auto'` mode, the direction is automatically inferred from the name
+#' of the monitored quantity.
+#'
+#' @param min_delta
+#' Float. Threshold for measuring the new optimum, to only focus
+#' on significant changes.
+#'
+#' @param cooldown
+#' Integer. Number of epochs to wait before resuming normal
+#' operation after the learning rate has been reduced.
+#'
+#' @param min_lr
+#' Float. Lower bound on the learning rate.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit callback_backup_and_restore return
#' @export
-callback_csv_logger <- function(filename, separator = ",", append = FALSE) {
-  keras$callbacks$CSVLogger(
-    filename = normalize_path(filename),
-    separator = separator,
-    append = append
-  )
+#' @family callbacks
+#' @seealso
+#' +
# +
+#' @tether keras.callbacks.ReduceLROnPlateau
callback_reduce_lr_on_plateau <-
function (monitor = "val_loss", factor = 0.1, patience = 10L,
    verbose = 0L, mode = "auto", min_delta = 1e-04, cooldown = 0L,
    min_lr = 0, ...)
{
    args <- capture_args(list(patience = as_integer, verbose = as_integer,
        cooldown = as_integer))
    do.call(keras$callbacks$ReduceLROnPlateau, args)
}
-

-#' Create a custom callback
-#'
-#' This callback is constructed with anonymous functions that will be called at
-#' the appropriate time.
Note that the callbacks expects positional arguments, -#' as: -#' -#' - `on_epoch_begin` and `on_epoch_end` expect two positional arguments: `epoch`, `logs` -#' - `on_batch_*`, `on_train_batch_*`, `on_predict_batch_*` and `on_test_batch_*`, expect -#' two positional arguments: `batch`, `logs` -#' - `on_train_*`, `on_test_*` and `on_predict_*` expect one positional argument: `logs` -#' -#' @param on_epoch_begin called at the beginning of every epoch. -#' @param on_epoch_end called at the end of every epoch. -#' @param on_batch_begin called at the beginning of every training batch. -#' @param on_batch_end called at the end of every training batch. -#' @param on_train_batch_begin called at the beginning of every batch. -#' @param on_train_batch_end called at the end of every batch. -#' @param on_train_begin called at the beginning of model training. -#' @param on_train_end called at the end of model training. -#' @param on_predict_batch_begin called at the beginning of a batch in predict methods. -#' @param on_predict_batch_end called at the end of a batch in predict methods. -#' @param on_predict_begin called at the beginning of prediction. -#' @param on_predict_end called at the end of prediction. -#' @param on_test_batch_begin called at the beginning of a batch in evaluate methods. -#' Also called at the beginning of a validation batch in the fit methods, -#' if validation data is provided. -#' @param on_test_batch_end called at the end of a batch in evaluate methods. -#' Also called at the end of a validation batch in the fit methods, -#' if validation data is provided. -#' @param on_test_begin called at the beginning of evaluation or validation. -#' @param on_test_end called at the end of evaluation or validation. +#' Callback used to stream events to a server. #' -#' @family callbacks +#' @description +#' Requires the `requests` library. +#' Events are sent to `root + '/publish/epoch/end/'` by default. Calls are +#' HTTP POST, with a `data` argument which is a +#' JSON-encoded named list of event data. +#' If `send_as_json = TRUE`, the content type of the request will be +#' `"application/json"`. +#' Otherwise the serialized JSON will be sent within a form. +#' +#' @param root +#' String; root url of the target server. +#' +#' @param path +#' String; path relative to `root` to which the events will be sent. +#' +#' @param field +#' String; JSON field under which the data will be stored. +#' The field is used only if the payload is sent within a form +#' (i.e. when `send_as_json = FALSE`). #' +#' @param headers +#' Named list; optional custom HTTP headers. +#' +#' @param send_as_json +#' Boolean; whether the request should be +#' sent as `"application/json"`. 
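+#'
+#' For example, a minimal usage sketch (assuming a collector service is
+#' listening at the default `root`, `http://localhost:9000`):
+#' ```r
+#' monitor <- callback_remote_monitor(send_as_json = TRUE)
+#' model %>% fit(X_train, Y_train, callbacks = list(monitor))
+#' ```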
+#' +#' @inherit callback_backup_and_restore return #' @export -callback_lambda <- function(on_epoch_begin = NULL, on_epoch_end = NULL, - on_batch_begin = NULL, on_batch_end = NULL, - on_train_batch_begin = NULL, on_train_batch_end = NULL, - on_train_begin = NULL, on_train_end = NULL, - on_predict_batch_begin = NULL, on_predict_batch_end = NULL, - on_predict_begin = NULL, on_predict_end = NULL, - on_test_batch_begin = NULL, on_test_batch_end = NULL, - on_test_begin = NULL, on_test_end = NULL - ) { - - - args <- list( - on_epoch_begin = on_epoch_begin, - on_epoch_end = on_epoch_end, - on_batch_begin = on_batch_begin, - on_batch_end = on_batch_end, - on_train_begin = on_train_begin, - on_train_end = on_train_end, - on_train_batch_begin = on_train_batch_begin, - on_train_batch_end = on_train_batch_end, - on_predict_batch_begin = on_predict_batch_begin, - on_predict_batch_end = on_predict_batch_end, - on_predict_begin = on_predict_begin, - on_test_batch_begin = on_test_batch_begin, - on_test_batch_end = on_test_batch_end, - on_test_begin = on_test_begin, - on_test_end = on_test_end - ) - - # remove NULL arguments from args. - args <- Filter(function(x) !is.null(x), args) - warn_callback(args) - - do.call(keras$callbacks$LambdaCallback, args) +#' @family callbacks +#' @seealso +#' + +# + +#' @tether keras.callbacks.RemoteMonitor +callback_remote_monitor <- +function (root = "http://localhost:9000", path = "/publish/epoch/end/", + field = "data", headers = NULL, send_as_json = FALSE) +{ + args <- capture_args() + do.call(keras$callbacks$RemoteMonitor, args) } -#' (Deprecated) Base R6 class for Keras callbacks -#' -#' New custom callbacks implemented as R6 classes are encouraged to inherit from -#' `keras$callbacks$Callback` directly. -#' -#' @docType class + +#' Enable visualizations for TensorBoard. #' -#' @format An [R6Class] generator object +#' @description +#' TensorBoard is a visualization tool provided with TensorFlow. A TensorFlow +#' installation is required to use this callback. #' -#' @field params Named list with training parameters (eg. verbosity, batch size, number of epochs...). -#' @field model Reference to the Keras model being trained. +#' This callback logs events for TensorBoard, including: #' -#' @section Methods: -#' \describe{ -#' \item{\code{on_epoch_begin(epoch, logs)}}{Called at the beginning of each epoch.} -#' \item{\code{on_epoch_end(epoch, logs)}}{Called at the end of each epoch.} -#' \item{\code{on_batch_begin(batch, logs)}}{Called at the beginning of each batch.} -#' \item{\code{on_batch_end(batch, logs)}}{Called at the end of each batch.} -#' \item{\code{on_train_begin(logs)}}{Called at the beginning of training.} -#' \item{\code{on_train_end(logs)}}{Called at the end of training.} -#' } +#' * Metrics summary plots +#' * Training graph visualization +#' * Weight histograms +#' * Sampled profiling #' -#' @details The `logs` named list that callback methods take as argument will -#' contain keys for quantities relevant to the current batch or epoch. +#' When used in `model |> evaluate()` or regular validation +#' in addition to epoch summaries, there will be a summary that records +#' evaluation metrics vs `model$optimizer$iterations` written. The metric names +#' will be prepended with `evaluation`, with `model$optimizer$iterations` being +#' the step in the visualized TensorBoard. 
#' -#' Currently, the `fit.keras.engine.training.Model()` method for sequential -#' models will include the following quantities in the `logs` that -#' it passes to its callbacks: +#' If you have installed TensorFlow with `pip` or `reticulate::py_install()`, you should be able +#' to launch TensorBoard from the command line: #' -#' - `on_epoch_end`: logs include `acc` and `loss`, and optionally include `val_loss` (if validation is enabled in `fit`), and `val_acc` (if validation and accuracy monitoring are enabled). -#' - `on_batch_begin`: logs include `size`, the number of samples in the current batch. -#' - `on_batch_end`: logs include `loss`, and optionally `acc` (if accuracy monitoring is enabled). +#' ``` +#' tensorboard --logdir=path_to_your_logs +#' ``` +#' or from R with `tensorflow::tensorboard()`. #' -#' @return [KerasCallback]. -#' @keywords internal -#' @examples -#' \dontrun{ -#' library(keras) +#' You can find more information about TensorBoard +#' [here](https://www.tensorflow.org/get_started/summaries_and_tensorboard). +#' +#' # Examples +#' +#' ```{r, eval = FALSE} +#' tensorboard_callback <- callback_tensorboard(log_dir = "./logs") +#' model %>% fit(x_train, y_train, epochs = 2, callbacks = list(tensorboard_callback)) +#' # Then run the tensorboard command to view the visualizations. +#' ``` +#' +#' Custom batch-level summaries in a subclassed Model: +#' +#' ```{r, eval = FALSE} +#' MyModel <- new_model_class("MyModel", +#' initialize = function() { +#' self$dense <- layer_dense(units = 10) +#' }, +#' call = function(x) { +#' outputs <- x |> self$dense() +#' tf$summary$histogram('outputs', outputs) +#' outputs +#' } +#' ) #' -#' LossHistory <- R6::R6Class("LossHistory", -#' inherit = KerasCallback, +#' model <- MyModel() +#' model |> compile(optimizer = 'sgd', loss = 'mse') #' -#' public = list( +#' # Make sure to set `update_freq = N` to log a batch-level summary every N +#' # batches. In addition to any `tf$summary` contained in `model$call()`, +#' # metrics added in `model |>compile` will be logged every N batches. +#' tb_callback <- callback_tensorboard(log_dir = './logs', update_freq = 1) +#' model |> fit(x_train, y_train, callbacks = list(tb_callback)) +#' ``` #' -#' losses = NULL, +#' Custom batch-level summaries in a Functional API Model: #' -#' on_batch_end = function(batch, logs = list()) { -#' self$losses <- c(self$losses, logs[["loss"]]) -#' } -#' ) -#' ) +#' ```{r, eval = FALSE} +#' my_summary <- function(x) { +#' tf$summary$histogram('x', x) +#' x #' } +#' +#' inputs <- layer_input(10) +#' outputs <- inputs |> +#' layer_dense(10) |> +#' layer_lambda(my_summary) +#' +#' model <- keras_model(inputs, outputs) +#' model |> compile(optimizer = 'sgd', loss = 'mse') +#' +#' # Make sure to set `update_freq = N` to log a batch-level summary every N +#' # batches. In addition to any `tf.summary` contained in `Model.call`, +#' # metrics added in `Model.compile` will be logged every N batches. +#' tb_callback <- callback_tensorboard(log_dir = './logs', update_freq = 1) +#' model |> fit(x_train, y_train, callbacks = list(tb_callback)) +#' ``` +#' +#' Profiling: +#' +#' ```{r, eval = FALSE} +#' # Profile a single batch, e.g. the 5th batch. +#' tensorboard_callback <- callback_tensorboard( +#' log_dir = './logs', profile_batch = 5) +#' model |> fit(x_train, y_train, epochs = 2, +#' callbacks = list(tensorboard_callback)) +#' +#' # Profile a range of batches, e.g. from 10 to 20. 
+#' tensorboard_callback <- callback_tensorboard(
+#'   log_dir = './logs', profile_batch = c(10, 20))
+#' model |> fit(x_train, y_train, epochs = 2,
+#'              callbacks = list(tensorboard_callback))
+#' ```
+#'
+#' @param log_dir
+#' the path of the directory in which to save the log files to be
+#' parsed by TensorBoard, e.g.,
+#' `log_dir = file.path(working_dir, 'logs')`.
+#' This directory should not be reused by any other callbacks.
+#'
+#' @param histogram_freq
+#' frequency (in epochs) at which to compute
+#' weight histograms for the layers of the model. If set to 0,
+#' histograms won't be computed. Validation data (or split) must be
+#' specified for histogram visualizations.
+#'
+#' @param write_graph
+#' (Not supported at this time)
+#' Whether to visualize the graph in TensorBoard.
+#' Note that the log file can become quite large
+#' when `write_graph` is set to `TRUE`.
+#'
+#' @param write_images
+#' whether to write model weights to visualize as images in
+#' TensorBoard.
+#'
+#' @param write_steps_per_second
+#' whether to log the training steps per second
+#' into TensorBoard. This supports both epoch and batch frequency
+#' logging.
+#'
+#' @param update_freq
+#' `"batch"` or `"epoch"` or integer. When using `"epoch"`,
+#' writes the losses and metrics to TensorBoard after every epoch.
+#' If using an integer, let's say `1000`, all metrics and losses
+#' (including custom ones added by `Model.compile`) will be logged to
+#' TensorBoard every 1000 batches. `"batch"` is a synonym for 1,
+#' meaning that they will be written every batch.
+#' Note however that writing too frequently to TensorBoard can slow
+#' down your training, especially when used with distribution
+#' strategies as it will incur additional synchronization overhead.
+#' Batch-level summary writing is also available via `train_step`
+#' override. Please see
+#' [TensorBoard Scalars tutorial](
+#' https://www.tensorflow.org/tensorboard/scalars_and_keras#batch-level_logging)
+#' for more details.
+#'
+#' @param profile_batch
+#' (Not supported at this time)
+#' Profile the batch(es) to sample compute characteristics.
+#' `profile_batch` must be a non-negative integer or a pair of integers.
+#' A pair of positive integers signifies a range of batches to profile.
+#' By default, profiling is disabled.
+#'
+#' @param embeddings_freq
+#' frequency (in epochs) at which embedding layers will be
+#' visualized. If set to 0, embeddings won't be visualized.
+#'
+#' @param embeddings_metadata
+#' Named list which maps embedding layer names to the
+#' filename of a file in which to save metadata for the embedding layer.
+#' In case the same metadata file is to be
+#' used for all embedding layers, a single filename can be passed.
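+#'
+#' For example, a hypothetical sketch (assuming the model has an embedding
+#' layer named `"embedding"` and a metadata file `"meta.tsv"` on disk):
+#' ```r
+#' callback_tensorboard(
+#'   log_dir = "./logs", embeddings_freq = 1,
+#'   embeddings_metadata = list(embedding = "meta.tsv")
+#' )
+#' ```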
+#' +#' @inherit callback_backup_and_restore return #' @export -KerasCallback <- R6Class("KerasCallback", - - public = list( - - params = NULL, - model = NULL, - - set_context = function(params = NULL, model = NULL) { - self$params <- params - self$model <- model - }, - - on_epoch_begin = function(epoch, logs = NULL) { - - }, - - on_epoch_end = function(epoch, logs = NULL) { - - }, - - on_batch_begin = function(batch, logs = NULL) { - - }, - - on_batch_end = function(batch, logs = NULL) { - - }, - - on_train_begin = function(logs = NULL) { - - }, - - on_train_end = function(logs = NULL) { - - }, - - on_predict_batch_begin = function(batch, logs = NULL) { - - }, - - on_predict_batch_end = function(batch, logs = NULL) { - - }, - - on_predict_begin = function(logs = NULL) { - - }, - - on_predict_end = function(logs = NULL) { - - }, - - on_test_batch_begin = function(batch, logs = NULL) { - - }, - - on_test_batch_end = function(batch, logs = NULL) { - - }, - - on_test_begin = function(logs = NULL) { - - }, - - on_test_end = function(logs = NULL) { - - }, - - on_train_batch_begin = function(batch, logs = NULL) { - - }, - - on_train_batch_end = function(batch, logs = NULL) { - - } - - ) -) - -normalize_callbacks_with_metrics <- function(view_metrics, initial_epoch, callbacks) { - - # if callbacks isn't a list then make it one - if (!is.null(callbacks) && !is.list(callbacks)) - callbacks <- list(callbacks) - - # always include the metrics callback - if (tensorflow::tf_version() >= "2.2.0") - metrics_callback <- KerasMetricsCallbackV2$new(view_metrics, initial_epoch) - else - metrics_callback <- KerasMetricsCallback$new(view_metrics) - - callbacks <- append(callbacks, metrics_callback) - - normalize_callbacks(callbacks) +#' @family callbacks +#' @seealso +#' + +# + +#' @tether keras.callbacks.TensorBoard +callback_tensorboard <- +function (log_dir = "logs", histogram_freq = 0L, write_graph = TRUE, + write_images = FALSE, write_steps_per_second = FALSE, update_freq = "epoch", + profile_batch = 0L, embeddings_freq = 0L, embeddings_metadata = NULL) +{ + args <- capture_args(list(histogram_freq = as_integer, update_freq = as_integer, + profile_batch = as_integer, embeddings_freq = as_integer)) + do.call(keras$callbacks$TensorBoard, args) } -warn_callback <- function(callback) { - - new_callbacks <- c("on_predict_batch_begin", "on_predict_batch_end", - "on_predict_begin", "on_predict_end", - "on_test_batch_begin", "on_test_batch_end", - "on_test_begin", "on_test_end", - "on_train_batch_begin", "on_train_batch_end" - ) - - lapply(new_callbacks, function(x) { +#' Callback that terminates training when a NaN loss is encountered. +#' +#' @inherit callback_backup_and_restore return +#' @export +#' @family callbacks +#' @seealso +#' + +# + +#' @tether keras.callbacks.TerminateOnNaN +callback_terminate_on_nan <- +function () +{ + args <- capture_args() + do.call(keras$callbacks$TerminateOnNaN, args) +} - if (!(get_keras_implementation() == "tensorflow" && - tensorflow::tf_version() >= "2.0")) { - - if (inherits(callback, "KerasCallback")) { - - # workaround to find out if the body is empty as expected. - bdy <- paste(as.character(body(callback[[x]])), collapse = "") - if (is.null(body) || bdy != "{") { - warning("Callback '", x, "' only works with Keras TensorFlow", - " implementation and Tensorflow >= 2.0") - } +#' Swaps model weights and EMA weights before and after evaluation. 
+#'
+#' @description
+#' This callback replaces the model's weight values with the values of
+#' the optimizer's EMA weights (the exponential moving average of the past
+#' model weight values, implementing "Polyak averaging") before model
+#' evaluation, and restores the previous weights after evaluation.
+#'
+#' The `SwapEMAWeights` callback is to be used in conjunction with
+#' an optimizer that sets `use_ema = TRUE`.
+#'
+#' Note that the weights are swapped in-place in order to save memory.
+#' The behavior is undefined if you modify the EMA weights
+#' or model weights in other callbacks.
+#'
+#' # Examples
+#' ```r
+#' # Remember to set `use_ema = TRUE` in the optimizer
+#' optimizer <- optimizer_sgd(use_ema = TRUE)
+#' model |> compile(optimizer = optimizer, loss = ..., metrics = ...)
+#'
+#' # Metrics will be computed with EMA weights
+#' model |> fit(X_train, Y_train,
+#'              callbacks = c(callback_swap_ema_weights()))
+#'
+#' # If you want to save model checkpoints with EMA weights, you can set
+#' # `swap_on_epoch = TRUE` and place callback_model_checkpoint() after
+#' # callback_swap_ema_weights().
+#' model |> fit(
+#'   X_train, Y_train,
+#'   callbacks = c(
+#'     callback_swap_ema_weights(swap_on_epoch = TRUE),
+#'     callback_model_checkpoint(...)
+#'   )
+#' )
+#' ```
+#'
+#' @param swap_on_epoch
+#' Whether to perform swapping at `on_epoch_begin()`
+#' and `on_epoch_end()`. This is useful if you want to use
+#' EMA weights for other callbacks such as `callback_model_checkpoint()`.
+#' Defaults to `FALSE`.
+#'
+#' @inherit callback_backup_and_restore return
+#' @export
+#' @family callbacks
+#' @tether keras.callbacks.SwapEMAWeights
+# @seealso
+# +
+callback_swap_ema_weights <-
+function (swap_on_epoch = FALSE)
+{
+    args <- capture_args(NULL)
+    do.call(keras$callbacks$SwapEMAWeights, args)
+}

-    } else if (inherits(callback, "list")) {
-      if (!is.null(callback[[x]])) {
-        warning("Callback '", x, "' only works with Keras TensorFlow",
-                " implementation and Tensorflow >= 2.0")
-      }
+# --------------------------------------------------------------------------------

-    }
-  }
-  })

 normalize_callbacks_with_metrics <- function(view_metrics, initial_epoch, callbacks) {

-  invisible(NULL)
-}
+  # include the metrics callback if needed
+  if (view_metrics || tfruns::is_run_active())
+    append(callbacks) <- callback_view_metrics(view_metrics, initial_epoch)

-normalize_callbacks <- function(callbacks) {
-
-  # if callbacks isn't a list then make it one
-  if (!is.null(callbacks) && !is.list(callbacks))
-    callbacks <- list(callbacks)
-
-  # import callback utility module
-  python_path <- system.file("python", package = "keras")
-  tools <- import_from_path("kerastools", path = python_path)
-
-  # convert R callbacks to Python and check whether the user
-  # has already included the tensorboard callback
-  have_tensorboard_callback <- FALSE
-  callbacks <- lapply(callbacks, function(callback) {
-
-    warn_callback(callback)
-
-    # track whether we have a TensorBoard callback
-    if (inherits(callback, "keras.callbacks.TensorBoard"))
-      have_tensorboard_callback <<- TRUE
-
-    if (inherits(callback, "KerasCallback")) {
-
-      args <- list(
-        r_set_context = callback$set_context,
-        r_on_epoch_begin = callback$on_epoch_begin,
-        r_on_epoch_end = callback$on_epoch_end,
-        r_on_train_begin = callback$on_train_begin,
-        r_on_train_end = callback$on_train_end,
-        r_on_batch_begin = callback$on_batch_begin,
-        r_on_batch_end = callback$on_batch_end,
-        r_on_predict_batch_begin = callback$on_predict_batch_begin,
-        r_on_predict_batch_end = callback$on_predict_batch_end,
-
            r_on_predict_begin = callback$on_predict_begin,
-            r_on_predict_end = callback$on_predict_end,
-            r_on_test_batch_begin = callback$on_test_batch_begin,
-            r_on_test_batch_end = callback$on_test_batch_end,
-            r_on_test_begin = callback$on_test_begin,
-            r_on_test_end = callback$on_test_end,
-            r_on_train_batch_begin = callback$on_train_batch_begin,
-            r_on_train_batch_end = callback$on_train_batch_end
-        )
-
-        # on_batch_* -> on_train_batch_*
-        if (!isTRUE(all.equal(callback$on_batch_begin, empty_fun))) {
-          args$r_on_train_batch_begin <- callback$on_batch_begin
-        }
+  # add the tensorboard callback if necessary
+  if (tfruns::is_run_active() || nzchar(Sys.getenv("RUN_DIR"))) {

-        if (!isTRUE(all.equal(callback$on_batch_end, empty_fun))) {
-          args$r_on_train_batch_end <- callback$on_batch_end
+    have_tensorboard_callback <- FALSE
+    for (callback in callbacks)
+      if (inherits(callback, "keras.src.callbacks.tensorboard.TensorBoard")) {
+        have_tensorboard_callback <- TRUE
+        break
       }

-        # create a python callback to map to our R callback
-        do.call(tools$callback$RCallback, args)
-      } else {
-        callback
-      }
-    })
+    if (!have_tensorboard_callback)
+      append(callbacks) <- callback_tensorboard()

-  # add the tensorboard callback if necessary
-  if (is_backend("tensorflow") && tfruns::is_run_active() && !have_tensorboard_callback)
-    callbacks <- append(callbacks, callback_tensorboard())
+  }

-  # return the callbacks
   callbacks
 }

-empty_fun <- function(batch, logs = NULL) {}
-
-
diff --git a/R/config.R b/R/config.R
new file mode 100644
index 0000000000..4de657fbe9
--- /dev/null
+++ b/R/config.R
@@ -0,0 +1,518 @@
+
+
+
+#' Publicly accessible method for determining the current backend.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' config_backend()
+#' ```
+#'
+#' @returns
+#' String, the name of the backend Keras is currently using. One of
+#' `"tensorflow"`, `"torch"`, or `"jax"`.
+#'
+#' @export
+#' @family config backend
+#' @family backend
+#' @family config
+#' @seealso
+#' [use_backend()]
# +
+#' @tether keras.config.backend
+config_backend <-
+function ()
+{
+    keras$config$backend()
+}

+#' Reload the backend (and the Keras package).
+#'
+#' @description
+#'
+#' # Examples
+#' ```r
+#' config_set_backend("jax")
+#' ```
+#'
+#' # WARNING
+#' Using this function is dangerous and should be done
+#' carefully. Changing the backend will **NOT** convert
+#' the type of any already-instantiated objects.
+#' Thus, any layers / tensors / etc. already created will no
+#' longer be usable without errors. It is strongly recommended **not**
+#' to keep around **any** Keras-originated object instances created
+#' before calling `config_set_backend()`.
+#'
+#' This includes any function or class instance that uses any Keras
+#' functionality. All such code needs to be re-executed after calling
+#' `config_set_backend()`.
+#'
+#' @param backend String, name of the backend to switch to, e.g. `"jax"`.
+#'
+#' @returns Nothing, this function is called for its side effect.
+#'
+#' @family config
+#' @export
+#' @tether keras.config.set_backend
+config_set_backend <-
+function (backend)
+{
+    if(!is_keras_loaded())
+      return(use_backend(backend))
+    keras$config$set_backend(backend)
+    invisible(backend)
+}
+
+
+#' Return the value of the fuzz factor used in numeric expressions.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' config_epsilon()
+#' ```
+#'
+#' @returns
+#' A float.
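+#'
+#' As a hedged illustration of what the fuzz factor is for (`safe_log()`
+#' here is a hypothetical helper, not part of the package):
+#'
+#' ```r
+#' safe_log <- function(x) log(x + config_epsilon())  # avoids log(0) = -Inf
+#' safe_log(0)
+#' ```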
+#'
+#' @export
+#' @family config backend
+#' @family backend
+#' @family config
+#' @seealso
+#' +
# +
+#' @tether keras.config.epsilon
+config_epsilon <-
+function ()
+{
+    keras$config$epsilon()
+}
+
+
+#' Return the default float type, as a string.
+#'
+#' @description
+#' E.g. `'bfloat16'`, `'float16'`, `'float32'`, `'float64'`.
+#'
+#' # Examples
+#' ```{r}
+#' keras3::config_floatx()
+#' ```
+#'
+#' @returns
+#' String, the current default float type.
+#'
+#' @export
+#' @family config backend
+#' @family backend
+#' @family config
+#' @seealso
+#' +
# +
+#' @tether keras.config.floatx
+config_floatx <-
+function ()
+{
+    keras$config$floatx()
+}
+
+
+#' Return the default image data format convention.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' config_image_data_format()
+#' ```
+#'
+#' @returns
+#' A string, either `'channels_first'` or `'channels_last'`.
+#'
+#' @export
+#' @family config backend
+#' @family backend
+#' @family config
+#' @seealso
+#' +
# +
+#' @tether keras.config.image_data_format
+config_image_data_format <-
+function ()
+{
+    args <- capture_args()
+    do.call(keras$config$image_data_format, args)
+}
+
+
+#' Set the value of the fuzz factor used in numeric expressions.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' config_epsilon()
+#' ```
+#'
+#' ```{r}
+#' config_set_epsilon(1e-5)
+#' config_epsilon()
+#' ```
+#'
+#' ```{r}
+#' # Set it back to the default value.
+#' config_set_epsilon(1e-7)
+#' ```
+#'
+#' @param value
+#' float. New value of epsilon.
+#'
+#' @returns No return value, called for side effects.
+#' @export
+#' @family config backend
+#' @family backend
+#' @family config
+#' @seealso
+#' +
# +
+#' @tether keras.config.set_epsilon
+config_set_epsilon <-
+function (value)
+{
+    args <- capture_args()
+    do.call(keras$config$set_epsilon, args)
+}
+
+
+#' Set the default float dtype.
+#'
+#' @description
+#'
+#' # Note
+#' It is not recommended to set this to `"float16"` for training,
+#' as this will likely cause numeric stability issues.
+#' Instead, use mixed precision, which leverages a mix of
+#' `float16` and `float32`. It can be configured by calling
+#' `config_set_dtype_policy('mixed_float16')`.
+#'
+#' # Examples
+#' ```{r}
+#' config_floatx()
+#' ```
+#'
+#' ```{r}
+#' config_set_floatx('float64')
+#' config_floatx()
+#' ```
+#'
+#' ```{r}
+#' # Set it back to float32
+#' config_set_floatx('float32')
+#' ```
+#'
+#' # Raises
+#' ValueError: In case of invalid value.
+#'
+#' @param value
+#' String; `'bfloat16'`, `'float16'`, `'float32'`, or `'float64'`.
+#'
+#' @returns No return value, called for side effects.
+#' @export
+#' @family config backend
+#' @family backend
+#' @family config
+#' @seealso
+#' +
# +
+#' @tether keras.config.set_floatx
+config_set_floatx <-
+function (value)
+{
+    keras$config$set_floatx(value)
+}
+
+
+#' Set the value of the image data format convention.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' config_image_data_format()
+#' # 'channels_last'
+#' ```
+#'
+#' ```{r}
+#' keras3::config_set_image_data_format('channels_first')
+#' config_image_data_format()
+#' ```
+#'
+#' ```{r}
+#' # Set it back to `'channels_last'`
+#' keras3::config_set_image_data_format('channels_last')
+#' ```
+#'
+#' @param data_format
+#' string. `'channels_first'` or `'channels_last'`.
+#'
+#' @returns No return value, called for side effects.
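+#'
+#' A hedged sketch of the downstream effect (the shapes are illustrative,
+#' not computed here):
+#'
+#' ```r
+#' config_set_image_data_format('channels_last')
+#' # a batch of 32x32 RGB images is then expected as (batch, 32, 32, 3);
+#' # under 'channels_first' it would be (batch, 3, 32, 32)
+#' ```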
+#' @export +#' @family config backend +#' @family backend +#' @family config +#' @seealso +#' + +# + +#' @tether keras.config.set_image_data_format +config_set_image_data_format <- +function (data_format) +{ + args <- capture_args() + do.call(keras$config$set_image_data_format, args) +} + + +#' Disables safe mode globally, allowing deserialization of lambdas. +#' +#' @returns No return value, called for side effects. +#' @export +#' @family saving +#' @family config +# @seealso +# + +#' @tether keras.config.enable_unsafe_deserialization +config_enable_unsafe_deserialization <- +function () +{ + args <- capture_args() + do.call(keras$config$enable_unsafe_deserialization, args) +} + + +#' Turn off interactive logging. +#' +#' @description +#' When interactive logging is disabled, Keras sends logs to `absl.logging`. +#' This is the best option when using Keras in a non-interactive +#' way, such as running a training or inference job on a server. +#' +#' @returns No return value, called for side effects. +#' @export +#' @family io utils +#' @family utils +#' @family config +# @seealso +# + +#' @tether keras.config.disable_interactive_logging +config_disable_interactive_logging <- +function () +{ + args <- capture_args() + do.call(keras$config$disable_interactive_logging, args) +} + + +#' Turn on interactive logging. +#' +#' @description +#' When interactive logging is enabled, Keras displays logs via stdout. +#' This provides the best experience when using Keras in an interactive +#' environment such as a shell or a notebook. +#' +#' @returns No return value, called for side effects. +#' @export +#' @family io utils +#' @family utils +#' @family config +# @seealso +# + +#' @tether keras.config.enable_interactive_logging +config_enable_interactive_logging <- +function () +{ + args <- capture_args() + do.call(keras$config$enable_interactive_logging, args) +} + + +#' Check if interactive logging is enabled. +#' +#' @description +#' To switch between writing logs to stdout and `absl.logging`, you may use +#' [`config_enable_interactive_logging()`] and +#' [`config_disable_interactive_logging()`]. +#' +#' @returns +#' Boolean, `TRUE` if interactive logging is enabled, +#' and `FALSE` otherwise. +#' +#' @export +#' @family io utils +#' @family utils +#' @family config +# @seealso +# + +#' @tether keras.config.is_interactive_logging_enabled +config_is_interactive_logging_enabled <- +function () +{ + args <- capture_args() + do.call(keras$config$is_interactive_logging_enabled, args) +} + + +#' Turn off traceback filtering. +#' +#' @description +#' Raw Keras tracebacks (also known as stack traces) +#' involve many internal frames, which can be +#' challenging to read through, while not being actionable for end users. +#' By default, Keras filters internal frames in most exceptions that it +#' raises, to keep traceback short, readable, and focused on what's +#' actionable for you (your own code). +#' +#' See also [`config_enable_traceback_filtering()`] and +#' [`config_is_traceback_filtering_enabled()`]. +#' +#' If you have previously disabled traceback filtering via +#' [`config_disable_traceback_filtering()`], you can re-enable it via +#' [`config_enable_traceback_filtering()`]. +#' +#' @returns No return value, called for side effects. 
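+#'
+#' A hedged sketch of a typical debugging workflow (the error being
+#' chased is illustrative):
+#'
+#' ```r
+#' config_disable_traceback_filtering()  # show all internal frames
+#' # ... reproduce and inspect the failing call ...
+#' config_enable_traceback_filtering()   # back to concise tracebacks
+#' ```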
+#' @export +#' @family traceback utils +#' @family utils +#' @family config +# @seealso +# + +#' @tether keras.config.disable_traceback_filtering +config_disable_traceback_filtering <- +function () +{ + args <- capture_args() + do.call(keras$config$disable_traceback_filtering, args) +} + + +#' Turn on traceback filtering. +#' +#' @description +#' Raw Keras tracebacks (also known as stack traces) +#' involve many internal frames, which can be +#' challenging to read through, while not being actionable for end users. +#' By default, Keras filters internal frames in most exceptions that it +#' raises, to keep traceback short, readable, and focused on what's +#' actionable for you (your own code). +#' +#' See also [`config_disable_traceback_filtering()`] and +#' [`config_is_traceback_filtering_enabled()`]. +#' +#' If you have previously disabled traceback filtering via +#' [`config_disable_traceback_filtering()`], you can re-enable it via +#' [`config_enable_traceback_filtering()`]. +#' +#' @returns No return value, called for side effects. +#' @export +#' @family traceback utils +#' @family utils +#' @family config +# @seealso +# + +#' @tether keras.config.enable_traceback_filtering +config_enable_traceback_filtering <- +function () +{ + args <- capture_args() + do.call(keras$config$enable_traceback_filtering, args) +} + + +#' Check if traceback filtering is enabled. +#' +#' @description +#' Raw Keras tracebacks (also known as stack traces) +#' involve many internal frames, which can be +#' challenging to read through, while not being actionable for end users. +#' By default, Keras filters internal frames in most exceptions that it +#' raises, to keep traceback short, readable, and focused on what's +#' actionable for you (your own code). +#' +#' See also [`config_enable_traceback_filtering()`] and +#' [`config_disable_traceback_filtering()`]. +#' +#' If you have previously disabled traceback filtering via +#' [`config_disable_traceback_filtering()`], you can re-enable it via +#' [`config_enable_traceback_filtering()`]. +#' +#' @returns +#' Boolean, `TRUE` if traceback filtering is enabled, +#' and `FALSE` otherwise. +#' +#' @export +#' @family traceback utils +#' @family utils +#' @family config +# @seealso +# + +#' @tether keras.config.is_traceback_filtering_enabled +config_is_traceback_filtering_enabled <- +function () +{ + args <- capture_args() + do.call(keras$config$is_traceback_filtering_enabled, args) +} + + +#' Returns the current default dtype policy object. +#' +#' @export +#' @returns A `DTypePolicy` object. +#' @tether keras.config.dtype_policy +#' +# @seealso +# + +config_dtype_policy <- +function () +{ + keras$config$dtype_policy() +} + +#' Sets the default dtype policy globally. +#' +#' @description +#' +#' # Examples +#' ```r +#' config_set_dtype_policy("mixed_float16") +#' ``` +#' @param policy A string or `DTypePolicy` object. +#' @returns No return value, called for side effects. +#' @export +#' @tether keras.config.set_dtype_policy +# @seealso +# + +config_set_dtype_policy <- +function (policy) +{ + args <- capture_args() + do.call(keras$config$set_dtype_policy, args) +} + + diff --git a/R/constraints.R b/R/constraints.R index 0f40fdf678..88d404e33a 100644 --- a/R/constraints.R +++ b/R/constraints.R @@ -1,157 +1,140 @@ -#' Weight constraints -#' -#' Functions that impose constraints on weight values. -#' -#' @details -#' - `constraint_maxnorm()` constrains the weights incident to each -#' hidden unit to have a norm less than or equal to a desired value. 
-#' - `constraint_nonneg()` constraints the weights to be non-negative -#' - `constraint_unitnorm()` constrains the weights incident to each hidden -#' unit to have unit norm. -#' - `constraint_minmaxnorm()` constrains the weights incident to each -#' hidden unit to have the norm between a lower bound and an upper bound. -#' -#' @param axis The axis along which to calculate weight norms. For instance, in -#' a dense layer the weight matrix has shape `input_dim, output_dim`, set -#' `axis` to `0` to constrain each weight vector of length `input_dim,`. In a -#' convolution 2D layer with `dim_ordering="tf"`, the weight tensor has shape -#' `rows, cols, input_depth, output_depth`, set `axis` to `c(0, 1, 2)` to -#' constrain the weights of each filter tensor of size `rows, cols, -#' input_depth`. -#' @param min_value The minimum norm for the incoming weights. -#' @param max_value The maximum norm for the incoming weights. -#' @param rate The rate for enforcing the constraint: weights will be rescaled to -#' yield (1 - rate) * norm + rate * norm.clip(low, high). Effectively, this -#' means that rate=1.0 stands for strict enforcement of the constraint, while -#' rate<1.0 means that weights will be rescaled at each step to slowly move -#' towards a value inside the desired interval. -#' -#' -#' @section Custom constraints: -#' -#' You can implement your own constraint functions in R. A custom -#' constraint is an R function that takes weights (`w`) as input -#' and returns modified weights. Note that keras [backend()] tensor -#' functions (e.g. [k_greater_equal()]) should be used in the -#' implementation of custom constraints. For example: -#' -#' ```r -#' nonneg_constraint <- function(w) { -#' w * k_cast(k_greater_equal(w, 0), k_floatx()) -#' } -#' -#' layer_dense(units = 32, input_shape = c(784), -#' kernel_constraint = nonneg_constraint) -#' ``` -#' -#' Note that models which use custom constraints cannot be serialized using -#' [save_model_hdf5()]. Rather, the weights of the model should be saved -#' and restored using [save_model_weights_hdf5()]. -#' -#' @seealso [Dropout: A Simple Way to Prevent Neural Networks from Overfitting -#' Srivastava, Hinton, et al. -#' 2014](https://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf) -#' -#' @name constraints -#' -#' @seealso [KerasConstraint] -#' -#' @export -constraint_maxnorm <- function(max_value = 2, axis = 0) { - keras$constraints$MaxNorm(max_value = as.integer(max_value), axis = as.integer(axis)) -} -#' @rdname constraints + +#' MaxNorm weight constraint. +#' +#' @description +#' Constrains the weights incident to each hidden unit +#' to have a norm less than or equal to a desired value. +#' +#' @param max_value +#' the maximum norm value for the incoming weights. +#' +#' @param axis +#' integer, axis along which to calculate weight norms. +#' For instance, in a `Dense` layer the weight matrix +#' has shape `(input_dim, output_dim)`, +#' set `axis` to `0` to constrain each weight vector +#' of length `(input_dim,)`. +#' In a `Conv2D` layer with `data_format = "channels_last"`, +#' the weight tensor has shape +#' `(rows, cols, input_depth, output_depth)`, +#' set `axis` to `[0, 1, 2]` +#' to constrain the weights of each filter tensor of size +#' `(rows, cols, input_depth)`. +#' +#' @returns A `Constraint` instance, a callable that can be passed to layer +#' constructors or used directly by calling it with tensors. 
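+#'
+#' A minimal usage sketch (the dense layer is illustrative; any layer
+#' accepting a `kernel_constraint` works the same way):
+#'
+#' ```r
+#' layer <- layer_dense(units = 64,
+#'                      kernel_constraint = constraint_maxnorm(max_value = 2))
+#' ```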
 #' @export
-constraint_nonneg <- function() {
-  keras$constraints$NonNeg()
+#' @family constraints
+#' @seealso
+#' +
# +
+#' @tether keras.constraints.MaxNorm
+constraint_maxnorm <-
+function (max_value = 2L, axis = 1L)
+{
+    args <- capture_args(list(max_value = as_integer, axis = as_axis))
+    do.call(keras$constraints$MaxNorm, args)
 }

-#' @rdname constraints
+#' MinMaxNorm weight constraint.
+#'
+#' @description
+#' Constrains the weights incident to each hidden unit
+#' to have the norm between a lower bound and an upper bound.
+#'
+#' @param min_value
+#' the minimum norm for the incoming weights.
+#'
+#' @param max_value
+#' the maximum norm for the incoming weights.
+#'
+#' @param rate
+#' rate for enforcing the constraint: weights will be
+#' rescaled to yield
+#' `(1 - rate) * norm + rate * op_clip(norm, min_value, max_value)`.
+#' Effectively, this means that `rate = 1.0` stands for strict
+#' enforcement of the constraint, while `rate < 1.0` means that
+#' weights will be rescaled at each step to slowly move
+#' towards a value inside the desired interval.
+#'
+#' @param axis
+#' integer, axis along which to calculate weight norms.
+#' For instance, in a `Dense` layer the weight matrix
+#' has shape `(input_dim, output_dim)`,
+#' set `axis` to `0` to constrain each weight vector
+#' of length `(input_dim,)`.
+#' In a `Conv2D` layer with `data_format = "channels_last"`,
+#' the weight tensor has shape
+#' `(rows, cols, input_depth, output_depth)`,
+#' set `axis` to `[0, 1, 2]`
+#' to constrain the weights of each filter tensor of size
+#' `(rows, cols, input_depth)`.
+#'
+#' @inherit constraint_maxnorm return
 #' @export
-constraint_unitnorm <- function(axis = 0) {
-  keras$constraints$UnitNorm(axis = as.integer(axis))
+#' @family constraints
+#' @seealso
+#' +
# +
+#' @tether keras.constraints.MinMaxNorm
+constraint_minmaxnorm <-
+function (min_value = 0, max_value = 1, rate = 1, axis = 1L)
+{
+    args <- capture_args(list(axis = as_axis))
+    do.call(keras$constraints$MinMaxNorm, args)
 }

-#' @rdname constraints
+
+#' Constrains the weights to be non-negative.
+#'
+#' @inherit constraint_maxnorm return
 #' @export
-constraint_minmaxnorm <- function(min_value = 0.0, max_value = 1.0, rate = 1.0, axis = 0) {
-  keras$constraints$MinMaxNorm(min_value = min_value, max_value = max_value, rate = rate, axis = as.integer(axis))
+#' @family constraints
+#' @seealso
+#' +
# +
+#' @tether keras.constraints.NonNeg
+constraint_nonneg <-
+function ()
+{
+    args <- capture_args()
+    do.call(keras$constraints$NonNeg, args)
 }

-#' (Deprecated) Base R6 class for Keras constraints
-#'
-#' New custom constraints are encouraged to subclass `keras$constraints$Constraint` directly.
-#'
-#' @docType class
-#'
-#' @format An [R6Class] generator object
-#'
-#' @section Methods:
-#' \describe{
-#'   \item{\code{call(w)}}{Constrain the specified weights.}
-#' }
-#'
-#' @details You can implement a custom constraint either by creating an
-#'   R function that accepts a weights (`w`) parameter, or by creating
-#'   an R6 class that derives from `KerasConstraint` and implements a
-#'   `call` method.
-#'
-#' @note
-#' Models which use custom constraints cannot be serialized using
-#' [save_model_hdf5()]. Rather, the weights of the model should be saved
-#' and restored using [save_model_weights_hdf5()].
-#' -#' @examples \dontrun{ -#' CustomNonNegConstraint <- R6::R6Class( -#' "CustomNonNegConstraint", -#' inherit = KerasConstraint, -#' public = list( -#' call = function(x) { -#' w * k_cast(k_greater_equal(w, 0), k_floatx()) -#' } -#' ) -#' ) -#' -#' layer_dense(units = 32, input_shape = c(784), -#' kernel_constraint = CustomNonNegConstraint$new()) -#' } -#' -#' @seealso [constraints] -#' -#' @keywords internal +#' Constrains the weights incident to each hidden unit to have unit norm. +#' +#' @param axis +#' integer, axis along which to calculate weight norms. +#' For instance, in a `Dense` layer the weight matrix +#' has shape `(input_dim, output_dim)`, +#' set `axis` to `0` to constrain each weight vector +#' of length `(input_dim,)`. +#' In a `Conv2D` layer with `data_format = "channels_last"`, +#' the weight tensor has shape +#' `(rows, cols, input_depth, output_depth)`, +#' set `axis` to `[0, 1, 2]` +#' to constrain the weights of each filter tensor of size +#' `(rows, cols, input_depth)`. +#' +#' @inherit constraint_maxnorm return #' @export -KerasConstraint <- R6::R6Class("KerasConstraint", - public = list( - call = function(w) { - stop("Keras custom constraints must implement the call function") - }, - get_config = function() { - reticulate::dict() - }) -) +#' @family constraints +#' @seealso +#' + +# + +#' @tether keras.constraints.UnitNorm +constraint_unitnorm <- +function (axis = 1L) +{ + args <- capture_args(list(axis = as_axis)) + do.call(keras$constraints$UnitNorm, args) +} -as_constraint <- function(constraint) { - # helper to create constraint - create_constraint <- function(call, get_config = NULL) { - if (is.null(get_config)) - get_config <- function() dict() - python_path <- system.file("python", package = "keras") - tools <- import_from_path("kerastools", path = python_path) - tools$constraint$RConstraint(call, get_config) - } +# -------------------------------------------------------------------------------- - if (inherits(constraint, "keras.constraints.Constraint")) { - constraint - } else if (is.function(constraint)) { - create_constraint(constraint) - } else if (inherits(constraint, "KerasConstraint")) { - create_constraint(constraint$call, constraint$get_config) - } else { - constraint - } -} diff --git a/R/dataset-utils.R b/R/dataset-utils.R new file mode 100644 index 0000000000..649a448596 --- /dev/null +++ b/R/dataset-utils.R @@ -0,0 +1,743 @@ + + +#' Packs user-provided data into a list. +#' +#' @description +#' This is a convenience utility for packing data into the list formats +#' that `fit()` uses. +#' +#' # Example +#' +#' ```{r} +#' x <- op_ones(c(10, 1)) +#' data <- pack_x_y_sample_weight(x) +#' +#' +#' y <- op_ones(c(10, 1)) +#' data <- pack_x_y_sample_weight(x, y) +#' ``` +#' +#' @returns +#' List in the format used in `fit()`. +#' +#' @param x +#' Features to pass to `Model`. +#' +#' @param y +#' Ground-truth targets to pass to `Model`. +#' +#' @param sample_weight +#' Sample weight for each element. +#' +#' @export +#' @family data utils +#' @family utils +# @seealso +# + +#' +#' @tether keras.utils.pack_x_y_sample_weight +#' @keywords internal +pack_x_y_sample_weight <- +function (x, y = NULL, sample_weight = NULL) +{ + args <- capture_args() + do.call(keras$utils$pack_x_y_sample_weight, args) +} + + +#' Unpacks user-provided data list. +#' +#' @description +#' This is a convenience utility to be used when overriding +#' `$train_step`, `$test_step`, or `$predict_step`. 
+#' This utility makes it easy to support data of the form `(x,)`, +#' `(x, y)`, or `(x, y, sample_weight)`. +#' +#' # Example: +#' +#' ```{r} +#' features_batch <- op_ones(c(10, 5)) +#' labels_batch <- op_zeros(c(10, 5)) +#' data <- list(features_batch, labels_batch) +#' # `y` and `sample_weight` will default to `NULL` if not provided. +#' c(x, y, sample_weight) %<-% unpack_x_y_sample_weight(data) +#' ``` +#' +#' You can also do the equivalent by providing default values to `%<-%` +#' +#' ```r +#' c(x, y = NULL, sample_weight = NULL) %<-% data +#' ``` +#' @returns +#' The unpacked list, with `NULL`s for `y` and `sample_weight` if they are +#' not provided. +#' +#' @param data +#' A list of the form `(x)`, `(x, y)`, or `(x, y, sample_weight)`. +#' +#' @export +#' @family data utils +#' @family utils +#' @keywords internal +# @seealso +# + +#' +#' @tether keras.utils.unpack_x_y_sample_weight +unpack_x_y_sample_weight <- +function (data) +{ + args <- capture_args() + do.call(keras$utils$unpack_x_y_sample_weight, args) +} + + +#' Generates a `tf.data.Dataset` from audio files in a directory. +#' +#' @description +#' If your directory structure is: +#' +#' ``` +#' main_directory/ +#' ...class_a/ +#' ......a_audio_1.wav +#' ......a_audio_2.wav +#' ...class_b/ +#' ......b_audio_1.wav +#' ......b_audio_2.wav +#' ``` +#' +#' Then calling `audio_dataset_from_directory(main_directory, +#' labels = 'inferred')` +#' will return a `tf.data.Dataset` that yields batches of audio files from +#' the subdirectories `class_a` and `class_b`, together with labels +#' 0 and 1 (0 corresponding to `class_a` and 1 corresponding to `class_b`). +#' +#' Only `.wav` files are supported at this time. +#' +#' @returns +#' A `tf.data.Dataset` object. +#' +#' - If `label_mode` is `NULL`, it yields `string` tensors of shape +#' `(batch_size,)`, containing the contents of a batch of audio files. +#' - Otherwise, it yields a tuple `(audio, labels)`, where `audio` +#' has shape `(batch_size, sequence_length, num_channels)` and `labels` +#' follows the format described +#' below. +#' +#' Rules regarding labels format: +#' +#' - if `label_mode` is `int`, the labels are an `int32` tensor of shape +#' `(batch_size,)`. +#' - if `label_mode` is `binary`, the labels are a `float32` tensor of +#' 1s and 0s of shape `(batch_size, 1)`. +#' - if `label_mode` is `categorical`, the labels are a `float32` tensor +#' of shape `(batch_size, num_classes)`, representing a one-hot +#' encoding of the class index. +#' +#' @param directory +#' Directory where the data is located. +#' If `labels` is `"inferred"`, it should contain subdirectories, +#' each containing audio files for a class. Otherwise, the directory +#' structure is ignored. +#' +#' @param labels +#' Either "inferred" (labels are generated from the directory +#' structure), `NULL` (no labels), or a list/tuple of integer labels +#' of the same size as the number of audio files found in +#' the directory. Labels should be sorted according to the +#' alphanumeric order of the audio file paths +#' (obtained via `os.walk(directory)` in Python). +#' +#' @param label_mode +#' String describing the encoding of `labels`. Options are: +#' - `"int"`: means that the labels are encoded as integers (e.g. for +#' `sparse_categorical_crossentropy` loss). +#' - `"categorical"` means that the labels are encoded as a categorical +#' vector (e.g. 
for `categorical_crossentropy` loss)
+#' - `"binary"` means that the labels (there can be only 2)
+#' are encoded as `float32` scalars with values 0
+#' or 1 (e.g. for `binary_crossentropy`).
+#' - `NULL` (no labels).
+#'
+#' @param class_names
+#' Only valid if `labels` is `"inferred"`.
+#' This is the explicit list of class names
+#' (must match names of subdirectories). Used to control the order
+#' of the classes (otherwise alphanumerical order is used).
+#'
+#' @param batch_size
+#' Size of the batches of data. Default: 32. If `NULL`,
+#' the data will not be batched
+#' (the dataset will yield individual samples).
+#'
+#' @param sampling_rate
+#' Audio sampling rate (in samples per second).
+#'
+#' @param output_sequence_length
+#' Maximum length of an audio sequence. Audio files
+#' longer than this will be truncated to `output_sequence_length`.
+#' If set to `NULL`, then all sequences in the same batch will
+#' be padded to the
+#' length of the longest sequence in the batch.
+#'
+#' @param ragged
+#' Whether to return a Ragged dataset (where each sequence has its
+#' own length). Defaults to `FALSE`.
+#'
+#' @param shuffle
+#' Whether to shuffle the data. Defaults to `TRUE`.
+#' If set to `FALSE`, sorts the data in alphanumeric order.
+#'
+#' @param seed
+#' Optional random seed for shuffling and transformations.
+#'
+#' @param validation_split
+#' Optional float between 0 and 1, fraction of data to
+#' reserve for validation.
+#'
+#' @param subset
+#' Subset of the data to return. One of `"training"`,
+#' `"validation"` or `"both"`. Only used if `validation_split` is set.
+#'
+#' @param follow_links
+#' Whether to visit subdirectories pointed to by symlinks.
+#' Defaults to `FALSE`.
+#'
+#' @param verbose
+#' Whether to display information on the number of classes and
+#' the number of files found. Defaults to `TRUE`.
+#'
+#'
+#' @export
+#' @family dataset utils
+#' @family utils
+#' @seealso
+#' +
# +
+#' @tether keras.utils.audio_dataset_from_directory
+audio_dataset_from_directory <-
+function (directory, labels = "inferred", label_mode = "int",
+    class_names = NULL, batch_size = 32L, sampling_rate = NULL,
+    output_sequence_length = NULL, ragged = FALSE, shuffle = TRUE,
+    seed = NULL, validation_split = NULL, subset = NULL, follow_links = FALSE,
+    verbose = TRUE)
+{
+    args <- capture_args(list(labels = as_integer, label_mode = as_integer,
+        batch_size = as_integer, seed = as_integer))
+    do.call(keras$utils$audio_dataset_from_directory, args)
+}
+
+
+#' Splits a dataset into a left half and a right half (e.g. train / test).
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' data <- random_uniform(c(1000, 4))
+#' c(left_ds, right_ds) %<-% split_dataset(list(data$numpy()), left_size = 0.8)
+#' left_ds$cardinality()
+#' right_ds$cardinality()
+#' ```
+#'
+#' @returns
+#' A list of two `tf$data$Dataset` objects:
+#' the left and right splits.
+#'
+#' @param dataset
+#' A `tf$data$Dataset`, a `torch$utils$data$Dataset` object,
+#' or a list of arrays with the same length.
+#'
+#' @param left_size
+#' If float (in the range `[0, 1]`), it signifies
+#' the fraction of the data to pack in the left dataset. If integer, it
+#' signifies the number of samples to pack in the left dataset. If
+#' `NULL`, defaults to the complement to `right_size`.
+#' Defaults to `NULL`.
+#'
+#' @param right_size
+#' If float (in the range `[0, 1]`), it signifies
+#' the fraction of the data to pack in the right dataset.
+#' If integer, it signifies the number of samples to pack
+#' in the right dataset.
+#' If `NULL`, defaults to the complement to `left_size`. +#' Defaults to `NULL`. +#' +#' @param shuffle +#' Boolean, whether to shuffle the data before splitting it. +#' +#' @param seed +#' A random seed for shuffling. +#' +#' @export +#' @family dataset utils +#' @family utils +#' @seealso +#' + +# + +#' +#' @tether keras.utils.split_dataset +split_dataset <- +function (dataset, left_size = NULL, right_size = NULL, shuffle = FALSE, + seed = NULL) +{ + args <- capture_args(list(left_size = function (x) + ifelse(x < 1, x, as_integer(x)), right_size = function (x) + ifelse(x < 1, x, as_integer(x)), seed = as_integer)) + do.call(keras$utils$split_dataset, args) +} + + + +#' Generates a `tf.data.Dataset` from image files in a directory. +#' +#' @description +#' If your directory structure is: +#' +#' ``` +#' main_directory/ +#' ...class_a/ +#' ......a_image_1.jpg +#' ......a_image_2.jpg +#' ...class_b/ +#' ......b_image_1.jpg +#' ......b_image_2.jpg +#' ``` +#' +#' Then calling `image_dataset_from_directory(main_directory, +#' labels = 'inferred')` will return a `tf.data.Dataset` that yields batches of +#' images from the subdirectories `class_a` and `class_b`, together with labels +#' 0 and 1 (0 corresponding to `class_a` and 1 corresponding to `class_b`). +#' +#' Supported image formats: `.jpeg`, `.jpg`, `.png`, `.bmp`, `.gif`. +#' Animated gifs are truncated to the first frame. +#' +#' @returns +#' A `tf.data.Dataset` object. +#' +#' - If `label_mode` is `NULL`, it yields `float32` tensors of shape +#' `(batch_size, image_size[1], image_size[2], num_channels)`, +#' encoding images (see below for rules regarding `num_channels`). +#' - Otherwise, it yields a tuple `(images, labels)`, where `images` has +#' shape `(batch_size, image_size[1], image_size[2], num_channels)`, +#' and `labels` follows the format described below. +#' +#' Rules regarding labels format: +#' +#' - if `label_mode` is `"int"`, the labels are an `int32` tensor of shape +#' `(batch_size,)`. +#' - if `label_mode` is `"binary"`, the labels are a `float32` tensor of +#' 1s and 0s of shape `(batch_size, 1)`. +#' - if `label_mode` is `"categorical"`, the labels are a `float32` tensor +#' of shape `(batch_size, num_classes)`, representing a one-hot +#' encoding of the class index. +#' +#' Rules regarding number of channels in the yielded images: +#' +#' - if `color_mode` is `"grayscale"`, +#' there's 1 channel in the image tensors. +#' - if `color_mode` is `"rgb"`, +#' there are 3 channels in the image tensors. +#' - if `color_mode` is `"rgba"`, +#' there are 4 channels in the image tensors. +#' +#' @param directory +#' Directory where the data is located. +#' If `labels` is `"inferred"`, it should contain +#' subdirectories, each containing images for a class. +#' Otherwise, the directory structure is ignored. +#' +#' @param labels +#' Either `"inferred"` +#' (labels are generated from the directory structure), +#' `NULL` (no labels), +#' or a list/tuple of integer labels of the same size as the number of +#' image files found in the directory. Labels should be sorted +#' according to the alphanumeric order of the image file paths +#' (obtained via `os.walk(directory)` in Python). +#' +#' @param label_mode +#' String describing the encoding of `labels`. Options are: +#' - `"int"`: means that the labels are encoded as integers +#' (e.g. for `sparse_categorical_crossentropy` loss). +#' - `"categorical"` means that the labels are +#' encoded as a categorical vector +#' (e.g. for `categorical_crossentropy` loss). 
+#' - `"binary"` means that the labels (there can be only 2) +#' are encoded as `float32` scalars with values 0 or 1 +#' (e.g. for `binary_crossentropy`). +#' - `NULL` (no labels). +#' +#' @param class_names +#' Only valid if `labels` is `"inferred"`. +#' This is the explicit list of class names +#' (must match names of subdirectories). Used to control the order +#' of the classes (otherwise alphanumerical order is used). +#' +#' @param color_mode +#' One of `"grayscale"`, `"rgb"`, `"rgba"`. +#' Defaults to `"rgb"`. Whether the images will be converted to +#' have 1, 3, or 4 channels. +#' +#' @param batch_size +#' Size of the batches of data. Defaults to 32. +#' If `NULL`, the data will not be batched +#' (the dataset will yield individual samples). +#' +#' @param image_size +#' Size to resize images to after they are read from disk, +#' specified as `(height, width)`. Defaults to `(256, 256)`. +#' Since the pipeline processes batches of images that must all have +#' the same size, this must be provided. +#' +#' @param shuffle +#' Whether to shuffle the data. Defaults to `TRUE`. +#' If set to `FALSE`, sorts the data in alphanumeric order. +#' +#' @param seed +#' Optional random seed for shuffling and transformations. +#' +#' @param validation_split +#' Optional float between 0 and 1, +#' fraction of data to reserve for validation. +#' +#' @param subset +#' Subset of the data to return. +#' One of `"training"`, `"validation"`, or `"both"`. +#' Only used if `validation_split` is set. +#' When `subset = "both"`, the utility returns a tuple of two datasets +#' (the training and validation datasets respectively). +#' +#' @param interpolation +#' String, the interpolation method used when +#' resizing images. Defaults to `"bilinear"`. +#' Supports `"bilinear"`, `"nearest"`, `"bicubic"`, `"area"`, +#' `"lanczos3"`, `"lanczos5"`, `"gaussian"`, `"mitchellcubic"`. +#' +#' @param follow_links +#' Whether to visit subdirectories pointed to by symlinks. +#' Defaults to `FALSE`. +#' +#' @param crop_to_aspect_ratio +#' If `TRUE`, resize the images without aspect +#' ratio distortion. When the original aspect ratio differs from the +#' target aspect ratio, the output image will be cropped so as to +#' return the largest possible window in the image +#' (of size `image_size`) that matches the target aspect ratio. By +#' default (`crop_to_aspect_ratio = FALSE`), aspect ratio may not be +#' preserved. +#' +#' @param pad_to_aspect_ratio +#' If `TRUE`, resize the images without aspect +#' ratio distortion. When the original aspect ratio differs from the +#' target aspect ratio, the output image will be padded so as to +#' return the largest possible window in the image +#' (of size `image_size`) that matches the target aspect ratio. By +#' default (`pad_to_aspect_ratio=FALSE`), aspect ratio may not be +#' preserved. +#' +#' @param data_format +#' If `NULL` uses [`config_image_data_format()`] +#' otherwise either `'channel_last'` or `'channel_first'`. +#' +#' @param verbose +#' Whether to display number information on classes and +#' number of files found. Defaults to `TRUE`. 
+#' +#' @export +#' @family dataset utils +#' @family image dataset utils +#' @family utils +#' @family preprocessing +#' @seealso +#' + +# + +#' @tether keras.utils.image_dataset_from_directory +image_dataset_from_directory <- +function (directory, labels = "inferred", label_mode = "int", + class_names = NULL, color_mode = "rgb", batch_size = 32L, + image_size = c(256L, 256L), shuffle = TRUE, seed = NULL, + validation_split = NULL, subset = NULL, interpolation = "bilinear", + follow_links = FALSE, crop_to_aspect_ratio = FALSE, + pad_to_aspect_ratio = FALSE, data_format = NULL, verbose = TRUE) +{ + args <- capture_args(list(labels = as_integer, + image_size = function(x) lapply(x, as_integer), + batch_size = as_integer, seed = as_integer)) + do.call(keras$utils$image_dataset_from_directory, args) +} + + + +#' Generates a `tf.data.Dataset` from text files in a directory. +#' +#' @description +#' If your directory structure is: +#' +#' ``` +#' main_directory/ +#' ...class_a/ +#' ......a_text_1.txt +#' ......a_text_2.txt +#' ...class_b/ +#' ......b_text_1.txt +#' ......b_text_2.txt +#' ``` +#' +#' Then calling `text_dataset_from_directory(main_directory, +#' labels='inferred')` will return a `tf.data.Dataset` that yields batches of +#' texts from the subdirectories `class_a` and `class_b`, together with labels +#' 0 and 1 (0 corresponding to `class_a` and 1 corresponding to `class_b`). +#' +#' Only `.txt` files are supported at this time. +#' +#' @returns +#' A `tf.data.Dataset` object. +#' +#' - If `label_mode` is `NULL`, it yields `string` tensors of shape +#' `(batch_size,)`, containing the contents of a batch of text files. +#' - Otherwise, it yields a tuple `(texts, labels)`, where `texts` +#' has shape `(batch_size,)` and `labels` follows the format described +#' below. +#' +#' Rules regarding labels format: +#' +#' - if `label_mode` is `int`, the labels are an `int32` tensor of shape +#' `(batch_size,)`. +#' - if `label_mode` is `binary`, the labels are a `float32` tensor of +#' 1s and 0s of shape `(batch_size, 1)`. +#' - if `label_mode` is `categorical`, the labels are a `float32` tensor +#' of shape `(batch_size, num_classes)`, representing a one-hot +#' encoding of the class index. +#' +#' @param directory +#' Directory where the data is located. +#' If `labels` is `"inferred"`, it should contain +#' subdirectories, each containing text files for a class. +#' Otherwise, the directory structure is ignored. +#' +#' @param labels +#' Either `"inferred"` +#' (labels are generated from the directory structure), +#' `NULL` (no labels), +#' or a list/tuple of integer labels of the same size as the number of +#' text files found in the directory. Labels should be sorted according +#' to the alphanumeric order of the text file paths +#' (obtained via `os.walk(directory)` in Python). +#' +#' @param label_mode +#' String describing the encoding of `labels`. Options are: +#' - `"int"`: means that the labels are encoded as integers +#' (e.g. for `sparse_categorical_crossentropy` loss). +#' - `"categorical"` means that the labels are +#' encoded as a categorical vector +#' (e.g. for `categorical_crossentropy` loss). +#' - `"binary"` means that the labels (there can be only 2) +#' are encoded as `float32` scalars with values 0 or 1 +#' (e.g. for `binary_crossentropy`). +#' - `NULL` (no labels). +#' +#' @param class_names +#' Only valid if `"labels"` is `"inferred"`. +#' This is the explicit list of class names +#' (must match names of subdirectories). 
Used to control the order
+#' of the classes (otherwise alphanumerical order is used).
+#'
+#' @param batch_size
+#' Size of the batches of data. Defaults to 32.
+#' If `NULL`, the data will not be batched
+#' (the dataset will yield individual samples).
+#'
+#' @param max_length
+#' Maximum size of a text string. Texts longer than this will
+#' be truncated to `max_length`.
+#'
+#' @param shuffle
+#' Whether to shuffle the data. Defaults to `TRUE`.
+#' If set to `FALSE`, sorts the data in alphanumeric order.
+#'
+#' @param seed
+#' Optional random seed for shuffling and transformations.
+#'
+#' @param validation_split
+#' Optional float between 0 and 1,
+#' fraction of data to reserve for validation.
+#'
+#' @param subset
+#' Subset of the data to return.
+#' One of `"training"`, `"validation"` or `"both"`.
+#' Only used if `validation_split` is set.
+#' When `subset = "both"`, the utility returns a tuple of two datasets
+#' (the training and validation datasets respectively).
+#'
+#' @param follow_links
+#' Whether to visit subdirectories pointed to by symlinks.
+#' Defaults to `FALSE`.
+#'
+#' @param verbose
+#' Whether to display information on the number of classes and
+#' the number of files found. Defaults to `TRUE`.
+#'
+#' @export
+#' @family dataset utils
+#' @family text dataset utils
+#' @family utils
+#' @family preprocessing
+#' @seealso
+#' +
# +
+#' @tether keras.utils.text_dataset_from_directory
+text_dataset_from_directory <-
+function (directory, labels = "inferred", label_mode = "int",
+    class_names = NULL, batch_size = 32L, max_length = NULL,
+    shuffle = TRUE, seed = NULL, validation_split = NULL, subset = NULL,
+    follow_links = FALSE, verbose = TRUE)
+{
+    args <- capture_args(list(labels = as_integer, label_mode = as_integer,
+        batch_size = as_integer, seed = as_integer))
+    do.call(keras$utils$text_dataset_from_directory, args)
+}
+
+
+#' Creates a dataset of sliding windows over a timeseries provided as array.
+#'
+#' @description
+#' This function takes in a sequence of data-points gathered at
+#' equal intervals, along with time series parameters such as
+#' length of the sequences/windows, spacing between two sequences/windows,
+#' etc., to produce batches of timeseries inputs and targets.
+#'
+#' @returns
+#' A `tf$data$Dataset` instance. If `targets` was passed, the dataset yields
+#' list `(batch_of_sequences, batch_of_targets)`. If not, the dataset yields
+#' only `batch_of_sequences`.
+#'
+#' Example 1:
+#'
+#' Consider indices `[0, 1, ... 98]`.
+#' With `sequence_length=10, sampling_rate=2, sequence_stride=3`,
+#' `shuffle=FALSE`, the dataset will yield batches of sequences
+#' composed of the following indices:
+#'
+#' ```
+#' First sequence:  [0  2  4  6  8 10 12 14 16 18]
+#' Second sequence: [3  5  7  9 11 13 15 17 19 21]
+#' Third sequence:  [6  8 10 12 14 16 18 20 22 24]
+#' ...
+#' Last sequence:   [78 80 82 84 86 88 90 92 94 96]
+#' ```
+#'
+#' In this case the last 2 data points are discarded since no full sequence
+#' can be generated to include them (the next sequence would have started
+#' at index 81, and thus its last step would have gone over 98).
+#'
+#' Example 2: Temporal regression.
+#'
+#' Consider an array `data` of scalar values, of shape `(steps,)`.
+#' To generate a dataset that uses the past 10
+#' timesteps to predict the next timestep, you would use:
+#'
+#' ```{r}
+#' data <- op_array(1:20)
+#' input_data <- data[1:10]
+#' targets <- data[11:20]
+#' dataset <- timeseries_dataset_from_array(
+#'   input_data, targets, sequence_length=10)
+#' iter <- reticulate::as_iterator(dataset)
+#' reticulate::iter_next(iter)
+#' ```
+#'
+#' Example 3: Temporal regression for many-to-many architectures.
+#'
+#' Consider two arrays of scalar values `X` and `Y`,
+#' both of shape `(100,)`. The resulting dataset should consist of samples
+#' with 20 timesteps each. The samples should not overlap.
+#' To generate a dataset that uses the current timestep
+#' to predict the corresponding target timestep, you would use:
+#'
+#' ```{r}
+#' X <- op_array(1:100)
+#' Y <- X*2
+#'
+#' sample_length <- 20
+#' input_dataset <- timeseries_dataset_from_array(
+#'   X, NULL, sequence_length=sample_length, sequence_stride=sample_length)
+#' target_dataset <- timeseries_dataset_from_array(
+#'   Y, NULL, sequence_length=sample_length, sequence_stride=sample_length)
+#'
+#'
+#' inputs <- reticulate::as_iterator(input_dataset) %>% reticulate::iter_next()
+#' targets <- reticulate::as_iterator(target_dataset) %>% reticulate::iter_next()
+#' ```
+#'
+#' @param data
+#' array or eager tensor
+#' containing consecutive data points (timesteps).
+#' The first dimension is expected to be the time dimension.
+#'
+#' @param targets
+#' Targets corresponding to timesteps in `data`.
+#' `targets[i]` should be the target
+#' corresponding to the window that starts at index `i`
+#' (see Example 2 above).
+#' Pass `NULL` if you don't have target data (in this case the dataset
+#' will only yield the input data).
+#'
+#' @param sequence_length
+#' Length of the output sequences
+#' (in number of timesteps).
+#'
+#' @param sequence_stride
+#' Period between successive output sequences.
+#' For stride `s`, output samples would
+#' start at index `data[i]`, `data[i + s]`, `data[i + 2 * s]`, etc.
+#'
+#' @param sampling_rate
+#' Period between successive individual timesteps
+#' within sequences. For rate `r`, timesteps
+#' `data[i], data[i + r], ... data[i + sequence_length]`
+#' are used for creating a sample sequence.
+#'
+#' @param batch_size
+#' Number of timeseries samples in each batch
+#' (except maybe the last one). If `NULL`, the data will not be batched
+#' (the dataset will yield individual samples).
+#'
+#' @param shuffle
+#' Whether to shuffle output samples,
+#' or instead draw them in chronological order.
+#'
+#' @param seed
+#' Optional int; random seed for shuffling.
+#'
+#' @param start_index
+#' Optional int; data points earlier (exclusive)
+#' than `start_index` will not be used
+#' in the output sequences. This is useful to reserve part of the
+#' data for test or validation.
+#'
+#' @param end_index
+#' Optional int; data points later (exclusive) than `end_index`
+#' will not be used in the output sequences.
+#' This is useful to reserve part of the data for test or validation.
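+#'
+#' A hedged sketch of reserving the tail of a series for validation via
+#' `start_index`/`end_index` (the split point is illustrative):
+#'
+#' ```r
+#' data <- op_array(1:1000)
+#' train_ds <- timeseries_dataset_from_array(
+#'   data, targets = NULL, sequence_length = 10, end_index = 800)
+#' val_ds <- timeseries_dataset_from_array(
+#'   data, targets = NULL, sequence_length = 10, start_index = 800)
+#' ```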
+#' +#' @export +#' @family dataset utils +#' @family timesery dataset utils +#' @family utils +#' @family preprocessing +#' @seealso +#' + +# + +#' +#' @tether keras.utils.timeseries_dataset_from_array +timeseries_dataset_from_array <- +function (data, targets, sequence_length, sequence_stride = 1L, + sampling_rate = 1L, batch_size = 128L, shuffle = FALSE, seed = NULL, + start_index = NULL, end_index = NULL) +{ + args <- capture_args(list(sequence_stride = as_integer, + sampling_rate = as_integer, batch_size = as_integer, + seed = as_integer, start_index = as_integer, end_index = as_integer)) + do.call(keras$utils$timeseries_dataset_from_array, args) +} diff --git a/R/datasets.R b/R/datasets.R index 9a22f7be18..9e418afda3 100644 --- a/R/datasets.R +++ b/R/datasets.R @@ -6,7 +6,7 @@ #' Dataset of 50,000 32x32 color training images, labeled over 10 categories, #' and 10,000 test images. #' -#' @return Lists of training and test data: `train$x, train$y, test$x, test$y`. +#' @returns Lists of training and test data: `train$x, train$y, test$x, test$y`. #' #' The `x` data is an array of RGB image data with shape (num_samples, 3, 32, #' 32). @@ -31,7 +31,7 @@ dataset_cifar10 <- function() { #' #' @param label_mode one of "fine", "coarse". #' -#' @return Lists of training and test data: `train$x, train$y, test$x, test$y`. +#' @returns Lists of training and test data: `train$x, train$y, test$x, test$y`. #' #' The `x` data is an array of RGB image data with shape (num_samples, 3, 32, 32). #' @@ -76,11 +76,11 @@ dataset_cifar100 <- function(label_mode = c("fine", "coarse")) { #' `skip_top` limit will be replaced with this character. #' @param index_from Index actual words with this index and higher. #' -#' @return Lists of training and test data: `train$x, train$y, test$x, test$y`. +#' @returns Lists of training and test data: `train$x, train$y, test$x, test$y`. #' #' The `x` data includes integer sequences. If the `num_words` argument was #' specific, the maximum possible index value is `num_words-1`. If the -#' `maxlen`` argument was specified, the largest possible sequence length is +#' `maxlen` argument was specified, the largest possible sequence length is #' `maxlen`. #' #' The `y` data includes a set of integer labels (0 or 1). @@ -137,7 +137,7 @@ dataset_imdb_word_index <- function(path = "imdb_word_index.json") { #' `skip_top` limit will be replaced with this character. #' @param index_from index actual words with this index and higher. #' -#' @return Lists of training and test data: `train$x, train$y, test$x, test$y` +#' @returns Lists of training and test data: `train$x, train$y, test$x, test$y` #' with same format as [dataset_imdb()]. The `dataset_reuters_word_index()` #' function returns a list where the names are words and the values are #' integer. e.g. `word_index[["giraffe"]]` might return `1234`. @@ -177,7 +177,7 @@ dataset_reuters_word_index <- function(path = "reuters_word_index.pkl") { #' #' @param path Path where to cache the dataset locally (relative to ~/.keras/datasets). #' -#' @return Lists of training and test data: `train$x, train$y, test$x, test$y`, where +#' @returns Lists of training and test data: `train$x, train$y, test$x, test$y`, where #' `x` is an array of grayscale image data with shape (num_samples, 28, 28) and `y` #' is an array of digit labels (integers in range 0-9) with shape (num_samples). 
#' @@ -198,7 +198,7 @@ dataset_mnist <- function(path = "mnist.npz") { #' correspond to T-shirt/top, Trouser, Pullover, Dress, Coat, Sandal, Shirt, # 'Sneaker, Bag and Ankle boot. #' -#' @return Lists of training and test data: `train$x, train$y, test$x, test$y`, where +#' @returns Lists of training and test data: `train$x, train$y, test$x, test$y`, where #' `x` is an array of grayscale image data with shape (num_samples, 28, 28) and `y` #' is an array of article labels (integers in range 0-9) with shape (num_samples). #' @@ -237,7 +237,7 @@ dataset_fashion_mnist <- function() { #' @param seed Random seed for shuffling the data before computing the test #' split. #' -#' @return Lists of training and test data: `train$x, train$y, test$x, test$y`. +#' @returns Lists of training and test data: `train$x, train$y, test$x, test$y`. #' #' Samples contain 13 attributes of houses at different locations around #' the Boston suburbs in the late 1970s. Targets are the median values of the diff --git a/R/feature-space.R b/R/feature-space.R new file mode 100644 index 0000000000..b88c6f41d8 --- /dev/null +++ b/R/feature-space.R @@ -0,0 +1,360 @@ + + +#' One-stop utility for preprocessing and encoding structured data. +#' +#' @description +#' **Available feature types:** +#' +#' Note that all features can be referred to by their string name, +#' e.g. `"integer_categorical"`. When using the string name, the default +#' argument values are used. +#' +#' ```{r, eval = FALSE} +#' # Plain float values. +#' feature_float(name = NULL) +#' +#' # Float values to be preprocessed via featurewise standardization +#' # (i.e. via a `layer_normalization()` layer). +#' feature_float_normalized(name = NULL) +#' +#' # Float values to be preprocessed via linear rescaling +#' # (i.e. via a `layer_rescaling` layer). +#' feature_float_rescaled(scale = 1., offset = 0., name = NULL) +#' +#' # Float values to be discretized. By default, the discrete +#' # representation will then be one-hot encoded. +#' feature_float_discretized( +#' num_bins, +#' bin_boundaries = NULL, +#' output_mode = "one_hot", +#' name = NULL +#' ) +#' +#' # Integer values to be indexed. By default, the discrete +#' # representation will then be one-hot encoded. +#' feature_integer_categorical( +#' max_tokens = NULL, +#' num_oov_indices = 1, +#' output_mode = "one_hot", +#' name = NULL +#' ) +#' +#' # String values to be indexed. By default, the discrete +#' # representation will then be one-hot encoded. +#' feature_string_categorical( +#' max_tokens = NULL, +#' num_oov_indices = 1, +#' output_mode = "one_hot", +#' name = NULL +#' ) +#' +#' # Integer values to be hashed into a fixed number of bins. +#' # By default, the discrete representation will then be one-hot encoded. +#' feature_integer_hashed(num_bins, output_mode = "one_hot", name = NULL) +#' +#' # String values to be hashed into a fixed number of bins. +#' # By default, the discrete representation will then be one-hot encoded. 
+#' feature_string_hashed(num_bins, output_mode = "one_hot", name = NULL) +#' ``` +#' # Examples +#' **Basic usage with a named list of input data:** +#' +#' ```{r} +#' raw_data <- list( +#' float_values = c(0.0, 0.1, 0.2, 0.3), +#' string_values = c("zero", "one", "two", "three"), +#' int_values = as.integer(c(0, 1, 2, 3)) +#' ) +#' +#' dataset <- tfdatasets::tensor_slices_dataset(raw_data) +#' +#' feature_space <- layer_feature_space( +#' features = list( +#' float_values = "float_normalized", +#' string_values = "string_categorical", +#' int_values = "integer_categorical" +#' ), +#' crosses = list(c("string_values", "int_values")), +#' output_mode = "concat" +#' ) +#' +#' # Before you start using the feature_space(), +#' # you must `adapt()` it on some data. +#' feature_space |> adapt(dataset) +#' +#' # You can call the feature_space() on a named list of +#' # data (batched or unbatched). +#' output_vector <- feature_space(raw_data) +#' ``` +#' +#' **Basic usage with `tf.data`:** +#' +#' ```{r, eval = FALSE} +#' library(tfdatasets) +#' # Unlabeled data +#' preprocessed_ds <- unlabeled_dataset |> +#' dataset_map(feature_space) +#' +#' # Labeled data +#' preprocessed_ds <- labeled_dataset |> +#' dataset_map(function(x, y) tuple(feature_space(x), y)) +#' ``` +#' +#' **Basic usage with the Keras Functional API:** +#' +#' ```{r} +#' # Retrieve a named list of Keras layer_input() objects +#' (inputs <- feature_space$get_inputs()) +#' # Retrieve the corresponding encoded Keras tensors +#' (encoded_features <- feature_space$get_encoded_features()) +#' # Build a Functional model +#' outputs <- encoded_features |> layer_dense(1, activation = "sigmoid") +#' model <- keras_model(inputs, outputs) +#' ``` +#' +#' **Customizing each feature or feature cross:** +#' +#' ```{r} +#' feature_space <- layer_feature_space( +#' features = list( +#' float_values = feature_float_normalized(), +#' string_values = feature_string_categorical(max_tokens = 10), +#' int_values = feature_integer_categorical(max_tokens = 10) +#' ), +#' crosses = list( +#' feature_cross(c("string_values", "int_values"), crossing_dim = 32) +#' ), +#' output_mode = "concat" +#' ) +#' ``` +#' +#' **Returning a dict (a named list) of integer-encoded features:** +#' +#' ```{r} +#' feature_space <- layer_feature_space( +#' features = list( +#' "string_values" = feature_string_categorical(output_mode = "int"), +#' "int_values" = feature_integer_categorical(output_mode = "int") +#' ), +#' crosses = list( +#' feature_cross( +#' feature_names = c("string_values", "int_values"), +#' crossing_dim = 32, +#' output_mode = "int" +#' ) +#' ), +#' output_mode = "dict" +#' ) +#' ``` +#' +#' **Specifying your own Keras preprocessing layer:** +#' +#' ```{r} +#' # Let's say that one of the features is a short text paragraph that +#' # we want to encode as a vector (one vector per paragraph) via TF-IDF. +#' data <- list(text = c("1st string", "2nd string", "3rd string")) +#' +#' # There's a Keras layer for this: layer_text_vectorization() +#' custom_layer <- layer_text_vectorization(output_mode = "tf_idf") +#' +#' # We can use feature_custom() to create a custom feature +#' # that will use our preprocessing layer. 
+#' feature_space <- layer_feature_space( +#' features = list( +#' text = feature_custom(preprocessor = custom_layer, +#' dtype = "string", +#' output_mode = "float" +#' ) +#' ), +#' output_mode = "concat" +#' ) +#' feature_space |> adapt(tfdatasets::tensor_slices_dataset(data)) +#' output_vector <- feature_space(data) +#' ``` +#' +#' **Retrieving the underlying Keras preprocessing layers:** +#' +#' ```{r, eval = FALSE} +#' # The preprocessing layer of each feature is available in `$preprocessors`. +#' preprocessing_layer <- feature_space$preprocessors$feature1 +#' +#' # The crossing layer of each feature cross is available in `$crossers`. +#' # It's an instance of layer_hashed_crossing() +#' crossing_layer <- feature_space$crossers[["feature1_X_feature2"]] +#' ``` +#' +#' **Saving and reloading a FeatureSpace:** +#' +#' ```{r, eval = FALSE} +#' feature_space$save("featurespace.keras") +#' reloaded_feature_space <- keras$models$load_model("featurespace.keras") +#' ``` +#' +#' @param feature_names +#' Named list mapping the names of your features to their +#' type specification, e.g. `list(my_feature = "integer_categorical")` +#' or `list(my_feature = feature_integer_categorical())`. +#' For a complete list of all supported types, see +#' "Available feature types" paragraph below. +#' +#' @param output_mode +#' A string. +#' - For `layer_feature_space()`, one of `"concat"` or `"dict"`. In concat mode, all +#' features get concatenated together into a single vector. +#' In dict mode, the `FeatureSpace` returns a named list of individually +#' encoded features (with the same names as the input list names). +#' - For the `feature_*` functions, one of: `"int"` `"one_hot"` or `"float"`. +#' +#' @param crosses +#' List of features to be crossed together, e.g. +#' `crosses=list(c("feature_1", "feature_2"))`. The features will be +#' "crossed" by hashing their combined value into +#' a fixed-length vector. +#' +#' @param crossing_dim +#' Default vector size for hashing crossed features. +#' Defaults to `32`. +#' +#' @param hashing_dim +#' Default vector size for hashing features of type +#' `"integer_hashed"` and `"string_hashed"`. Defaults to `32`. +#' +#' @param num_discretization_bins +#' Default number of bins to be used for +#' discretizing features of type `"float_discretized"`. +#' Defaults to `32`. +#' +#' @param name +#' String, name for the object +#' +#' @param object +#' see description +#' +#' @param features +#' see description +#' +#' @inherit layer_dense return +#' @export +#' @family preprocessing layers +#' @family layers +#' @family utils +#' @seealso +#' + +# + +#' @tether keras.utils.FeatureSpace +layer_feature_space <- +function (object, features, output_mode = "concat", crosses = NULL, + crossing_dim = 32L, hashing_dim = 32L, num_discretization_bins = 32L, + name = NULL, feature_names = NULL) +{ + args <- capture_args(list(crossing_dim = as_integer, hashing_dim = as_integer, + num_discretization_bins = as_integer, features = as.list), + ignore = "object") + create_layer(keras$utils$FeatureSpace, object, args) +} + + +#' @export +#' @rdname layer_feature_space +#' @tether keras.utils.FeatureSpace.cross +feature_cross <- +function(feature_names, crossing_dim, output_mode = "one_hot") { + args <- capture_args(list(crossing_dim = as_integer)) + keras$utils$FeatureSpace$cross(!!!args) +} + +#' @export +#' @rdname layer_feature_space +#' @param dtype string, the output dtype of the feature. E.g., "float32". +#' @param preprocessor A callable. 
+#' @tether keras.utils.FeatureSpace.feature +feature_custom <- +function(dtype, preprocessor, output_mode) { + args <- capture_args() + keras$utils$FeatureSpace$feature(!!!args) +} + +#' @export +#' @rdname layer_feature_space +#' @tether keras.utils.FeatureSpace.float +feature_float <- +function(name = NULL) { + args <- capture_args() + keras$utils$FeatureSpace$float(!!!args) +} + +#' @export +#' @rdname layer_feature_space +#' @param scale,offset Passed on to [`layer_rescaling()`] +#' @tether keras.utils.FeatureSpace.float_rescaled +feature_float_rescaled <- +function (scale = 1.0, offset = 0.0, name = NULL) { + args <- capture_args() + keras$utils$FeatureSpace$float_rescaled(!!!args) +} + +#' @export +#' @rdname layer_feature_space +#' @tether keras.utils.FeatureSpace.float_normalized +feature_float_normalized <- +function(name = NULL) { + args <- capture_args() + keras$utils$FeatureSpace$float_normalized(!!!args) +} + +#' @export +#' @rdname layer_feature_space +#' @param num_bins,bin_boundaries Passed on to [`layer_discretization()`] +#' @tether keras.utils.FeatureSpace.float_discretized +feature_float_discretized <- +function(num_bins, bin_boundaries = NULL, output_mode = "one_hot", name = NULL) { + args <- capture_args(list(num_bins = as_integer)) + keras$utils$FeatureSpace$float_discretized(!!!args) +} + +#' @export +#' @rdname layer_feature_space +#' @param max_tokens,num_oov_indices Passed on to [`layer_integer_lookup()`] by `feature_integer_categorical()` or to [`layer_string_lookup()`] by `feature_string_categorical()`. +#' @tether keras.utils.FeatureSpace.integer_categorical +feature_integer_categorical <- +function(max_tokens = NULL, + num_oov_indices = 1, + output_mode = "one_hot", + name = NULL) { + args <- capture_args(list(max_tokens = as_integer, num_oov_indices = as_integer)) + keras$utils$FeatureSpace$integer_categorical(!!!args) +} + +#' @export +#' @rdname layer_feature_space +#' @tether keras.utils.FeatureSpace.string_categorical +feature_string_categorical <- +function(max_tokens = NULL, + num_oov_indices = 1, + output_mode = "one_hot", + name = NULL) { + args <- capture_args(list(max_tokens = as_integer, num_oov_indices = as_integer)) + keras$utils$FeatureSpace$string_categorical(!!!args) +} + +#' @export +#' @rdname layer_feature_space +#' @tether keras.utils.FeatureSpace.string_hashed +feature_string_hashed <- +function(num_bins, + output_mode = "one_hot", + name = NULL) { + args <- capture_args(list(num_bins = as_integer)) + keras$utils$FeatureSpace$string_hashed(!!!args) +} + +#' @export +#' @rdname layer_feature_space +#' @tether keras.utils.FeatureSpace.integer_hashed +feature_integer_hashed <- +function(num_bins, + output_mode = "one_hot", + name = NULL) { + args <- capture_args(list(num_bins = as_integer)) + keras$utils$FeatureSpace$integer_hashed(!!!args) +} diff --git a/R/freeze.R b/R/freeze.R index 309a76008c..64d8cde72c 100644 --- a/R/freeze.R +++ b/R/freeze.R @@ -18,8 +18,11 @@ #' #' Models must be compiled again after weights are frozen or unfrozen. 
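A minimal end-to-end sketch of `layer_feature_space()` with the `feature_*()` helpers defined above (the column names are made up for illustration):

```r
library(keras3)

raw <- list(
  size  = c(0L, 1L, 2L, 3L),
  color = c("red", "green", "blue", "red")
)

fs <- layer_feature_space(
  features = list(
    size  = feature_integer_categorical(max_tokens = 8),
    color = feature_string_categorical(max_tokens = 8)
  ),
  output_mode = "concat"
)

# adapt() must learn the vocabularies before the FeatureSpace can be called
fs |> adapt(tfdatasets::tensor_slices_dataset(raw))
encoded <- fs(raw)  # one concatenated one-hot vector per row
```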
#' -#' @examples \dontrun{ -# instantiate a VGG16 model +#' @details +#' # Examples +#' +#' ```{r, strip.white = FALSE} +#' # instantiate a VGG16 model #' conv_base <- application_vgg16( #' weights = "imagenet", #' include_top = FALSE, @@ -29,19 +32,20 @@ #' # freeze its weights #' freeze_weights(conv_base) #' +#' # Note the "Trainable" column #' conv_base #' #' # create a composite model that includes the base + more layers -#' model <- keras_model_sequential() %>% -#' conv_base() %>% -#' layer_flatten() %>% -#' layer_dense(units = 256, activation = "relu") %>% +#' model <- keras_model_sequential(input_batch_shape = shape(conv_base$input)) |> +#' conv_base() |> +#' layer_flatten() |> +#' layer_dense(units = 256, activation = "relu") |> #' layer_dense(units = 1, activation = "sigmoid") #' #' # compile -#' model %>% compile( +#' model |> compile( #' loss = "binary_crossentropy", -#' optimizer = optimizer_rmsprop(lr = 2e-5), +#' optimizer = optimizer_rmsprop(learning_rate = 2e-5), #' metrics = c("accuracy") #' ) #' @@ -49,14 +53,13 @@ #' print(model, expand_nested = TRUE) #' #' -#' #' # unfreeze weights from "block5_conv1" on #' unfreeze_weights(conv_base, from = "block5_conv1") #' #' # compile again since we froze or unfroze weights -#' model %>% compile( +#' model |> compile( #' loss = "binary_crossentropy", -#' optimizer = optimizer_rmsprop(lr = 2e-5), +#' optimizer = optimizer_rmsprop(learning_rate = 2e-5), #' metrics = c("accuracy") #' ) #' @@ -66,7 +69,7 @@ #' # freeze only the last 5 layers #' freeze_weights(conv_base, from = -5) #' conv_base -#' # equivalently, also freeze only the last 5 layers +#' # freeze only the last 5 layers, a different way #' unfreeze_weights(conv_base, to = -6) #' conv_base #' @@ -76,7 +79,8 @@ #' #' model <- application_efficientnet_b0() #' freeze_weights(model, which = is_batch_norm_layer) -#' model +#' # print(model) +#' +#' # equivalent to: #' for(layer in model$layers) { #' if(is_batch_norm_layer(layer)) @@ -84,7 +88,10 @@ #' else #' layer$trainable <- TRUE #' } -#' } +#' ``` +#' @returns The input `object` with frozen weights is returned, invisibly. Note, +#' `object` is modified in place, and the return value is only provided to +#' make usage with the pipe convenient. #' @export freeze_weights <- function(object, from = NULL, to = NULL, which = NULL) { @@ -229,3 +236,8 @@ apply_which_trainable <- function(object, which, trainable) { return(invisible(object)) } + + +is_layer <- function(object) { + inherits(object, "keras.src.layers.layer.Layer") +} diff --git a/R/history.R b/R/history.R index 0dd45ee8a3..6bfa25dc61 100644 --- a/R/history.R +++ b/R/history.R @@ -46,7 +46,7 @@ print.keras_training_history <- function(x, ...) { #' Plots metrics recorded during training. #' #' @param x Training history object returned from -#' `fit.keras.engine.training.Model()`. +#' [`fit.keras.src.models.model.Model()`]. #' @param y Unused. #' @param metrics One or more metrics to plot (e.g. `c('loss', 'accuracy')`). #' Defaults to plotting all captured metrics. @@ -59,6 +59,12 @@ print.keras_training_history <- function(x, ...) { #' black and white. #' @param ... Additional parameters to pass to the [plot()] method. #' +#' @importFrom rlang .data +#' +#' @returns If `method == "ggplot2"`, the ggplot object is returned. If +#' `method == "base"`, then this function will draw to the graphics device and +#' return `NULL`, invisibly.
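A short usage sketch for the history methods documented above (assumes a compiled `model` and training data `x_train`/`y_train` already exist):

```r
history <- model |> fit(x_train, y_train, epochs = 5, validation_split = 0.2)

plot(history, metrics = "loss", method = "ggplot2")  # returns a ggplot object
df <- as.data.frame(history)  # long-format metrics, one row per epoch/metric
```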
+#' #' @export plot.keras_training_history <- function(x, y, metrics = NULL, method = c("auto", "ggplot2", "base"), smooth = getOption("keras.plot.history.smooth", TRUE), @@ -95,11 +101,11 @@ plot.keras_training_history <- function(x, y, metrics = NULL, method = c("auto", if (do_validation) { if (theme_bw) - p <- ggplot2::ggplot(df, ggplot2::aes_(~epoch, ~value, color = ~data, fill = ~data, linetype = ~data, shape = ~data)) + p <- ggplot2::ggplot(df, ggplot2::aes(.data$epoch, .data$value, color = .data$data, fill = .data$data, linetype = .data$data, shape = .data$data)) else - p <- ggplot2::ggplot(df, ggplot2::aes_(~epoch, ~value, color = ~data, fill = ~data)) + p <- ggplot2::ggplot(df, ggplot2::aes(.data$epoch, .data$value, color = .data$data, fill = .data$data)) } else { - p <- ggplot2::ggplot(df, ggplot2::aes_(~epoch, ~value)) + p <- ggplot2::ggplot(df, ggplot2::aes(.data$epoch, .data$value)) } smooth_args <- list(se = FALSE, method = 'loess', na.rm = TRUE, @@ -145,12 +151,13 @@ plot.keras_training_history <- function(x, y, metrics = NULL, method = c("auto", # adjust margins top_plot <- i == 1 bottom_plot <- i == length(metrics) + + mar <- c(1.5, 5, 0.5, 1.5) if (top_plot) - par(mar = c(1.5, 3, 1.5, 1.5)) - else if (bottom_plot) - par(mar = c(2.5, 3, .5, 1.5)) - else - par(mar = c(1.5, 3, .5, 1.5)) + mar[3] %<>% `+`(3.5) + if (bottom_plot) + mar[1] %<>% `+`(3.5) + par(mar = mar) # select data for current panel df2 <- df[df$metric == metric, ] @@ -168,6 +175,7 @@ plot.keras_training_history <- function(x, y, metrics = NULL, method = c("auto", else graphics::legend(legend_location, legend = metric, pch = 1) } + invisible(NULL) } } @@ -175,14 +183,8 @@ plot.keras_training_history <- function(x, y, metrics = NULL, method = c("auto", #' @export as.data.frame.keras_training_history <- function(x, ...) { - # filter out metrics that were collected for callbacks (e.g. lr) - if (tensorflow::tf_version() < "2.2") - x$metrics <- x$metrics[x$params$metrics] - if (tensorflow::tf_version() >= "2.1") - metric_names <- names(x$metrics) - else - metric_names <- x$params$metrics + metric_names <- names(x$metrics) # pad to epochs if necessary values <- x$metrics @@ -212,25 +214,16 @@ as.data.frame.keras_training_history <- function(x, ...) 
{ to_keras_training_history <- function(history) { - # turn history into an R object so it can be persited and + # turn history into an R object so it can be persisted and # and give it a class so we can write print/plot methods params <- history$params - # we only see this info before TF 2.2 - if (tensorflow::tf_version() < "2.2") { - if (params$do_validation) { - if (!is.null(params$validation_steps)) - params$validation_samples <- params$validation_steps - else - params$validation_samples <- dim(history$validation_data[[1]])[[1]] - } - } # normalize metrics metrics <- history$history - metrics <- lapply(metrics, function(metric) { - as.numeric(lapply(metric, mean)) - }) + # metrics <- lapply(metrics, function(metric) { + # as.numeric(lapply(metric, mean)) + # }) # create history keras_training_history( @@ -242,14 +235,14 @@ to_keras_training_history <- function(history) { keras_training_history <- function(params, metrics) { # pad missing metrics with NA - rows <- max(as.integer(lapply(metrics, length))) - for (metric in names(metrics)) { - metric_data <- metrics[[metric]] - pad <- rows - length(metric_data) - pad_data <- rep_len(NA, pad) - metric_data <- c(metric_data, pad_data) - metrics[[metric]] <- metric_data - } + # rows <- max(as.integer(lapply(metrics, length))) + # for (metric in names(metrics)) { + # metric_data <- metrics[[metric]] + # pad <- rows - length(metric_data) + # pad_data <- rep_len(NA, pad) + # metric_data <- c(metric_data, pad_data) + # metrics[[metric]] <- metric_data + # } # return history structure(class = "keras_training_history", list( diff --git a/R/image-utils.R b/R/image-utils.R new file mode 100644 index 0000000000..756e2dba1a --- /dev/null +++ b/R/image-utils.R @@ -0,0 +1,198 @@ + +#' Saves an image stored as an array to a path or file object. +#' +#' @param path +#' Path or file object. +#' +#' @param x +#' An array. +#' +#' @param data_format +#' Image data format, either `"channels_first"` or +#' `"channels_last"`. +#' +#' @param file_format +#' Optional file format override. If omitted, the format to +#' use is determined from the filename extension. If a file object was +#' used instead of a filename, this parameter should always be used. +#' +#' @param scale +#' Whether to rescale image values to be within `[0, 255]`. +#' +#' @param ... +#' Additional keyword arguments passed to `PIL.Image.save()`. +#' +#' @returns Called primarily for side effects. The input `x` is returned, +#' invisibly, to enable usage with the pipe. +#' @export +#' @family image utils +#' @family utils +#' @seealso +#' + +# + +#' @tether keras.utils.save_img +image_array_save <- +function (x, path, data_format = NULL, file_format = NULL, scale = TRUE, + ...) +{ + args <- capture_args() + do.call(keras$utils$save_img, args) + invisible(x) +} + + +#' Converts a 3D array to a PIL Image instance. +#' +#' @description +#' +#' # Example +#' +#' ```{r} +#' img <- array(runif(30000), dim = c(100, 100, 3)) +#' pil_img <- image_from_array(img) +#' pil_img +#' ``` +#' +#' @returns +#' A PIL Image instance. +#' +#' @param x +#' Input data, in any form that can be converted to an array. +#' +#' @param data_format +#' Image data format, can be either `"channels_first"` or +#' `"channels_last"`. Defaults to `NULL`, in which case the global +#' setting `config_image_data_format()` is used (unless you +#' changed it, it defaults to `"channels_last"`). +#' +#' @param scale +#' Whether to rescale the image such that minimum and maximum values +#' are 0 and 255 respectively. Defaults to `TRUE`. 
+#' +#' @param dtype +#' Dtype to use. `NULL` means the global setting +#' `config_floatx()` is used (unless you changed it, it +#' defaults to `"float32"`). Defaults to `NULL`. +#' +#' @export +#' @family image utils +#' @family utils +# @seealso +# + +#' @tether keras.utils.array_to_img +image_from_array <- +function (x, data_format = NULL, scale = TRUE, dtype = NULL) +{ + args <- capture_args() + do.call(keras$utils$array_to_img, args) +} + + +#' Loads an image into PIL format. +#' +#' @description +#' +#' # Example +#' +#' ```{r} +#' image_path <- get_file(origin = "https://www.r-project.org/logo/Rlogo.png") +#' (image <- image_load(image_path)) +#' +#' input_arr <- image_to_array(image) +#' str(input_arr) +#' input_arr %<>% array_reshape(dim = c(1, dim(input_arr))) # Convert single image to a batch. +#' ``` +#' +#' +#' ```{r, eval = FALSE} +#' model |> predict(input_arr) +#' ``` +#' +#' @returns +#' A PIL Image instance. +#' +#' @param path +#' Path to image file. +#' +#' @param color_mode +#' One of `"grayscale"`, `"rgb"`, `"rgba"`. Default: `"rgb"`. +#' The desired image format. +#' +#' @param target_size +#' Either `NULL` (default to original size) or tuple of ints +#' `(img_height, img_width)`. +#' +#' @param interpolation +#' Interpolation method used to resample the image if the +#' target size is different from that of the loaded image. Supported +#' methods are `"nearest"`, `"bilinear"`, and `"bicubic"`. +#' If PIL version 1.1.3 or newer is installed, `"lanczos"` +#' is also supported. If PIL version 3.4.0 or newer is installed, +#' `"box"` and `"hamming"` are also +#' supported. By default, `"nearest"` is used. +#' +#' @param keep_aspect_ratio +#' Boolean, whether to resize images to a target +#' size without aspect ratio distortion. The image is cropped in +#' the center with target aspect ratio before resizing. +#' +#' @export +#' @family image utils +#' @family utils +#' @seealso +#' + +# + +#' @tether keras.utils.load_img +image_load <- +function (path, color_mode = "rgb", target_size = NULL, interpolation = "nearest", + keep_aspect_ratio = FALSE) +{ + args <- capture_args() + do.call(keras$utils$load_img, args) +} + + +#' Converts a PIL Image instance to a matrix. +#' +#' @description +#' +#' # Example +#' +#' ```{r} +#' image_path <- get_file(origin = "https://www.r-project.org/logo/Rlogo.png") +#' (img <- image_load(image_path)) +#' +#' array <- image_to_array(img) +#' str(array) +#' ``` +#' +#' @returns +#' A 3D array. +#' +#' @param img +#' Input PIL Image instance. +#' +#' @param data_format +#' Image data format, can be either `"channels_first"` or +#' `"channels_last"`. Defaults to `NULL`, in which case the global +#' setting `config_image_data_format()` is used (unless you +#' changed it, it defaults to `"channels_last"`). +#' +#' @param dtype +#' Dtype to use. `NULL` means the global setting +#' `config_floatx()` is used (unless you changed it, it +#' defaults to `"float32"`). +#' +#' @export +#' @family image utils +#' @family utils +#' @seealso +#' + +# + +#' @tether keras.utils.img_to_array +image_to_array <- +function (img, data_format = NULL, dtype = NULL) +{ + args <- capture_args() + do.call(keras$utils$img_to_array, args) +} diff --git a/R/initializers.R b/R/initializers.R index a4c3cb465a..4d91fbdb32 100644 --- a/R/initializers.R +++ b/R/initializers.R @@ -1,298 +1,728 @@ - -#' Initializer that generates tensors initialized to 0. -#' -#' @family initializers -#' +#' Initializer that generates tensors with constant values. 
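A round-trip sketch of the image utilities from the image-utils.R hunk above (`"example.png"` is a hypothetical path):

```r
library(keras3)

img <- array(runif(64 * 64 * 3), dim = c(64, 64, 3))
image_array_save(img, "example.png")  # values rescaled to [0, 255] by default

reloaded <- image_load("example.png", target_size = c(32, 32)) |>
  image_to_array()  # back to a 3D array, now 32 x 32 x 3
str(reloaded)
```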
+#' +#' @description +#' Only scalar values are allowed. +#' The constant value provided must be convertible to the dtype requested +#' when calling the initializer. +#' +#' # Examples +#' ```{r} +#' # Standalone usage: +#' initializer <- initializer_constant(10) +#' values <- initializer(shape = c(2, 2)) +#' ``` +#' +#' ```{r} +#' # Usage in a Keras layer: +#' initializer <- initializer_constant(10) +#' layer <- layer_dense(units = 3, kernel_initializer = initializer) +#' ``` +#' +#' @param value +#' A numeric scalar. +#' +#' @returns An `Initializer` instance that can be passed to layer or variable +#' constructors, or called directly with a `shape` to return a Tensor. #' @export -initializer_zeros <- function() { - keras$initializers$Zeros() +#' @family constant initializers +#' @family initializers +#' @seealso +#' + +# + +#' @tether keras.initializers.Constant +initializer_constant <- +function (value = 0) +{ + args <- capture_args() + do.call(keras$initializers$Constant, args) } -#' Initializer that generates tensors initialized to 1. + +#' Initializer that generates the identity matrix. #' -#' @family initializers +#' @description +#' Only usable for generating 2D matrices. #' -#' @export -initializer_ones <- function() { - keras$initializers$Ones() -} - -#' Initializer that generates tensors initialized to a constant value. +#' # Examples +#' ```{r} +#' # Standalone usage: +#' initializer <- initializer_identity() +#' values <- initializer(shape = c(2, 2)) +#' ``` #' -#' @param value float; the value of the generator tensors. +#' ```{r} +#' # Usage in a Keras layer: +#' initializer <- initializer_identity() +#' layer <- layer_dense(units = 3, kernel_initializer = initializer) +#' ``` #' -#' @family initializers +#' @param gain +#' Multiplicative factor to apply to the identity matrix. #' +#' @inherit initializer_constant return #' @export -initializer_constant <- function(value = 0) { - keras$initializers$Constant( - value = value - ) +#' @family constant initializers +#' @family initializers +# @seealso +# + +#' @tether keras.initializers.IdentityInitializer +initializer_identity <- +function (gain = 1) +{ + args <- capture_args() + do.call(keras$initializers$IdentityInitializer, args) } -#' Initializer that generates tensors with a normal distribution. +#' Initializer that generates tensors initialized to 1. #' -#' @param mean Mean of the random values to generate. -#' @param stddev Standard deviation of the random values to generate. -#' @param seed Integer used to seed the random generator. +#' @description +#' Also available via the shortcut function `ones`. #' -#' @family initializers +#' # Examples +#' ```{r} +#' # Standalone usage: +#' initializer <- initializer_ones() +#' values <- initializer(shape = c(2, 2)) +#' ``` +#' +#' ```{r} +#' # Usage in a Keras layer: +#' initializer <- initializer_ones() +#' layer <- layer_dense(units = 3, kernel_initializer = initializer) +#' ``` #' +#' @inherit initializer_constant return #' @export -initializer_random_normal <- function(mean = 0.0, stddev = 0.05, seed = NULL) { - keras$initializers$RandomNormal( - mean = mean, - stddev = stddev, - seed = as_nullable_integer(seed) - ) +#' @family constant initializers +#' @family initializers +#' @seealso +#' + +# + +#' @tether keras.initializers.Ones +initializer_ones <- +function () +{ + args <- capture_args() + do.call(keras$initializers$Ones, args) } -#' Initializer that generates tensors with a uniform distribution. + +#' Initializer that generates tensors initialized to 0. 
#' +#' @description #' -#' @param minval Lower bound of the range of random values to generate. -#' @param maxval Upper bound of the range of random values to generate. Defaults to 1 for float types. -#' @param seed seed +#' # Examples +#' ```{r} +#' # Standalone usage: +#' initializer <- initializer_zeros() +#' values <- initializer(shape = c(2, 2)) +#' ``` #' -#' @family initializers +#' ```{r} +#' # Usage in a Keras layer: +#' initializer <- initializer_zeros() +#' layer <- layer_dense(units = 3, kernel_initializer = initializer) +#' ``` #' +#' @inherit initializer_constant return #' @export -initializer_random_uniform <- function(minval = -0.05, maxval = 0.05, seed = NULL) { - keras$initializers$RandomUniform( - minval = minval, - maxval = maxval, - seed = as_nullable_integer(seed) - ) +#' @family constant initializers +#' @family initializers +#' @seealso +#' + +# + +#' @tether keras.initializers.Zeros +initializer_zeros <- +function () +{ + args <- capture_args() + do.call(keras$initializers$Zeros, args) } -#' Initializer that generates a truncated normal distribution. -#' -#' These values are similar to values from an [initializer_random_normal()] -#' except that values more than two standard deviations from the mean -#' are discarded and re-drawn. This is the recommended initializer for -#' neural network weights and filters. -#' -#' @inheritParams initializer_random_normal -#' -#' @family initializers -#' +#' The Glorot normal initializer, also called Xavier normal initializer. +#' +#' @description +#' Draws samples from a truncated normal distribution centered on 0 with +#' `stddev = sqrt(2 / (fan_in + fan_out))` where `fan_in` is the number of +#' input units in the weight tensor and `fan_out` is the number of output units +#' in the weight tensor. +#' +#' # Examples +#' ```{r} +#' # Standalone usage: +#' initializer <- initializer_glorot_normal() +#' values <- initializer(shape = c(2, 2)) +#' ``` +#' +#' ```{r} +#' # Usage in a Keras layer: +#' initializer <- initializer_glorot_normal() +#' layer <- layer_dense(units = 3, kernel_initializer = initializer) +#' ``` +#' +#' # Reference +#' - [Glorot et al., 2010](https://proceedings.mlr.press/v9/glorot10a.html) +#' +#' @param seed +#' An integer or instance of +#' `random_seed_generator()`. +#' Used to make the behavior of the initializer +#' deterministic. Note that an initializer seeded with an integer +#' or `NULL` (unseeded) will produce the same random values +#' across multiple calls. To get different random values +#' across multiple calls, use as seed an instance +#' of `random_seed_generator()`. +#' +#' @inherit initializer_constant return #' @export -initializer_truncated_normal <- function(mean = 0.0, stddev = 0.05, seed = NULL) { - keras$initializers$TruncatedNormal( - mean = mean, - stddev = stddev, - seed = as_nullable_integer(seed) - ) +#' @family random initializers +#' @family initializers +#' @seealso +#' + +# + +#' @tether keras.initializers.GlorotNormal +initializer_glorot_normal <- +function (seed = NULL) +{ + args <- capture_args(list(seed = as_integer)) + do.call(keras$initializers$GlorotNormal, args) } -#' Initializer capable of adapting its scale to the shape of weights. 
-#' -#' With `distribution="normal"`, samples are drawn from a truncated normal -#' distribution centered on zero, with `stddev = sqrt(scale / n)` where n is: -#' - number of input units in the weight tensor, if mode = "fan_in" -#' - number of output units, if mode = "fan_out" -#' - average of the numbers of input and output units, if mode = "fan_avg" -#' -#' With `distribution="uniform"`, samples are drawn from a uniform distribution -#' within `-limit, limit`, with `limit = sqrt(3 * scale / n)`. -#' -#' @inheritParams initializer_random_normal -#' -#' @param scale Scaling factor (positive float). -#' @param mode One of "fan_in", "fan_out", "fan_avg". -#' @param distribution One of "truncated_normal", "untruncated_normal" and "uniform". -#' For backward compatibility, "normal" will be accepted and converted to -#' "untruncated_normal". -#' -#' @family initializers -#' + +#' The Glorot uniform initializer, also called Xavier uniform initializer. +#' +#' @description +#' Draws samples from a uniform distribution within `[-limit, limit]`, where +#' `limit = sqrt(6 / (fan_in + fan_out))` (`fan_in` is the number of input +#' units in the weight tensor and `fan_out` is the number of output units). +#' +#' # Examples +#' ```{r} +#' # Standalone usage: +#' initializer <- initializer_glorot_uniform() +#' values <- initializer(shape = c(2, 2)) +#' ``` +#' +#' ```{r} +#' # Usage in a Keras layer: +#' initializer <- initializer_glorot_uniform() +#' layer <- layer_dense(units = 3, kernel_initializer = initializer) +#' ``` +#' +#' # Reference +#' - [Glorot et al., 2010](https://proceedings.mlr.press/v9/glorot10a.html) +#' +#' @param seed +#' An integer or instance of +#' `random_seed_generator()`. +#' Used to make the behavior of the initializer +#' deterministic. Note that an initializer seeded with an integer +#' or `NULL` (unseeded) will produce the same random values +#' across multiple calls. To get different random values +#' across multiple calls, use as seed an instance +#' of `random_seed_generator()`. +#' +#' @inherit initializer_constant return #' @export -initializer_variance_scaling <- function(scale = 1.0, mode = c("fan_in", "fan_out", "fan_avg"), - distribution = c("normal", "uniform", "truncated_normal", "untruncated_normal"), - seed = NULL) { - if (get_keras_implementation() == "tensorflow" && tensorflow::tf_version() >= "2.0") { - - distribution <- match.arg(distribution) - - if (distribution == "normal") - distribution <- "untruncated_normal" - - keras$initializers$VarianceScaling( - scale = scale, - mode = match.arg(mode), - distribution = distribution, - seed = as_nullable_integer(seed) - ) - - } else { - keras$initializers$VarianceScaling( - scale = scale, - mode = match.arg(mode), - distribution = match.arg(distribution), - seed = as_nullable_integer(seed) - ) - } +#' @family random initializers +#' @family initializers +#' @seealso +#' + +# + +#' @tether keras.initializers.GlorotUniform +initializer_glorot_uniform <- +function (seed = NULL) +{ + args <- capture_args(list(seed = as_integer)) + do.call(keras$initializers$GlorotUniform, args) } -#' Initializer that generates a random orthogonal matrix. -#' -#' @inheritParams initializer_random_normal -#' -#' @param gain Multiplicative factor to apply to the orthogonal matrix. -#' -#' @section References: -#' Saxe et al., +#' He normal initializer. 
#' -#' @family initializers +#' @description +#' It draws samples from a truncated normal distribution centered on 0 with +#' `stddev = sqrt(2 / fan_in)` where `fan_in` is the number of input units in +#' the weight tensor. #' +#' # Examples +#' ```{r} +#' # Standalone usage: +#' initializer <- initializer_he_normal() +#' values <- initializer(shape = c(2, 2)) +#' ``` +#' +#' ```{r} +#' # Usage in a Keras layer: +#' initializer <- initializer_he_normal() +#' layer <- layer_dense(units = 3, kernel_initializer = initializer) +#' ``` +#' +#' # Reference +#' - [He et al., 2015](https://arxiv.org/abs/1502.01852) +#' +#' @param seed +#' An integer or instance of +#' `random_seed_generator()`. +#' Used to make the behavior of the initializer +#' deterministic. Note that an initializer seeded with an integer +#' or `NULL` (unseeded) will produce the same random values +#' across multiple calls. To get different random values +#' across multiple calls, use as seed an instance +#' of `random_seed_generator()`. +#' +#' @inherit initializer_constant return #' @export -initializer_orthogonal <- function(gain = 1.0, seed = NULL) { - keras$initializers$Orthogonal( - gain = gain, - seed = as_nullable_integer(seed) - ) +#' @family random initializers +#' @family initializers +#' @seealso +#' + +# + +#' @tether keras.initializers.HeNormal +initializer_he_normal <- +function (seed = NULL) +{ + args <- capture_args(list(seed = as_integer)) + do.call(keras$initializers$HeNormal, args) } -#' Initializer that generates the identity matrix. -#' -#' Only use for square 2D matrices. -#' -#' @param gain Multiplicative factor to apply to the identity matrix -#' -#' @family initializers +#' He uniform variance scaling initializer. #' +#' @description +#' Draws samples from a uniform distribution within `[-limit, limit]`, where +#' `limit = sqrt(6 / fan_in)` (`fan_in` is the number of input units in the +#' weight tensor). +#' +#' # Examples +#' ```{r} +#' # Standalone usage: +#' initializer <- initializer_he_uniform() +#' values <- initializer(shape = c(2, 2)) +#' ``` +#' +#' ```{r} +#' # Usage in a Keras layer: +#' initializer <- initializer_he_uniform() +#' layer <- layer_dense(units = 3, kernel_initializer = initializer) +#' ``` +#' +#' # Reference +#' - [He et al., 2015](https://arxiv.org/abs/1502.01852) +#' +#' @param seed +#' A integer or instance of +#' `random_seed_generator()`. +#' Used to make the behavior of the initializer +#' deterministic. Note that an initializer seeded with an integer +#' or `NULL` (unseeded) will produce the same random values +#' across multiple calls. To get different random values +#' across multiple calls, use as seed an instance +#' of `random_seed_generator()`. +#' +#' @inherit initializer_constant return #' @export -initializer_identity <- function(gain = 1.0) { - keras$initializers$Identity( - gain = gain - ) +#' @family random initializers +#' @family initializers +#' @seealso +#' + +# + +#' @tether keras.initializers.HeUniform +initializer_he_uniform <- +function (seed = NULL) +{ + args <- capture_args(list(seed = as_integer)) + do.call(keras$initializers$HeUniform, args) } -#' LeCun normal initializer. -#' -#' It draws samples from a truncated normal distribution centered on 0 with -#' `stddev <- sqrt(1 / fan_in)` where `fan_in` is the number of input units in -#' the weight tensor.. -#' -#' @param seed A Python integer. Used to seed the random generator. + +#' Lecun normal initializer. 
#' -#' @section References: -#' - [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) -#' - Efficient Backprop, \cite{LeCun, Yann et al. 1998} +#' @description +#' Initializers allow you to pre-specify an initialization strategy, encoded in +#' the Initializer object, without knowing the shape and dtype of the variable +#' being initialized. #' -#' @family initializers +#' Draws samples from a truncated normal distribution centered on 0 with +#' `stddev = sqrt(1 / fan_in)` where `fan_in` is the number of input units in +#' the weight tensor. #' +#' # Examples +#' ```{r} +#' # Standalone usage: +#' initializer <- initializer_lecun_normal() +#' values <- initializer(shape = c(2, 2)) +#' ``` +#' +#' ```{r} +#' # Usage in a Keras layer: +#' initializer <- initializer_lecun_normal() +#' layer <- layer_dense(units = 3, kernel_initializer = initializer) +#' ``` +#' +#' # Reference +#' - [Klambauer et al., 2017](https://arxiv.org/abs/1706.02515) +#' +#' @param seed +#' An integer or instance of +#' `random_seed_generator()`. +#' Used to make the behavior of the initializer +#' deterministic. Note that an initializer seeded with an integer +#' or `NULL` (unseeded) will produce the same random values +#' across multiple calls. To get different random values +#' across multiple calls, use as seed an instance +#' of `random_seed_generator()`. +#' +#' @inherit initializer_constant return #' @export -initializer_lecun_normal <- function(seed = NULL) { - keras$initializers$lecun_normal( - seed = as_nullable_integer(seed) - ) +#' @family random initializers +#' @family initializers +# @seealso +# + +#' @tether keras.initializers.LecunNormal +initializer_lecun_normal <- +function (seed = NULL) +{ + args <- capture_args(list(seed = as_integer)) + do.call(keras$initializers$LecunNormal, args) } - -#' Glorot normal initializer, also called Xavier normal initializer. -#' -#' It draws samples from a truncated normal distribution centered on 0 -#' with `stddev = sqrt(2 / (fan_in + fan_out))` -#' where `fan_in` is the number of input units in the weight tensor -#' and `fan_out` is the number of output units in the weight tensor. -#' -#' @inheritParams initializer_random_normal -#' -#' @section References: -#' Glorot & Bengio, AISTATS 2010 -#' -#' @family initializers -#' +#' Lecun uniform initializer. +#' +#' @description +#' Draws samples from a uniform distribution within `[-limit, limit]`, where +#' `limit = sqrt(3 / fan_in)` (`fan_in` is the number of input units in the +#' weight tensor). +#' +#' # Examples +#' ```{r} +#' # Standalone usage: +#' initializer <- initializer_lecun_uniform() +#' values <- initializer(shape = c(2, 2)) +#' ``` +#' +#' ```{r} +#' # Usage in a Keras layer: +#' initializer <- initializer_lecun_uniform() +#' layer <- layer_dense(units = 3, kernel_initializer = initializer) +#' ``` +#' +#' # Reference +#' - [Klambauer et al., 2017](https://arxiv.org/abs/1706.02515) +#' +#' @param seed +#' An integer or instance of +#' `random_seed_generator()`. +#' Used to make the behavior of the initializer +#' deterministic. Note that an initializer seeded with an integer +#' or `NULL` (unseeded) will produce the same random values +#' across multiple calls. To get different random values +#' across multiple calls, use as seed an instance +#' of `random_seed_generator()`. 
+#' +#' @inherit initializer_constant return #' @export -initializer_glorot_normal <- function(seed = NULL) { - keras$initializers$glorot_normal( - seed = as_nullable_integer(seed) - ) +#' @family random initializers +#' @family initializers +# @seealso +# + +#' @tether keras.initializers.LecunUniform +initializer_lecun_uniform <- +function (seed = NULL) +{ + args <- capture_args(list(seed = as_integer)) + do.call(keras$initializers$LecunUniform, args) } -#' Glorot uniform initializer, also called Xavier uniform initializer. -#' -#' It draws samples from a uniform distribution within `-limit, limit` -#' where `limit` is `sqrt(6 / (fan_in + fan_out))` -#' where `fan_in` is the number of input units in the weight tensor -#' and `fan_out` is the number of output units in the weight tensor. -#' -#' @inheritParams initializer_random_normal -#' -#' @section References: -#' Glorot & Bengio, AISTATS 2010 -#' -#' @family initializers -#' +#' Initializer that generates an orthogonal matrix. +#' +#' @description +#' If the shape of the tensor to initialize is two-dimensional, it is +#' initialized with an orthogonal matrix obtained from the QR decomposition of +#' a matrix of random numbers drawn from a normal distribution. If the matrix +#' has fewer rows than columns then the output will have orthogonal rows. +#' Otherwise, the output will have orthogonal columns. +#' +#' If the shape of the tensor to initialize is more than two-dimensional, +#' a matrix of shape `(shape[1] * ... * shape[n - 1], shape[n])` +#' is initialized, where `n` is the length of the shape vector. +#' The matrix is subsequently reshaped to give a tensor of the desired shape. +#' +#' # Examples +#' ```{r} +#' # Standalone usage: +#' initializer <- initializer_orthogonal() +#' values <- initializer(shape = c(2, 2)) +#' ``` +#' +#' ```{r} +#' # Usage in a Keras layer: +#' initializer <- initializer_orthogonal() +#' layer <- layer_dense(units = 3, kernel_initializer = initializer) +#' ``` +#' +#' # Reference +#' - [Saxe et al., 2014](https://openreview.net/forum?id=_wzZwKpTDF_9C) +#' +#' @param gain +#' Multiplicative factor to apply to the orthogonal matrix. +#' +#' @param seed +#' An integer. Used to make the behavior of the initializer +#' deterministic. +#' +#' @inherit initializer_constant return #' @export -initializer_glorot_uniform <- function(seed = NULL) { - keras$initializers$glorot_uniform( - seed = as_nullable_integer(seed) - ) +#' @family random initializers +#' @family initializers +# @seealso +# + +#' @tether keras.initializers.OrthogonalInitializer +initializer_orthogonal <- +function (gain = 1, seed = NULL) +{ + args <- capture_args(list(seed = as_integer)) + do.call(keras$initializers$OrthogonalInitializer, args) } -#' He normal initializer. -#' -#' It draws samples from a truncated normal distribution centered on 0 with -#' `stddev = sqrt(2 / fan_in)` where `fan_in` is the number of input units in -#' the weight tensor. -#' -#' @inheritParams initializer_random_normal -#' -#' @section References: He et al., https://arxiv.org/abs/1502.01852 -#' -#' @family initializers -#' +#' Random normal initializer. +#' +#' @description +#' Draws samples from a normal distribution for given parameters. 
+#' +#' # Examples +#' ```{r} +#' # Standalone usage: +#' initializer <- initializer_random_normal(mean = 0.0, stddev = 1.0) +#' values <- initializer(shape = c(2, 2)) +#' ``` +#' +#' ```{r} +#' # Usage in a Keras layer: +#' initializer <- initializer_random_normal(mean = 0.0, stddev = 1.0) +#' layer <- layer_dense(units = 3, kernel_initializer = initializer) +#' ``` +#' +#' @param mean +#' A numeric scalar. Mean of the random +#' values to generate. +#' +#' @param stddev +#' A numeric scalar. Standard deviation of +#' the random values to generate. +#' +#' @param seed +#' An integer or instance of +#' `random_seed_generator()`. +#' Used to make the behavior of the initializer +#' deterministic. Note that an initializer seeded with an integer +#' or `NULL` (unseeded) will produce the same random values +#' across multiple calls. To get different random values +#' across multiple calls, use as seed an instance +#' of `random_seed_generator()`. +#' +#' @inherit initializer_constant return #' @export -initializer_he_normal <- function(seed = NULL) { - keras$initializers$he_normal( - seed = seed - ) +#' @family random initializers +#' @family initializers +#' @seealso +#' + +# + +#' @tether keras.initializers.RandomNormal +initializer_random_normal <- +function (mean = 0, stddev = 0.05, seed = NULL) +{ + args <- capture_args(list(seed = as_integer)) + do.call(keras$initializers$RandomNormal, args) } -#' He uniform variance scaling initializer. -#' -#' It draws samples from a uniform distribution within `-limit, limit` where -#' `limit`` is `sqrt(6 / fan_in)` where `fan_in` is the number of input units in the -#' weight tensor. -#' -#' @inheritParams initializer_random_normal -#' -#' @section References: He et al., https://arxiv.org/abs/1502.01852 -#' + +#' Random uniform initializer. +#' +#' @description +#' Draws samples from a uniform distribution for given parameters. +#' +#' # Examples +#' ```{r} +#' # Standalone usage: +#' initializer <- initializer_random_uniform(minval = 0.0, maxval = 1.0) +#' values <- initializer(shape = c(2, 2)) +#' ``` +#' +#' ```{r} +#' # Usage in a Keras layer: +#' initializer <- initializer_random_uniform(minval = 0.0, maxval = 1.0) +#' layer <- layer_dense(units = 3, kernel_initializer = initializer) +#' ``` +#' +#' @param minval +#' A numeric scalar or a scalar keras tensor. Lower bound of the +#' range of random values to generate (inclusive). +#' +#' @param maxval +#' A numeric scalar or a scalar keras tensor. Upper bound of the +#' range of random values to generate (exclusive). +#' +#' @param seed +#' An integer or instance of +#' `random_seed_generator()`. +#' Used to make the behavior of the initializer +#' deterministic. Note that an initializer seeded with an integer +#' or `NULL` (unseeded) will produce the same random values +#' across multiple calls. To get different random values +#' across multiple calls, use as seed an instance +#' of `random_seed_generator()`. +#' +#' @inherit initializer_constant return +#' @export +#' @family random initializers #' @family initializers +#' @seealso +#' + +# + +#' @tether keras.initializers.RandomUniform +initializer_random_uniform <- +function (minval = -0.05, maxval = 0.05, seed = NULL) +{ + args <- capture_args(list(seed = as_integer)) + do.call(keras$initializers$RandomUniform, args) +} + + +#' Initializer that generates a truncated normal distribution. 
#' +#' @description +#' The values generated are similar to values from a +#' `RandomNormal` initializer, except that values more +#' than two standard deviations from the mean are +#' discarded and re-drawn. +#' +#' # Examples +#' ```{r} +#' # Standalone usage: +#' initializer <- initializer_truncated_normal(mean = 0, stddev = 1) +#' values <- initializer(shape = c(2, 2)) +#' ``` +#' +#' ```{r} +#' # Usage in a Keras layer: +#' initializer <- initializer_truncated_normal(mean = 0, stddev = 1) +#' layer <- layer_dense(units = 3, kernel_initializer = initializer) +#' ``` +#' +#' @param mean +#' A numeric scalar. Mean of the random +#' values to generate. +#' +#' @param stddev +#' A numeric scalar. Standard deviation of +#' the random values to generate. +#' +#' @param seed +#' An integer or instance of +#' `random_seed_generator()`. +#' Used to make the behavior of the initializer +#' deterministic. Note that an initializer seeded with an integer +#' or `NULL` (unseeded) will produce the same random values +#' across multiple calls. To get different random values +#' across multiple calls, use as seed an instance +#' of `random_seed_generator()`. +#' +#' @inherit initializer_constant return #' @export -initializer_he_uniform <- function(seed = NULL) { - keras$initializers$he_uniform( - seed = as_nullable_integer(seed) - ) +#' @family random initializers +#' @family initializers +#' @seealso +#' + +# + +#' @tether keras.initializers.TruncatedNormal +initializer_truncated_normal <- +function (mean = 0, stddev = 0.05, seed = NULL) +{ + args <- capture_args(list(seed = as_integer)) + do.call(keras$initializers$TruncatedNormal, args) } -#' LeCun uniform initializer. -#' -#' It draws samples from a uniform distribution within `-limit, limit` where -#' `limit` is `sqrt(3 / fan_in)` where `fan_in` is the number of input units in -#' the weight tensor. -#' -#' @inheritParams initializer_random_normal -#' -#' @section References: LeCun 98, Efficient Backprop, -#' + +#' Initializer that adapts its scale to the shape of its input tensors. +#' +#' @description +#' With `distribution = "truncated_normal" or "untruncated_normal"`, samples are +#' drawn from a truncated/untruncated normal distribution with a mean of zero +#' and a standard deviation (after truncation, if used) `stddev = sqrt(scale / +#' n)`, where `n` is: +#' +#' - number of input units in the weight tensor, if `mode = "fan_in"` +#' - number of output units, if `mode = "fan_out"` +#' - average of the numbers of input and output units, if `mode = "fan_avg"` +#' +#' With `distribution = "uniform"`, samples are drawn from a uniform distribution +#' within `[-limit, limit]`, where `limit = sqrt(3 * scale / n)`. +#' +#' # Examples +#' ```{r} +#' # Standalone usage: +#' initializer <- initializer_variance_scaling(scale = 0.1, mode = 'fan_in', +#' distribution = 'uniform') +#' values <- initializer(shape = c(2, 2)) +#' ``` +#' +#' ```{r} +#' # Usage in a Keras layer: +#' initializer <- initializer_variance_scaling(scale = 0.1, mode = 'fan_in', +#' distribution = 'uniform') +#' layer <- layer_dense(units = 3, kernel_initializer = initializer) +#' ``` +#' +#' @param scale +#' Scaling factor (positive float). +#' +#' @param mode +#' One of `"fan_in"`, `"fan_out"`, `"fan_avg"`. +#' +#' @param distribution +#' Random distribution to use. +#' One of `"truncated_normal"`, `"untruncated_normal"`, or `"uniform"`. +#' +#' @param seed +#' An integer or instance of +#' `random_seed_generator()`. 
+#' Used to make the behavior of the initializer +#' deterministic. Note that an initializer seeded with an integer +#' or `NULL` (unseeded) will produce the same random values +#' across multiple calls. To get different random values +#' across multiple calls, use as seed an instance +#' of `random_seed_generator()`. +#' +#' @inherit initializer_constant return +#' @export +#' @family random initializers #' @family initializers -#' +#' @seealso +#' + +# + +#' @tether keras.initializers.VarianceScaling +initializer_variance_scaling <- +function (scale = 1, mode = "fan_in", distribution = "truncated_normal", + seed = NULL) +{ + args <- capture_args(list(seed = as_integer)) + do.call(keras$initializers$VarianceScaling, args) +} + + + #' @export -initializer_lecun_uniform <- function(seed = NULL) { - keras$initializers$lecun_uniform( - seed = as_nullable_integer(seed) - ) +py_to_r_wrapper.keras.src.initializers.initializer.Initializer <- function(x) { + force(x) + as.function.default(c(formals(x), quote({ + args <- capture_args(list(shape = normalize_shape)) + do.call(x, args) + }))) } diff --git a/R/install.R b/R/install.R index 803615899d..79b3c18c8c 100644 --- a/R/install.R +++ b/R/install.R @@ -1,134 +1,209 @@ -#' Install TensorFlow and Keras, including all Python dependencies +#' Install Keras #' -#' This function will install Tensorflow and all Keras dependencies. This is a -#' thin wrapper around [`tensorflow::install_tensorflow()`], with the only -#' difference being that this includes by default additional extra packages that -#' keras expects, and the default version of tensorflow installed by -#' `install_keras()` may at times be different from the default installed -#' `install_tensorflow()`. The default version of tensorflow installed by -#' `install_keras()` is "`r default_version`". +#' This function will install Keras along with a selected backend, including all Python dependencies. #' -#' @details The default additional packages are: -#' `r paste(default_extra_packages("nightly"), collapse = ", ")`, with their -#' versions potentially constrained for compatibility with the -#' requested tensorflow version. +#' @param envname Name of or path to a Python virtual environment +#' @param extra_packages Additional Python packages to install alongside Keras +#' @param python_version Passed on to `reticulate::virtualenv_starter()` +#' @param backend Which backend(s) to install. Accepted values include `"tensorflow"`, `"jax"` and `"torch"` +#' @param gpu whether to install a GPU capable version of the backend. +#' @param restart_session Whether to restart the R session after installing (note this will only occur within RStudio). +#' @param ... reserved for future compatibility. #' -#' @inheritParams tensorflow::install_tensorflow -#' -#' @param tensorflow Synonym for `version`. Maintained for backwards. +#' @returns No return value, called for side effects. 
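One note on the initializers rewrite above: the He/Glorot/Lecun presets are special cases of `initializer_variance_scaling()`, and the new `py_to_r_wrapper()` method makes initializer instances directly callable from R. A sketch of the equivalence (assuming equal seeds yield identical draws):

```r
init_he <- initializer_he_normal(seed = 1L)
init_vs <- initializer_variance_scaling(
  scale = 2, mode = "fan_in", distribution = "truncated_normal", seed = 1L
)

a <- init_he(shape = c(4, 4))  # callable thanks to py_to_r_wrapper()
b <- init_vs(shape = c(4, 4))
all.equal(as.array(a), as.array(b))  # expected TRUE
```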
#' #' @seealso [`tensorflow::install_tensorflow()`] #' @export -install_keras <- function(method = c("auto", "virtualenv", "conda"), - conda = "auto", - version = "default", - tensorflow = version, - extra_packages = NULL, - ..., - pip_ignore_installed = TRUE) { - method <- match.arg(method) - - if(is_mac_arm64()) { - return(tensorflow::install_tensorflow( - method = method, - conda = conda, - version = version, - extra_packages = c("pandas", "Pillow", - "pydot", - "tensorflow-hub", - "tensorflow-datasets", - extra_packages), - ...)) +install_keras <- function( + envname = "r-keras", ..., + extra_packages = c("scipy", "pandas", "Pillow", "pydot", "ipython", "tensorflow_datasets"), + python_version = ">=3.9,<=3.11", + # backend = "tensorflow", + backend = c("tensorflow", "jax"), + # backend = "tf-nightly", + gpu = NA, + restart_session = TRUE) { + + if (is.na(gpu)) { + has_nvidia_gpu <- function() + tryCatch(as.logical(length(system("lspci | grep -i nvidia", intern = TRUE))), + warning = function(w) FALSE) + # don't install tensorflow-metal until it's been updated + # https://pypi.org/project/tensorflow-metal/#history + gpu <- (is_linux() && has_nvidia_gpu()) ## || is_mac_arm64() } - pkgs <- default_extra_packages(tensorflow) - if(!is.null(extra_packages)) # user supplied package version constraints take precedence - pkgs[gsub("[=<>~]{1,2}[0-9.]+$", "", extra_packages)] <- extra_packages - - if(tensorflow %in% c("cpu", "gpu")) - tensorflow <- paste0("default-", tensorflow) - - if(grepl("^default", tensorflow)) - tensorflow <- sub("^default", as.character(default_version), tensorflow) + # keras requires tensorflow be installed still. + if(!any(grepl("tensorflow|tf-nightly", backend))) + backend <- c("tensorflow", backend) + + if (isTRUE(gpu)) { + message("Installing GPU components") + if (is_mac_arm64()) { + jax <- c("jax-metal") # ??? do we still need this, "ml-dtypes==0.2.*") + ## installation of 'tensorflow-metal' is disabled until a new version that + ## is compatible with TF v2.16 is released. + # tensorflow <- c("tensorflow", "tensorflow-metal") + } else if (is_linux()) { + jax <- c("jax[cuda12_pip]", "-f", + "https://storage.googleapis.com/jax-releases/jax_cuda_releases.html") + tensorflow <- "tensorflow[and-cuda]" + } + } else { # no GPU + jax <- "jax[cpu]" + tensorflow <- if(is_linux()) "tensorflow-cpu" else "tensorflow" + } - tensorflow::install_tensorflow( - method = match.arg(method), - conda = conda, - version = tensorflow, - extra_packages = pkgs, - pip_ignore_installed = pip_ignore_installed, - ... 
+ # The "numpy" backend requires that "jax" be installed + # if("jax" %in% backend && !is.null(extra_packages)) + # # undeclared dependency, import fails otherwise + # append(extra_packages) <- "packaging" + + backend <- unlist(lapply(backend, function(name) + switch(name, + jax = jax, + tensorflow = tensorflow, + "tf-nightly" = local({ + tensorflow <- sub("tensorflow", "tf-nightly", x = tensorflow, fixed = TRUE) + replace_val(tensorflow, "tf-nightly-metal", "tensorflow-metal") + }), + name) + )) + + reticulate::virtualenv_create( + envname = envname, + version = python_version, + force = identical(envname, "r-keras"), + packages = NULL ) -} + extra_packages <- unique(extra_packages) + if (length(extra_packages)) + reticulate::py_install(extra_packages, envname = envname) -default_version <- numeric_version("2.9") - -default_extra_packages <- function(tensorflow_version = "default") { - pkgs <- c( - "tensorflow-hub", - "scipy", - "requests", - "pyyaml", - "Pillow", - "h5py", - "pandas", - "pydot") - names(pkgs) <- pkgs - v <- tensorflow_version - - if(grepl("nightly|release", v)) - return(pkgs) - - ## extract just the version - # drop potential suffix - v <- sub("-?(gpu|cpu)$", "", v) - # treat rc as regular patch release - v <- sub("rc[0-9]+", "", v) - - constraint <- sub("^([><=~]{,2}).*", "\\1", v) - v <- substr(v, nchar(constraint)+1, nchar(v)) - - if(v %in% c("default", "")) # "" might be from cpu|gpu - v <- default_version - - v <- numeric_version(v) - if(nzchar(constraint)) { - # try to accommodate user supplied constraints by bumping `v` up or down - l <- length(unclass(v)[[1]]) - switch(constraint, - ">" = v[[1, l + 1]] <- 1, - "<" = { - v <- unclass(v)[[1]] - if(v[l] == 0) l <- l-1 - v[c(l, l+1)] <- c(v[l] - 1, 9999) - v <- numeric_version(paste0(v, collapse = ".")) - }, - "~=" = v[[1, l]] <- 9999) - } + if (length(backend)) + reticulate::py_install(backend, envname = envname) - if (v >= "2.6") { - # model.to_yaml/from_yaml removed in 2.6 - pkgs <- pkgs[names(pkgs) != "pyyaml"] - return(pkgs) + if(gpu && is_linux()) { + configure_cudnn_symlinks(envname = envname) } - if (v >= "2.4") { - pkgs["Pillow"] <- "Pillow<8.3" - return(pkgs) + reticulate::py_install("keras==3.*", envname = envname) + #, pip_ignore_installed = TRUE) + + message("Finished installing Keras!") + if (restart_session && requireNamespace("rstudioapi", quietly = TRUE) && + rstudioapi::hasFun("restartSession")) { + rstudioapi::restartSession() } - if (v >= "2.1") { - pkgs["pyyaml"] <- "pyyaml==3.12" - pkgs["h5py"] <- "h5py==2.10.0" - return(pkgs) + invisible(NULL) +} + +is_linux <- function() { + unname(Sys.info()[["sysname"]] == "Linux") +} + +#' Configure a Keras backend #' +#' @param backend string, can be `"tensorflow"`, `"jax"`, `"numpy"`, or `"torch"`. +#' +#' @details +#' These functions allow configuring which backend keras will use. +#' Note that only one backend can be configured at a time. +#' +#' The function should be called after `library(keras3)` and before calling +#' other functions within the package (see below for an example). +#' +#' There is experimental support for changing the backend after keras has initialized, +#' using `config_set_backend()`. +#' ```r +#' library(keras3) +#' use_backend("tensorflow") +#' ``` +#' @returns Called primarily for side effects. Returns the provided `backend`, invisibly. +#' @export +use_backend <- function(backend) { + + if (is_keras_loaded()) { + if (config_backend() != backend) + stop("The keras backend must be set before keras has initialized.
Please restart the R session.") } + Sys.setenv(KERAS_BACKEND = backend) + + if (reticulate::py_available()) + reticulate::import("os")$environ$update(list(KERAS_BACKEND = backend)) + invisible(backend) +} + + +is_keras_loaded <- function() { + # package .onLoad() has run (can be FALSE if in devtools::load_all()) + !is.null(keras) && + + # python is initialized + reticulate::py_available() && + + # the keras module proxy has been resolved + # (reticulate:::py_module_proxy_import() + # removes 'module' from the lazy_loaded PyObjectRef module env) + !exists("module", envir = keras) +} + + + +get_cudnn_path <- function(python) { + + # For TF 2.13, this assumes that someone already has cuda 11-8 installed, + # e.g., on ubuntu: + # sudo apt install cuda-toolkit-11-8 + # and also that `python -m pip install 'nvidia-cudnn-cu11==8.6.*'` has been run + + force(python) + cudnn_module_path <- suppressWarnings(system2( + python, c("-c", shQuote("import nvidia.cudnn;print(nvidia.cudnn.__file__)")), + stdout = TRUE, stderr = TRUE)) + if (!is.null(attr(cudnn_module_path, "status")) || + !is_string(cudnn_module_path) || + !file.exists(cudnn_module_path)) + return() + + dirname(cudnn_module_path) - pkgs } +configure_cudnn_symlinks <- function(envname) { + python <- reticulate::virtualenv_python(envname) + + cudnn_path <- get_cudnn_path(python) + # "~/.virtualenvs/r-keras/lib/python3.11/site-packages/nvidia/cudnn" + + cudnn_sos <- Sys.glob(paste0(cudnn_path, "/lib/*.so*")) + # [1] "~/.virtualenvs/r-keras/lib/python3.11/site-packages/nvidia/cudnn/lib/libcudnn_adv_infer.so.8" + # [2] "~/.virtualenvs/r-keras/lib/python3.11/site-packages/nvidia/cudnn/lib/libcudnn_adv_train.so.8" + # [3] "~/.virtualenvs/r-keras/lib/python3.11/site-packages/nvidia/cudnn/lib/libcudnn_cnn_infer.so.8" + # [4] "~/.virtualenvs/r-keras/lib/python3.11/site-packages/nvidia/cudnn/lib/libcudnn_cnn_train.so.8" + # [5] "~/.virtualenvs/r-keras/lib/python3.11/site-packages/nvidia/cudnn/lib/libcudnn_ops_infer.so.8" + # [6] "~/.virtualenvs/r-keras/lib/python3.11/site-packages/nvidia/cudnn/lib/libcudnn_ops_train.so.8" + # [7] "~/.virtualenvs/r-keras/lib/python3.11/site-packages/nvidia/cudnn/lib/libcudnn.so.8" + + # "/home/tomasz/.virtualenvs/r-tensorflow/lib/python3.8/site-packages/tensorflow/__init__.py" + tf_lib_path <- system2(python, c("-c", shQuote("import tensorflow as tf; print(tf.__file__)")), + stderr = FALSE, stdout = TRUE) + tf_lib_path <- dirname(tf_lib_path) + + from <- sub("^.*/site-packages/", "../", cudnn_sos) + to <- file.path(tf_lib_path, basename(cudnn_sos)) + writeLines("creating symlinks:") + writeLines(paste("-", shQuote(to), "->", shQuote(from))) +# creating symlinks: +# - '~/.virtualenvs/r-keras/lib/python3.11/site-packages/tensorflow/libcudnn_adv_infer.so.8' -> '../nvidia/cudnn/lib/libcudnn_adv_infer.so.8' +# - '~/.virtualenvs/r-keras/lib/python3.11/site-packages/tensorflow/libcudnn_adv_train.so.8' -> '../nvidia/cudnn/lib/libcudnn_adv_train.so.8' +# - '~/.virtualenvs/r-keras/lib/python3.11/site-packages/tensorflow/libcudnn_cnn_infer.so.8' -> '../nvidia/cudnn/lib/libcudnn_cnn_infer.so.8' +# - '~/.virtualenvs/r-keras/lib/python3.11/site-packages/tensorflow/libcudnn_cnn_train.so.8' -> '../nvidia/cudnn/lib/libcudnn_cnn_train.so.8' +# - '~/.virtualenvs/r-keras/lib/python3.11/site-packages/tensorflow/libcudnn_ops_infer.so.8' -> '../nvidia/cudnn/lib/libcudnn_ops_infer.so.8' +# - '~/.virtualenvs/r-keras/lib/python3.11/site-packages/tensorflow/libcudnn_ops_train.so.8' -> '../nvidia/cudnn/lib/libcudnn_ops_train.so.8' +# - 
'~/.virtualenvs/r-keras/lib/python3.11/site-packages/tensorflow/libcudnn.so.8' -> '../nvidia/cudnn/lib/libcudnn.so.8' + file.symlink(from = from, to = to) + +} -# @inheritSection tensorflow::install_tensorflow "Custom Installation" "Apple Silicon" "Additional Packages" -# @inherit tensorflow::install_tensorflow details -# @inherit tensorflow::install_tensorflow params return references description details sections -# ## everything except 'seealso' to avoid this warning -# ## Warning: Link to unknown topic in inherited text: keras::install_keras diff --git a/R/jax-methods.R b/R/jax-methods.R new file mode 100644 index 0000000000..38e5e48108 --- /dev/null +++ b/R/jax-methods.R @@ -0,0 +1,10 @@ + +#' @export +as.array.jaxlib.xla_extension.ArrayImpl <- function(x, ...) { + import("numpy")$asarray(x) +} + +#' @export +as.double.jaxlib.xla_extension.ArrayImpl <- function(x, ...) { + as.double(import("numpy")$asarray(x)) +} diff --git a/R/keras3-package.R b/R/keras3-package.R new file mode 100644 index 0000000000..9e643f5876 --- /dev/null +++ b/R/keras3-package.R @@ -0,0 +1,8 @@ +#' @keywords internal +"_PACKAGE" + +## usethis namespace: start +#' @importFrom glue trim +#' @import reticulate +## usethis namespace: end +NULL diff --git a/R/layer-attention.R b/R/layer-attention.R deleted file mode 100644 index 6f8e5dd357..0000000000 --- a/R/layer-attention.R +++ /dev/null @@ -1,171 +0,0 @@ - -#' Creates attention layer -#' -#' Dot-product attention layer, a.k.a. Luong-style attention. -#' -#' @inheritParams layer_dense -#' -#' @param inputs a list of inputs first should be the query tensor, the second the value tensor -#' @param use_scale If True, will create a scalar variable to scale the attention scores. -#' @param causal Boolean. Set to True for decoder self-attention. Adds a mask such that position i cannot attend to positions j > i. -#' This prevents the flow of information from the future towards the past. -#' -#' @family core layers -#' @family attention layers -#' -#' @export -layer_attention <- function(inputs,use_scale=FALSE, causal = FALSE, batch_size = NULL, dtype = NULL, - name = NULL, trainable = NULL, weights = NULL) { - if (!is_tensorflow_implementation() || !tensorflow::tf_version() >= "1.14") - stop("layer_dense_features requires TensorFlow implementation and version >= 1.14") - create_layer(keras$layers$Attention, inputs, list( - use_scale = use_scale, - causal = causal, - batch_size = batch_size, - dtype = dtype, - name = name, - trainable = trainable, - weights = weights) - ) - - -} - -#' MultiHeadAttention layer -#' -#' This is an implementation of multi-headed attention based on "Attention is all -#' you Need". If query, key, value are the same, then this is self-attention. -#' Each timestep in query attends to the corresponding sequence in key, and returns -#' a fixed-width vector. -#' -#' This layer first projects query, key and value. These are (effectively) a list -#' of tensors of length num_attention_heads, where the corresponding shapes are -#' `[batch_size, , key_dim]`, `[batch_size, , key_dim]`, `[batch_size, , value_dim]`. -#' -#' Then, the query and key tensors are dot-producted and scaled. These are softmaxed -#' to obtain attention probabilities. The value tensors are then interpolated by -#' these probabilities, then concatenated back to a single tensor. -#' -#' Finally, the result tensor with the last dimension as value_dim can take an -#' linear projection and return. -#' -#' @inheritParams layer_attention -#' @param num_heads Number of attention heads. 
-#' @param key_dim Size of each attention head for query and key. -#' @param value_dim Size of each attention head for value. -#' @param dropout Dropout probability. -#' @param use_bias Boolean, whether the dense layers use bias vectors/matrices. -#' @param output_shape The expected shape of an output tensor, besides the batch and sequence dims. If not specified, projects back to the key feature dim. -#' @param attention_axes axes over which the attention is applied. None means attention over all axes, but batch, heads, and features. -#' @param kernel_initializer Initializer for dense layer kernels. -#' @param bias_initializer Initializer for dense layer biases. -#' @param kernel_regularizer Regularizer for dense layer kernels. -#' @param bias_regularizer Regularizer for dense layer biases. -#' @param activity_regularizer Regularizer for dense layer activity. -#' @param kernel_constraint Constraint for dense layer kernels. -#' @param bias_constraint Constraint for dense layer kernels. -#' @param ... Other arguments passed to the layer. Eg, `name`, `training`. -#' -#' @section Call arguments: -#' -#' * query: Query Tensor of shape `[B, T, dim]`. -#' * value: Value Tensor of shape `[B, S, dim]`. -#' * key: Optional key Tensor of shape `[B, S, dim]`. If not given, will use value -#' for both key and value, which is the most common case. -#' * attention_mask: a boolean mask of shape `[B, T, S]`, that prevents attention -#' to certain positions. -#' * return_attention_scores: A boolean to indicate whether the output should be -#' attention output if TRUE, or (attention_output, attention_scores) if FALSE. -#' Defaults to FALSE. -#' * training: Python boolean indicating whether the layer should behave in -#' training mode (adding dropout) or in inference mode (no dropout). Defaults -#' to either using the training mode of the parent layer/model, or FALSE -#' (inference) if there is no parent layer. -#' -#' @return -#' - attention_output: The result of the computation, of shape `[B, T, E]`, where -#' T is for target sequence shapes and E is the query input last dimension if -#' output_shape is None. Otherwise, the multi-head outputs are project to the -#' shape specified by output_shape. -#' - attention_scores: (Optional) multi-head attention coeffients over attention axes. -#' -#' @export -layer_multi_head_attention <- -function(inputs, - num_heads, - key_dim, - value_dim = NULL, - dropout = 0.0, - use_bias = TRUE, - output_shape = NULL, - attention_axes = NULL, - kernel_initializer = "glorot_uniform", - bias_initializer = "zeros", - kernel_regularizer = NULL, - bias_regularizer = NULL, - activity_regularizer = NULL, - kernel_constraint = NULL, - bias_constraint = NULL, - ...) { - if (tensorflow::tf_version() < "2.4") - stop("layer_multi_head_attention requires tf_version() >= 2.4") - - args <- capture_args(match.call(), - list(num_heads = as.integer, - key_dim = as.integer, - value_dim = as.integer), - ignore = "object") - - # intentionally don't pass `inputs` to avoid compose_layer() so we can - # unpack args to __call__ - layer <- create_layer(keras$layers$MultiHeadAttention, args = args) - - if (missing(inputs) || is.null(inputs)) - return(layer) - - if (!is.list(inputs)) - inputs <- list(inputs) - - do.call(layer, inputs) -} - - -#' Additive attention layer, a.k.a. Bahdanau-style attention -#' -#' @details -#' Inputs are `query` tensor of shape `[batch_size, Tq, dim]`, `value` tensor of -#' shape `[batch_size, Tv, dim]` and `key` tensor of shape -#' `[batch_size, Tv, dim]`. 
The calculation follows the steps: -#' -#' 1. Reshape `query` and `key` into shapes `[batch_size, Tq, 1, dim]` -#' and `[batch_size, 1, Tv, dim]` respectively. -#' 2. Calculate scores with shape `[batch_size, Tq, Tv]` as a non-linear -#' sum: `scores = tf.reduce_sum(tf.tanh(query + key), axis=-1)` -#' 3. Use scores to calculate a distribution with shape -#' `[batch_size, Tq, Tv]`: `distribution = tf$nn$softmax(scores)`. -#' 4. Use `distribution` to create a linear combination of `value` with -#' shape `[batch_size, Tq, dim]`: -#' `return tf$matmul(distribution, value)`. -#' -#' @inheritParams layer_dense -#' -#' @param use_scale If `TRUE`, will create a variable to scale the attention scores. -#' -#' @param causal Boolean. Set to `TRUE` for decoder self-attention. Adds a mask such -#' that position `i` cannot attend to positions `j > i`. This prevents the -#' flow of information from the future towards the past. -#' -#' @param dropout Float between 0 and 1. Fraction of the units to drop for the -#' attention scores. -#' @param ... standard layer arguments. -#' -#' @seealso -#' + -#' + -#' @export -layer_additive_attention <- -function(object, use_scale = TRUE, ..., causal = FALSE, dropout = 0) -{ - args <- capture_args(match.call(), NULL, ignore = "object") - create_layer(keras$layers$AdditiveAttention, object, args) -} diff --git a/R/layer-custom.R b/R/layer-custom.R index 96803eae25..06d765fa82 100644 --- a/R/layer-custom.R +++ b/R/layer-custom.R @@ -1,128 +1,4 @@ - -#' (Deprecated) Base R6 class for Keras layers -#' -#' Custom R6 layers can now inherit directly from `keras$layers$Layer` or other layers. -#' -#' @docType class -#' -#' @format An [R6Class] generator object -#' @section Methods: \describe{ \item{\code{build(input_shape)}}{Creates the -#' layer weights (must be implemented by all layers that have weights)} -#' \item{\code{call(inputs,mask)}}{Call the layer on an input tensor.} -#' \item{\code{compute_output_shape(input_shape)}}{Compute the output shape -#' for the layer.} -#' \item{\code{add_loss(losses, inputs)}}{Add losses to the layer.} -#' \item{\code{add_weight(name,shape,dtype,initializer,regularizer,trainable,constraint)}}{Adds -#' a weight variable to the layer.} } -#' -#' @return [KerasLayer]. -#' -#' @keywords internal -#' -#' @export -KerasLayer <- R6Class("KerasLayer", - - public = list( - - # Create the layer weights. - build = function(input_shape) { - - }, - - # Call the layer on an input tensor. - call = function(inputs, mask = NULL) { - stop("Keras custom layers must implement the call function") - }, - - # Compute the output shape for the layer. - compute_output_shape = function(input_shape) { - input_shape - }, - - # Add losses to the layer - add_loss = function(losses, inputs = NULL) { - args <- list() - args$losses <- losses - args$inputs <- inputs - do.call(private$wrapper$add_loss, args) - }, - - # Adds a weight variable to the layer. 
- add_weight = function(name, shape, dtype = NULL, initializer = NULL, - regularizer = NULL, trainable = TRUE, constraint = NULL) { - - args <- list() - args$name <- name - args$shape <- shape - args$dtype <- dtype - args$initializer <- initializer - args$regularizer <- regularizer - args$trainable <- trainable - args$constraint <- constraint - - do.call(private$wrapper$add_weight, args) - }, - - # back reference to python layer that wraps us - .set_wrapper = function(wrapper) { - private$wrapper <- wrapper - }, - - python_layer = function() { - private$wrapper - } - ), - - active = list( - input = function(value) { - if (missing(value)) return(private$wrapper$input) - else private$wrapper$input <- value - }, - output = function(value) { - if (missing(value)) return(private$wrapper$output) - else private$wrapper$output <- value - } - ), - - private = list( - wrapper = NULL - ) -) - - -compat_custom_KerasLayer_handler <- function(layer_class, args) { - # common layer parameters (e.g. "input_shape") need to be passed to the - # Python Layer constructor rather than the R6 constructor. Here we - # extract and set aside any of those arguments we find and set them to - # NULL within the args list which will be passed to the R6 layer - common_arg_names <- c("input_shape", "batch_input_shape", "batch_size", - "dtype", "name", "trainable", "weights") - - py_wrapper_args <- args[common_arg_names] - py_wrapper_args[sapply(py_wrapper_args, is.null)] <- NULL - for (arg in names(py_wrapper_args)) - args[[arg]] <- NULL - - # create the R6 layer - r6_layer <- do.call(layer_class$new, args) - - # create the python wrapper (passing the extracted py_wrapper_args) - python_path <- system.file("python", package = "keras") - tools <- import_from_path("kerastools", path = python_path) - py_wrapper_args$r_build <- r6_layer$build - py_wrapper_args$r_call <- reticulate::py_func(r6_layer$call) - py_wrapper_args$r_compute_output_shape <- r6_layer$compute_output_shape - layer <- do.call(tools$layer$RLayer, py_wrapper_args) - - # set back reference in R layer - r6_layer$.set_wrapper(layer) - list(layer, args) -} - - - - py_formals <- function(py_obj) { # returns python fn formals as a list # like base::formals(), but for py functions/methods @@ -150,7 +26,7 @@ py_formals <- function(py_obj) { default <- param$default - if (inherits(default, "python.builtin.object")) { + if (is_py_object(default)) { if (default != inspect$Parameter$empty) # must be something complex that failed to convert warning(glue::glue( @@ -175,15 +51,14 @@ py_formals <- function(py_obj) { #' @param modifiers A named list of functions to modify to user-supplied #' arguments before they are passed on to the class constructor. (e.g., #' `list(units = as.integer)`) -#' @param convert Boolean, whether the Python class and its methods should by -#' default convert python objects to R objects. +#' @param convert ignored. #' #' See guide 'making_new_layers_and_models_via_subclassing.Rmd' for example usage. #' -#' @return An R function that behaves similarly to the builtin keras `layer_*` +#' @returns An R function that behaves similarly to the builtin keras `layer_*` #' functions. When called, it will create the class instance, and also #' optionally call it on a supplied argument `object` if it is present. This -#' enables keras layers to compose nicely with the pipe (`%>%`). +#' enables keras layers to compose nicely with the pipe (`|>`). #' #' The R function will arguments taken from the `initialize` (or `__init__`) #' method of the Layer. 
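For orientation, here is a minimal sketch of the kind of wrapper this machinery produces, written against the public `Layer()` constructor; the `MyScale` class and its `scale` argument are hypothetical, used only for illustration:

```r
library(keras3)

# A hypothetical custom layer. Layer() returns an R wrapper function whose
# formals are taken from initialize(), the same shape of wrapper that
# create_layer_wrapper() builds.
layer_my_scale <- Layer(
  "MyScale",
  initialize = function(scale = 1, ...) {
    super$initialize(...)
    self$scale <- scale
  },
  call = function(inputs) inputs * self$scale
)

# The wrapper instantiates the class and composes with `object`:
model <- keras_model_sequential(input_shape = 8) |>
  layer_my_scale(scale = 2)
```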
@@ -191,53 +66,27 @@ py_formals <- function(py_obj) { #' If Layer is an R6 object, this will delay initializing the python #' session, so it is safe to use in an R package. #' -#' @export +#' @keywords internal +#' @noRd #' @importFrom rlang %||% create_layer_wrapper <- function(Layer, modifiers = NULL, convert = TRUE) { - force(Layer) - force(modifiers) - - wrapper <- function(object) { - args <- capture_args(match.call(), modifiers, ignore = "object") - create_layer(Layer, object, args) - } - - formals(wrapper) <- local({ - - if(inherits(Layer, "py_R6ClassGenerator")) - Layer <- attr(Layer, "r6_class") + if(!isTRUE(convert)) + warning("convert argument is ignored") - if (inherits(Layer, "python.builtin.type")) { - f <- py_formals(Layer) - } else if (inherits(Layer, "R6ClassGenerator")) { - m <- Layer$public_methods - init <- m$initialize %||% m$`__init__` %||% function(){} - f <- formals(init) - } else - stop('Unrecognized type passed `create_layer_wrapper()`.', - ' class() must be an "R6ClassGenerator" or a "python.builtin.type"') - f$self <- NULL - c(formals(wrapper), f) - }) - - class(wrapper) <- c("keras_layer_wrapper", "function") - attr(wrapper, "Layer") <- Layer - - # create_layer() will call r_to_py() as needed, but we create a promise here - # to avoid creating the class constructor from scratch every time a class - # instance is created. - if (!inherits(Layer, "python.builtin.type")) - delayedAssign("Layer", r_to_py(attr(wrapper, "Layer", TRUE), convert)) - - wrapper -} + out <- as.function.default( + c(alist(object = ), formals(Layer), + bquote({ + args <- capture_args(.(modifiers), ignore = "object") + create_layer(Layer, object, args) + })), + envir = list2env(list(Layer = Layer), + parent = parent.env(environment())) + ) -#' @export -r_to_py.keras_layer_wrapper <- function(fn, convert = FALSE) { - layer <- attr(fn, "Layer", TRUE) - if (!inherits(layer, "python.builtin.type")) - layer <- r_to_py(layer, convert) - layer + class(out) <- c("keras_Layer_wrapper", + "keras_layer_wrapper", + "function") + out } diff --git a/R/layer-methods.R b/R/layer-methods.R index 573b7e8fac..84d728a8cd 100644 --- a/R/layer-methods.R +++ b/R/layer-methods.R @@ -1,7 +1,6 @@ - #' Layer/Model configuration #' #' A layer config is an object returned from `get_config()` that contains the @@ -12,14 +11,16 @@ #' #' @param object Layer or model object #' @param config Object with layer or model configuration +#' @param custom_objects list of custom objects needed to instantiate the layer, +#' e.g., custom layers defined by `new_layer_class()` or similar. #' -#' @return `get_config()` returns an object with the configuration, +#' @returns `get_config()` returns an object with the configuration, #' `from_config()` returns a re-instantiation of the object. #' -#' @note Objects returned from `get_config()` are not serializable. Therefore, -#' if you want to save and restore a model across sessions, you can use the -#' `model_to_json()` function (for model configuration only, not weights) or -#' the `save_model_tf()` function to save the model configuration and weights +#' @note Objects returned from `get_config()` are not serializable via RDS. If +#' you want to save and restore a model across sessions, you can use +#' [`save_model_config()`] (for model configuration only, not weights) +#' or [`save_model()`] to save the model configuration and weights #' to the filesystem. 
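As a quick illustration of the round trip within a single session (a sketch; `layer_dense()` stands in for any layer or model):

```r
library(keras3)

layer <- layer_dense(units = 4, activation = "relu")
config <- get_config(layer)    # config object, tagged with the originating class
layer2 <- from_config(config)  # a fresh, unbuilt layer with the same settings
```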
#' #' @family model functions @@ -27,38 +28,72 @@ #' #' @export get_config <- function(object) { - - # call using lower level reticulate functions to prevent conversion to list - # (the object will remain a python dictionary for full fidelity) - get_fn <- py_get_attr(object, "get_config") - config <- py_call(get_fn) - - # set attribute indicating class - attr(config, "config_class") <- object$`__class__` + config <- object$get_config() + attr(config, "__class__") <- object$`__class__` config } - #' @rdname get_config #' @export -from_config <- function(config) { - class <- attr(config, "config_class") - class$from_config(config) +from_config <- function(config, custom_objects = NULL) { + class <- attr(config, "__class__", TRUE) #%||% keras$Model + class <- resolve_py_obj(class, env = parent.frame()) + if(is.null(class) || reticulate::py_is_null_xptr(class)) + stop(glue::trim(' + attr(config, "__class__") is an invalid pointer from a previous R session. + The output of `get_config()` is not serializable via RDS.')) + + args <- list(config) + args$custom_objects <- normalize_custom_objects(custom_objects) + do.call(class$from_config, args) } + +# TODO: we might be able to make get_config() output serializable via saveRDS, +# if we replace __class__ with a module address, like +# `__class__`$`__module__` and `__module__`$`__name__`, but we'd need checks +# to make sure it's builtin/ importable python module. +# +# attr(config, "__class__.__module__") <- `__class__`$`__module__` +# attr(config, "__class__.__name__") <- `__class__`$`__name__` + +# OR: make it serializable only for models: +# `__class__` <- object$`__class__` +# if (!py_is(`__class__`, keras$Model)) +# attr(config, "__class__") <- `__class__` +# Then in from_config(): class <- attr(...) %||% keras$Model + +# @param class The Keras class to restore. This can be: +# You can update with `attr(config, "__class__") <- <__class__>`, where <__class__> can be +# - An R function like `layer_dense` or a custom `Layer()` class. +# - An R language object like `quote(layer_dense)` (will be evaluated in the calling frame) +# - A Python class object, like `reticulate::import("keras")$layers$Dense`')) + +# class <- keras$Model +# class <- attr(config, "__class__", TRUE) +# if(is.null(class) || reticulate::py_is_null_xptr(class)) { +# stop("`attr(config, '__class__'` is a null pointer from an external session", +# "If you know the original config class, you can provide it as an R object (e.g., class = layer_dense)") +# class <- import(attr(config, "__class__.__module__", TRUE))[[attr(config, "__class__.__name__")]] +# } + + #' Layer/Model weights as R arrays #' #' @param object Layer or model object -#' @param trainable if `NA` (the default), all weights are returned. If `TRUE, ` +#' @param trainable if `NA` (the default), all weights are returned. If `TRUE`, +#' only weights of trainable variables are returned. If `FALSE`, only weights +#' of non-trainable variables are returned. #' @param weights Weights as R array #' -#' @note You can access the Layer/Model as `tf.Tensors` or `tf.Variables` at -#' `object$weights`, `object$trainable_weights`, or -#' `object$non_trainable_weights` +#' @note You can access the Layer/Model as `KerasVariables` (which are also +#' backend-native tensors like `tf.Variable`) at `object$weights`, +#' `object$trainable_weights`, or `object$non_trainable_weights` #' #' @family model persistence #' @family layer methods #' +#' @returns A list of R arrays. 
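A short sketch of the typical use: copying weights between two identically shaped models (assumes a working backend installation):

```r
library(keras3)

m1 <- keras_model_sequential(input_shape = 4) |> layer_dense(2)
m2 <- keras_model_sequential(input_shape = 4) |> layer_dense(2)

set_weights(m2, get_weights(m1))           # m2 now computes the same function as m1
length(get_weights(m1, trainable = TRUE))  # kernel + bias -> 2 arrays
```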
#' @export get_weights <- function(object, trainable = NA) { if(is.na(trainable)) @@ -85,7 +120,7 @@ set_weights <- function(object, weights) { #' #' @param object Layer or model object #' -#' @return An integer count +#' @returns An integer count #' #' @family layer methods #' @@ -95,75 +130,50 @@ count_params <- function(object) { } -#' Retrieve tensors for layers with multiple nodes -#' -#' Whenever you are calling a layer on some input, you are creating a new tensor -#' (the output of the layer), and you are adding a "node" to the layer, linking -#' the input tensor to the output tensor. When you are calling the same layer -#' multiple times, that layer owns multiple nodes indexed as 1, 2, 3. These -#' functions enable you to retrieve various tensor properties of layers with -#' multiple nodes. -#' -#' @param object Layer or model object + +#' Reset the state for a model, layer or metric. #' -#' @param node_index Integer, index of the node from which to retrieve the -#' attribute. E.g. `node_index = 1` will correspond to the first time the -#' layer was called. +#' @param object Model, Layer, or Metric instance #' -#' @return A tensor (or list of tensors if the layer has multiple inputs/outputs). +#' Not all Layers have resettable state (e.g., `adapt()`-able preprocessing +#' layers and rnn layers have resettable state, but a `layer_dense()` does not). +#' Calling this on a Layer instance without any resettable state will error. #' #' @family layer methods +# @family preprocessing layers +# @family metrics +# @family rnn layers #' +#' @returns `object`, invisibly. #' @export -get_input_at <- function(object, node_index) { - object$get_input_at(as_node_index(node_index)) -} - -#' @rdname get_input_at -#' @export -get_output_at <- function(object, node_index) { - object$get_output_at(as_node_index(node_index)) -} - -#' @rdname get_input_at -#' @export -get_input_shape_at <- function(object, node_index) { - object$get_input_shape_at(as_node_index(node_index)) -} - -#' @rdname get_input_at -#' @export -get_output_shape_at <- function(object, node_index) { - object$get_output_shape_at(as_node_index(node_index)) -} - - -#' @rdname get_input_at -#' @export -get_input_mask_at <- function(object, node_index) { - object$get_input_mask_at(as_node_index(node_index)) -} - -#' @rdname get_input_at -#' @export -get_output_mask_at <- function(object, node_index) { - object$get_output_mask_at(as_node_index(node_index)) +reset_state <- function(object) { + object$reset_state() + invisible(object) } -#' Reset the states for a layer +#' Quantize the weights of a model. #' -#' @param object Model or layer object +#' @description +#' Note that the model must be built before calling this method. +#' `quantize_weights()` will recursively call `layer$quantize(mode)` on all layers; +#' layers that don't implement the function are skipped. #' -#' @family layer methods +#' Currently only `Dense` and `EinsumDense` layers support quantization. +#' +#' @param object A Keras Model or Layer. +#' @param mode +#' The mode of the quantization. Only 'int8' is supported at this +#' time. #' #' @export -reset_states <- function(object) { - object$reset_states() - invisible(object) -} - - -as_node_index <- function(node_index) { - as.integer(node_index-1) +#' @returns `model`, invisibly. Note this is just a convenience for usage with `|>`; the +#' model is modified in-place. 
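For example, a minimal sketch of quantizing a small model (the model must be built, and only the `Dense` layers here are affected):

```r
library(keras3)

model <- keras_model_sequential(input_shape = 16) |>
  layer_dense(8) |>
  layer_dense(1)

model |> quantize_weights(mode = "int8")  # modifies `model` in place
```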
+#' +#' @family layer methods +#' @tether keras.Model.quantize +quantize_weights <- +function (object, mode) +{ + object$quantize(mode) } diff --git a/R/layer-r-helpers.R b/R/layer-r-helpers.R new file mode 100644 index 0000000000..18cb3c3134 --- /dev/null +++ b/R/layer-r-helpers.R @@ -0,0 +1,114 @@ + + +# ---- core ---- + + +#' Create a Keras Layer +#' +#' @param LayerClass A Python Layer class +#' @param object Object to compose layer with. This is either a +#' [keras_model_sequential()] to add the layer to, or another Layer which +#' this layer will call. +#' @param args List of arguments to the layer initialize function. +#' +#' @returns A Keras layer +#' +#' @note The `object` parameter can be missing, in which case the +#' layer is created without a connection to an existing graph. +#' +#' @keywords internal +#' @noRd +create_layer <- function(LayerClass, object, args = list()) { + + # force `object` before instantiating the layer, so pipe chains create layers + # in the intuitively expected order. + # https://github.com/rstudio/keras/issues/1440 + object <- if (missing(object)) NULL else object + + # Starting in Keras 3.1, constraints can't be simple callable functions, they + # *must* inherit from keras.constraints.Constraint() + args <- imap(args, function(arg, name) { + if (endsWith(name, "_constraint") && is_bare_r_function(arg)) + arg <- as_constraint(arg) + arg + }) + + args <- lapply(args, resolve_py_obj) + + if (!is_py_object(LayerClass)) # e.g., R6ClassGenerator + LayerClass <- r_to_py(LayerClass) + + # create layer instance by calling the LayerClass object + layer <- do.call(LayerClass, args) + + # compose if we have an `object` + if (is.null(object)) + layer + else + invisible(compose_layer(object, layer)) +} + + +# Helper function to enable composing a layer instance with a Sequential model +# via a simple call like layer(). +compose_layer <- function(object, layer, ...) { + if(missing(object) || is.null(object)) + return(layer(...)) + + # if the first arg is a Sequential model, call `model$add()` + if (inherits(object, "keras.src.models.sequential.Sequential")) { + if(length(list(...)) > 0) warning("arguments passed via ellipsis will be ignored") + + object$add(layer) + return(object) + } + + # otherwise, invoke `layer$__call__()` + layer(object, ...) } + + +# TODO: use formals(x) in py_to_r_wrapper.Layer() to construct a better wrapper fn +# (( though, all layer.__call__ signatures are generally (...), unless user +# implemented __call__() directly instead of call() )) + +# This is used for: +# - ALL layer instances (custom and builtin) and +# - ALL model instances (Sequential, Functional, and custom) +#' @export +py_to_r_wrapper.keras.src.layers.layer.Layer <- function(x) { + force(x) + function(object, ...) compose_layer(object = object, layer = x, ...) +} + + +# ---- convolutional ---- +normalize_padding <- function(padding, dims) { + normalize_scale("padding", padding, dims) +} + +normalize_cropping <- function(cropping, dims) { + normalize_scale("cropping", cropping, dims) +} + +normalize_scale <- function(name, scale, dims) { + + # validate and marshall scale argument + throw_invalid_scale <- function() { + stop(name, " must be a list of ", dims, " integers or list of ", dims, " lists of 2 integers", + call. 
= FALSE) + } + + # if all of the individual items are numeric then cast to integer vector + if (all(sapply(scale, function(x) length(x) == 1 && is.numeric(x)))) { + as.integer(scale) + } else if (is.list(scale)) { + lapply(scale, function(x) { + if (length(x) != 2) + throw_invalid_scale() + as.integer(x) + }) + } else { + throw_invalid_scale() + } +} diff --git a/R/layer-wrappers.R b/R/layer-wrappers.R deleted file mode 100644 index 658831880c..0000000000 --- a/R/layer-wrappers.R +++ /dev/null @@ -1,114 +0,0 @@ -#' This layer wrapper allows to apply a layer to every temporal slice of an input -#' -#' @details -#' Every input should be at least 3D, and the dimension of index one of the -#' first input will be considered to be the temporal dimension. -#' -#' Consider a batch of 32 video samples, where each sample is a 128x128 RGB image -#' with `channels_last` data format, across 10 timesteps. -#' The batch input shape is `(32, 10, 128, 128, 3)`. -#' -#' You can then use `TimeDistributed` to apply the same `Conv2D` layer to each -#' of the 10 timesteps, independently: -#' -#' ```R -#' input <- layer_input(c(10, 128, 128, 3)) -#' conv_layer <- layer_conv_2d(filters = 64, kernel_size = c(3, 3)) -#' output <- input %>% time_distributed(conv_layer) -#' output$shape # TensorShape([None, 10, 126, 126, 64]) -#' ``` -#' -#' Because `TimeDistributed` applies the same instance of `Conv2D` to each of the -#' timestamps, the same set of weights are used at each timestamp. -#' -#' @inheritParams layer_dense -#' @param layer a `tf.keras.layers.Layer` instance. -#' @param ... standard layer arguments. -#' -#' @seealso -#' + -#' -#' @family layer wrappers -#' @export -time_distributed <- -function(object, layer, ...) -{ - args <- - capture_args( - match.call(), - list( - input_shape = normalize_shape, - batch_input_shape = normalize_shape, - batch_size = as_nullable_integer - ), - ignore = "object" - ) - create_layer(keras$layers$TimeDistributed, object, args) -} - - -#' Bidirectional wrapper for RNNs -#' -#' @inheritParams layer_dense -#' -#' @param layer A `RNN` layer instance, such as `layer_lstm()` or -#' `layer_gru()`. It could also be a `keras$layers$Layer` instance that -#' meets the following criteria: -#' -#' 1. Be a sequence-processing layer (accepts 3D+ inputs). -#' -#' 2. Have a `go_backwards`, `return_sequences` and `return_state` attribute -#' (with the same semantics as for the `RNN` class). -#' -#' 3. Have an `input_spec` attribute. -#' -#' 4. Implement serialization via `get_config()` and `from_config()`. Note -#' that the recommended way to create new RNN layers is to write a custom RNN -#' cell and use it with `layer_rnn()`, instead of subclassing -#' `keras$layers$Layer` directly. -#' -#' 5. When `returns_sequences = TRUE`, the output of the masked timestep will -#' be zero regardless of the layer's original `zero_output_for_mask` value. -#' -#' @param merge_mode Mode by which outputs of the forward and backward RNNs will -#' be combined. One of `'sum'`, `'mul'`, `'concat'`, `'ave'`, `NULL`. If -#' `NULL`, the outputs will not be combined, they will be returned as a list. -#' Default value is `'concat'`. -#' -#' @param weights Split and propagated to the `initial_weights` attribute on the -#' forward and backward layer. -#' -#' @param backward_layer Optional `keras.layers.RNN`, or `keras.layers.Layer` -#' instance to be used to handle backwards input processing. 
If -#' `backward_layer` is not provided, the layer instance passed as the `layer` -#' argument will be used to generate the backward layer automatically. Note -#' that the provided `backward_layer` layer should have properties matching -#' those of the `layer` argument, in particular it should have the same values -#' for `stateful`, `return_states`, `return_sequences`, etc. In addition, -#' `backward_layer` and `layer` should have different `go_backwards` argument -#' values. A `ValueError` will be raised if these requirements are not met. -#' -#' @param ... standard layer arguments. -#' -#' @family layer wrappers -#' @seealso -#' -#' - -#' - -#' -#' @export -bidirectional <- -function(object, layer, merge_mode = "concat", - weights = NULL, backward_layer = NULL, ...) -{ - args <- capture_args( - match.call(), - modifiers = list( - input_shape = normalize_shape, - batch_input_shape = normalize_shape, - batch_size = as_nullable_integer - ), - ignore = "object" - ) - create_layer(keras$layers$Bidirectional, object, args) -} diff --git a/R/layers-activations.R b/R/layers-activations.R index 710222ab45..e45562e700 100644 --- a/R/layers-activations.R +++ b/R/layers-activations.R @@ -1,272 +1,295 @@ -#' Apply an activation function to an output. -#' -#' @param input_shape Input shape (list of integers, does not include the -#' samples axis) which is required when using this layer as the first layer in -#' a model. -#' -#' @inheritParams layer_dense -#' -#' @family core layers -#' @family activation layers -#' -#' @export -layer_activation <- function(object, activation, input_shape = NULL, - batch_input_shape = NULL, batch_size = NULL, dtype = NULL, - name = NULL, trainable = NULL, weights = NULL) { - create_layer(keras$layers$Activation, object, list( - activation = activation, - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - dtype = dtype, - name = name, - trainable = trainable, - weights = weights - )) -} - -#' Leaky version of a Rectified Linear Unit. +#' Applies an activation function to an output. #' -#' Allows a small gradient when the unit is not active: `f(x) = alpha * x` for -#' `x < 0`, `f(x) = x` for `x >= 0`. +#' @description #' -#' @inheritParams layer_activation -#' @param alpha float >= 0. Negative slope coefficient. +#' # Examples +#' ```{r} +#' x <- array(c(-3, -1, 0, 2)) +#' layer <- layer_activation(activation = 'relu') +#' layer(x) +#' layer <- layer_activation(activation = activation_relu) +#' layer(x) +#' layer <- layer_activation(activation = op_relu) +#' layer(x) +#' ``` #' -#' @seealso [Rectifier Nonlinearities Improve Neural Network Acoustic -#' Models](https://ai.stanford.edu/~amaas/papers/relu_hybrid_icml2013_final.pdf). +#' @param activation +#' Activation function. It could be a callable, or the name of +#' an activation from the `keras3::activation_*` namespace. #' -#' @family activation layers +#' @param ... +#' Base layer keyword arguments, such as `name` and `dtype`. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. 
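To illustrate the `object` argument: besides being called on a tensor or array as in the examples above, the same layer composes in a pipeline (a sketch):

```r
library(keras3)

model <- keras_model_sequential(input_shape = 4) |>
  layer_dense(8) |>
  layer_activation("relu")
```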
#' +#' @inherit layer_dense return #' @export -layer_activation_leaky_relu <- function(object, alpha = 0.3, input_shape = NULL, - batch_input_shape = NULL, batch_size = NULL, - dtype = NULL, name = NULL, trainable = NULL, - weights = NULL) { - - create_layer(keras$layers$LeakyReLU, object, list( - alpha = alpha, - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - dtype = dtype, - name = name, - trainable = trainable, - weights = weights - )) - +#' @family activation layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.Activation +layer_activation <- +function (object, activation, ...) +{ + args <- capture_args(list(input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape), + ignore = "object") + create_layer(keras$layers$Activation, object, args) } -#' Parametric Rectified Linear Unit. + +#' Applies an Exponential Linear Unit function to an output. #' -#' It follows: `f(x) = alpha * x`` for `x < 0`, `f(x) = x` for `x >= 0`, where -#' alpha is a learned array with the same shape as x. +#' @description +#' Formula: #' -#' @inheritParams layer_activation -#' @param alpha_initializer Initializer function for the weights. -#' @param alpha_regularizer Regularizer for the weights. -#' @param alpha_constraint Constraint for the weights. -#' @param shared_axes The axes along which to share learnable parameters for the -#' activation function. For example, if the incoming feature maps are from a -#' 2D convolution with output shape (batch, height, width, channels), and you -#' wish to share parameters across space so that each filter only has one set -#' of parameters, set shared_axes=c(1, 2). +#' ``` +#' f(x) = alpha * (exp(x) - 1.) for x < 0 +#' f(x) = x for x >= 0 +#' ``` #' -#' @seealso [Delving Deep into Rectifiers: Surpassing Human-Level Performance on -#' ImageNet Classification](https://arxiv.org/abs/1502.01852). +#' @param alpha +#' float, slope of negative section. Defaults to `1.0`. #' -#' @family activation layers +#' @param ... +#' Base layer keyword arguments, such as `name` and `dtype`. #' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @inherit layer_dense return #' @export -layer_activation_parametric_relu <- function(object, alpha_initializer = "zeros", alpha_regularizer = NULL, - alpha_constraint = NULL, shared_axes = NULL, - input_shape = NULL, - batch_input_shape = NULL, batch_size = NULL, - dtype = NULL, name = NULL, trainable = NULL, - weights = NULL) { - - # build args - args <- list( - alpha_initializer = alpha_initializer, - alpha_regularizer = alpha_regularizer, - alpha_constraint = alpha_constraint - ) - if (!is.null(shared_axes)) - args$shared_axes <- as.list(as.integer(shared_axes)) - args$input_shape <- normalize_shape(input_shape) - args$batch_input_shape <- normalize_shape(batch_input_shape) - args$batch_size <- as_nullable_integer(batch_size) - args$dtype <- dtype - args$name <- name - args$trainable <- trainable - args$weights <- weights - - # call layer - create_layer(keras$layers$PReLU, object, args) +#' @family activation layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.ELU +layer_activation_elu <- +function (object, alpha = 1, ...) 
+{ + args <- capture_args(list(input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape), + ignore = "object") + create_layer(keras$layers$ELU, object, args) } -#' Thresholded Rectified Linear Unit. +#' Leaky version of a Rectified Linear Unit activation layer. #' -#' It follows: `f(x) = x` for `x > theta`, `f(x) = 0` otherwise. +#' @description +#' This layer allows a small gradient when the unit is not active. #' -#' @inheritParams layer_activation -#' @param theta float >= 0. Threshold location of activation. +#' Formula: #' -#' @seealso [Zero-bias autoencoders and the benefits of co-adapting features](https://arxiv.org/abs/1402.3337). +#' ```r +#' f <- function(x) ifelse(x >= 0, x, negative_slope * x) +#' ``` #' -#' @family activation layers +#' # Examples +#' ```{r} +#' leaky_relu_layer <- layer_activation_leaky_relu(negative_slope=0.5) +#' input <- array(c(-10, -5, 0.0, 5, 10)) +#' result <- leaky_relu_layer(input) +#' as.array(result) +#' ``` #' -#' @export -layer_activation_thresholded_relu <- function(object, theta = 1.0, input_shape = NULL, - batch_input_shape = NULL, batch_size = NULL, - dtype = NULL, name = NULL, trainable = NULL, - weights = NULL) { - - create_layer(keras$layers$ThresholdedReLU, object, list( - theta = theta, - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - dtype = dtype, - name = name, - trainable = trainable, - weights = weights - )) - -} - - -#' Exponential Linear Unit. +#' @param negative_slope +#' Float >= 0.0. Negative slope coefficient. +#' Defaults to `0.3`. #' -#' It follows: `f(x) = alpha * (exp(x) - 1.0)` for `x < 0`, `f(x) = x` for `x >= 0`. +#' @param ... +#' Base layer keyword arguments, such as +#' `name` and `dtype`. #' -#' @inheritParams layer_activation -#' @param alpha Scale for the negative factor. -#' -#' @seealso [Fast and Accurate Deep Network Learning by Exponential Linear Units -#' (ELUs)](https://arxiv.org/abs/1511.07289v1). -#' -#' @family activation layers +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. #' +#' @inherit layer_dense return #' @export -layer_activation_elu <- function(object, alpha = 1.0, input_shape = NULL, - batch_input_shape = NULL, batch_size = NULL, dtype = NULL, - name = NULL, trainable = NULL, weights = NULL) { - - create_layer(keras$layers$ELU, object, list( - alpha = alpha, - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - dtype = dtype, - name = name, - trainable = trainable, - weights = weights - )) - +#' @family activation layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.LeakyReLU +layer_activation_leaky_relu <- +function (object, negative_slope = 0.3, ...) +{ + args <- capture_args(list(input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape), + ignore = "object") + create_layer(keras$layers$LeakyReLU, object, args) } -#' Scaled Exponential Linear Unit. + +#' Parametric Rectified Linear Unit activation layer. #' -#' SELU is equal to: `scale * elu(x, alpha)`, where alpha and scale -#' are pre-defined constants. +#' @description +#' Formula: +#' ```r +#' f <- function(x) ifelse(x >= 0, x, alpha * x) +#' ``` +#' where `alpha` is a learned array with the same shape as x. 
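A small sketch of the initial behavior: with the default `"Zeros"` initializer for `alpha`, the layer starts out identical to a plain ReLU until `alpha` is learned:

```r
library(keras3)

prelu <- layer_activation_parametric_relu()
x <- op_array(rbind(c(-3, -1, 0, 2)))  # shape (1, 4): a batch of one
prelu(x)                               # alpha starts at 0, so negatives map to 0
```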
#' -#' The values of `alpha` and `scale` are -#' chosen so that the mean and variance of the inputs are preserved -#' between two consecutive layers as long as the weights are initialized -#' correctly (see initializer_lecun_normal) and the number of inputs -#' is "large enough" (see article for more information). +#' @param alpha_initializer +#' Initializer function for the weights. #' -#' Note: -#' - To be used together with the initialization "lecun_normal". -#' - To be used together with the dropout variant "AlphaDropout". +#' @param alpha_regularizer +#' Regularizer for the weights. #' -#' @inheritParams layer_activation +#' @param alpha_constraint +#' Constraint for the weights. #' -#' @seealso [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515), \code{\link{initializer_lecun_normal}}, \code{\link{layer_alpha_dropout}} +#' @param shared_axes +#' The axes along which to share learnable parameters for the +#' activation function. For example, if the incoming feature maps are +#' from a 2D convolution with output shape +#' `(batch, height, width, channels)`, and you wish to share parameters +#' across space so that each filter only has one set of parameters, +#' set `shared_axes=[1, 2]`. #' -#' @family activation layers +#' @param ... +#' Base layer keyword arguments, such as `name` and `dtype`. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. #' +#' @inherit layer_dense return #' @export -layer_activation_selu <- function(object, input_shape = NULL, - batch_input_shape = NULL, batch_size = NULL, dtype = NULL, - name = NULL, trainable = NULL, weights = NULL) { +#' @family activation layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.PReLU +layer_activation_parametric_relu <- +function (object, alpha_initializer = "Zeros", alpha_regularizer = NULL, + alpha_constraint = NULL, shared_axes = NULL, ...) +{ + args <- capture_args(list(input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape, + shared_axes = as_axis), ignore = "object") + create_layer(keras$layers$PReLU, object, args) } - create_layer(keras$layers$Activation, object, list( - activation = "selu", - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - dtype = dtype, - name = name, - trainable = trainable, - weights = weights - )) +#' Rectified Linear Unit activation function layer. +#' +#' @description +#' Formula: +#' +#' ```r +#' f <- function(x, max_value = Inf, negative_slope = 0, threshold = 0) { +#' if (x >= max_value) +#' max_value +#' else if (threshold <= x && x < max_value) +#' x +#' else +#' negative_slope * (x - threshold) +#' } +#' ``` +#' With the default arguments this reduces to `max(x, 0)`. +#' +#' # Examples +#' ```{r} +#' relu_layer <- layer_activation_relu(max_value = 10, +#' negative_slope = 0.5, +#' threshold = 0) +#' input <- array(c(-10, -5, 0.0, 5, 10)) +#' result <- relu_layer(input) +#' as.array(result) +#' ``` +#' +#' @param max_value +#' Float >= 0. Maximum activation value. `NULL` means unlimited. +#' Defaults to `NULL`. +#' +#' @param negative_slope +#' Float >= 0. Negative slope coefficient. +#' Defaults to `0.0`. +#' +#' @param threshold +#' Float >= 0. Threshold value for thresholded activation. +#' Defaults to `0.0`. +#' +#' @param ... +#' Base layer keyword arguments, such as `name` and `dtype`. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. 
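As a quick numerical check that the corrected formula above agrees with the layer, a vectorized version reproduces the example output:

```r
# plain-R rendering of the piecewise definition (no keras required)
f <- function(x, max_value = Inf, negative_slope = 0, threshold = 0)
  ifelse(x >= max_value, max_value,
         ifelse(x >= threshold, x, negative_slope * (x - threshold)))

f(c(-10, -5, 0, 5, 10), max_value = 10, negative_slope = 0.5)
## [1] -5.0 -2.5  0.0  5.0 10.0   # the same values the layer example produces
```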
+#' +#' @inherit layer_dense return +#' @export +#' @family activation layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.ReLU +layer_activation_relu <- +function (object, max_value = NULL, negative_slope = 0, threshold = 0, + ...) +{ + args <- capture_args(list(input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape), + ignore = "object") + create_layer(keras$layers$ReLU, object, args) } -#' Softmax activation function. + +#' Softmax activation layer. #' -#' It follows: `f(x) = alpha * (exp(x) - 1.0)` for `x < 0`, `f(x) = x` for `x >= 0`. +#' @description +#' Formula: +#' ``` +#' exp_x = exp(x - max(x)) +#' f(x) = exp_x / sum(exp_x) +#' ``` #' -#' @inheritParams layer_activation -#' @param axis Integer, axis along which the softmax normalization is applied. +#' # Examples +#' ```{r} +#' softmax_layer <- layer_activation_softmax() +#' input <- op_array(c(1, 2, 1)) +#' softmax_layer(input) +#' ``` #' -#' @family activation layers +#' # Call Arguments +#' - `inputs`: The inputs (logits) to the softmax layer. +#' - `mask`: A boolean mask of the same shape as `inputs`. The mask +#' specifies 1 to keep and 0 to mask. Defaults to `NULL`. #' -#' @export -layer_activation_softmax <- function(object, axis = -1, input_shape = NULL, - batch_input_shape = NULL, batch_size = NULL, dtype = NULL, - name = NULL, trainable = NULL, weights = NULL) { - - create_layer(keras$layers$Softmax, object, list( - axis = as.integer(axis), - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - dtype = dtype, - name = name, - trainable = trainable, - weights = weights - )) - -} - -#' Rectified Linear Unit activation function +#' @returns +#' Softmaxed output with the same shape as `inputs`. #' -#' @inheritParams layer_activation +#' @param axis +#' Integer, or list of Integers, axis along which the softmax +#' normalization is applied. #' -#' @param max_value loat, the maximum output value. -#' @param negative_slope float >= 0 Negative slope coefficient. -#' @param threshold float. Threshold value for thresholded activation. +#' @param ... +#' Base layer keyword arguments, such as `name` and `dtype`. #' -#' @family activation layers +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. #' #' @export -layer_activation_relu <- function(object, max_value = NULL, negative_slope = 0, threshold = 0, - input_shape = NULL, batch_input_shape = NULL, batch_size = NULL, - dtype = NULL, name = NULL, trainable = NULL, - weights = NULL) { - - args <- list( - max_value = max_value, - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - dtype = dtype, - name = name, - trainable = trainable, - weights = weights - ) - - if (keras_version() >= "2.2.3") { - args$negative_slope <- negative_slope - args$threshold <- threshold - } - - create_layer(keras$layers$ReLU, object, args) +#' @family activation layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.Softmax +layer_activation_softmax <- +function (object, axis = -1L, ...) 
+{ + args <- capture_args(list(axis = as_axis, input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape), + ignore = "object") + create_layer(keras$layers$Softmax, object, args) } diff --git a/R/layers-attention.R b/R/layers-attention.R new file mode 100644 index 0000000000..ef124a7326 --- /dev/null +++ b/R/layers-attention.R @@ -0,0 +1,433 @@ + + + +#' Additive attention layer, a.k.a. Bahdanau-style attention. +#' +#' @description +#' Inputs are a list with 2 or 3 elements: +#' 1. A `query` tensor of shape `(batch_size, Tq, dim)`. +#' 2. A `value` tensor of shape `(batch_size, Tv, dim)`. +#' 3. An optional `key` tensor of shape `(batch_size, Tv, dim)`. If none +#' supplied, `value` will be used as `key`. +#' +#' The calculation follows the steps: +#' 1. Calculate attention scores using `query` and `key` with shape +#' `(batch_size, Tq, Tv)` as a non-linear sum +#' `scores = reduce_sum(tanh(query + key), axis=-1)`. +#' 2. Use scores to calculate a softmax distribution with shape +#' `(batch_size, Tq, Tv)`. +#' 3. Use the softmax distribution to create a linear combination of `value` +#' with shape `(batch_size, Tq, dim)`. +#' +#' # Call Arguments +#' - `inputs`: List of the following tensors: +#' - `query`: Query tensor of shape `(batch_size, Tq, dim)`. +#' - `value`: Value tensor of shape `(batch_size, Tv, dim)`. +#' - `key`: Optional key tensor of shape `(batch_size, Tv, dim)`. If +#' not given, will use `value` for both `key` and `value`, which is +#' the most common case. +#' - `mask`: List of the following tensors: +#' - `query_mask`: A boolean mask tensor of shape `(batch_size, Tq)`. +#' If given, the output will be zero at the positions where +#' `mask==FALSE`. +#' - `value_mask`: A boolean mask tensor of shape `(batch_size, Tv)`. +#' If given, will apply the mask such that values at positions +#' where `mask==FALSE` do not contribute to the result. +#' - `return_attention_scores`: bool, if `TRUE`, returns the attention scores +#' (after masking and softmax) as an additional output argument. +#' - `training`: Python boolean indicating whether the layer should behave in +#' training mode (adding dropout) or in inference mode (no dropout). +#' - `use_causal_mask`: Boolean. Set to `TRUE` for decoder self-attention. Adds +#' a mask such that position `i` cannot attend to positions `j > i`. +#' This prevents the flow of information from the future towards the +#' past. Defaults to `FALSE`. +#' +#' # Output +#' Attention outputs of shape `(batch_size, Tq, dim)`. +#' (Optional) Attention scores after masking and softmax with shape +#' `(batch_size, Tq, Tv)`. +#' +#' @param use_scale +#' If `TRUE`, will create a scalar variable to scale the +#' attention scores. +#' +#' @param dropout +#' Float between 0 and 1. Fraction of the units to drop for the +#' attention scores. Defaults to `0.0`. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @param ... +#' For forward/backward compatibility. +#' +#' @inherit layer_dense return +#' @export +#' @family attention layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.AdditiveAttention +layer_additive_attention <- +function (object, use_scale = TRUE, dropout = 0, ...) +{ + args <- capture_args(list(input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape), + ignore = "object") + create_layer(keras$layers$AdditiveAttention, object, args) } + + +#' Dot-product attention layer, a.k.a. 
Luong-style attention. +#' +#' @description +#' Inputs are a list with 2 or 3 elements: +#' 1. A `query` tensor of shape `(batch_size, Tq, dim)`. +#' 2. A `value` tensor of shape `(batch_size, Tv, dim)`. +#' 3. An optional `key` tensor of shape `(batch_size, Tv, dim)`. If none +#' supplied, `value` will be used as a `key`. +#' +#' The calculation follows the steps: +#' 1. Calculate attention scores using `query` and `key` with shape +#' `(batch_size, Tq, Tv)`. +#' 2. Use scores to calculate a softmax distribution with shape +#' `(batch_size, Tq, Tv)`. +#' 3. Use the softmax distribution to create a linear combination of `value` +#' with shape `(batch_size, Tq, dim)`. +#' +#' # Call Arguments +#' - `inputs`: List of the following tensors: +#' - `query`: Query tensor of shape `(batch_size, Tq, dim)`. +#' - `value`: Value tensor of shape `(batch_size, Tv, dim)`. +#' - `key`: Optional key tensor of shape `(batch_size, Tv, dim)`. If +#' not given, will use `value` for both `key` and `value`, which is +#' the most common case. +#' - `mask`: List of the following tensors: +#' - `query_mask`: A boolean mask tensor of shape `(batch_size, Tq)`. +#' If given, the output will be zero at the positions where +#' `mask==FALSE`. +#' - `value_mask`: A boolean mask tensor of shape `(batch_size, Tv)`. +#' If given, will apply the mask such that values at positions +#' where `mask==FALSE` do not contribute to the result. +#' - `return_attention_scores`: bool, if `TRUE`, returns the attention scores +#' (after masking and softmax) as an additional output argument. +#' - `training`: Python boolean indicating whether the layer should behave in +#' training mode (adding dropout) or in inference mode (no dropout). +#' - `use_causal_mask`: Boolean. Set to `TRUE` for decoder self-attention. Adds +#' a mask such that position `i` cannot attend to positions `j > i`. +#' This prevents the flow of information from the future towards the +#' past. Defaults to `FALSE`. +#' +#' # Output +#' Attention outputs of shape `(batch_size, Tq, dim)`. +#' (Optional) Attention scores after masking and softmax with shape +#' `(batch_size, Tq, Tv)`. +#' +#' @param use_scale +#' If `TRUE`, will create a scalar variable to scale the +#' attention scores. +#' +#' @param dropout +#' Float between 0 and 1. Fraction of the units to drop for the +#' attention scores. Defaults to `0.0`. +#' +#' @param seed +#' An integer to use as random seed in case of `dropout`. +#' +#' @param score_mode +#' Function to use to compute attention scores, one of +#' `{"dot", "concat"}`. `"dot"` refers to the dot product between the +#' query and key vectors. `"concat"` refers to the hyperbolic tangent +#' of the concatenation of the `query` and `key` vectors. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @param ... +#' For forward/backward compatibility. +#' +#' @inherit layer_dense return +#' @export +#' @family attention layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.Attention +layer_attention <- +function (object, use_scale = FALSE, score_mode = "dot", dropout = 0, + seed = NULL, ...) +{ + args <- capture_args(list(input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape), + ignore = "object") + create_layer(keras$layers$Attention, object, args) } + + +#' Grouped Query Attention layer. +#' +#' @description +#' This is an implementation of grouped-query attention introduced by +#' [Ainslie et al., 2023](https://arxiv.org/abs/2305.13245). 
+#'
+#' @param use_scale
+#' If `TRUE`, will create a scalar variable to scale the
+#' attention scores.
+#'
+#' @param dropout
+#' Float between 0 and 1. Fraction of the units to drop for the
+#' attention scores. Defaults to `0.0`.
+#'
+#' @param seed
+#' An integer to use as random seed in case of `dropout`.
+#'
+#' @param score_mode
+#' Function to use to compute attention scores, one of
+#' `{"dot", "concat"}`. `"dot"` refers to the dot product between the
+#' query and key vectors. `"concat"` refers to the hyperbolic tangent
+#' of the concatenation of the `query` and `key` vectors.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family attention layers
+#' @family layers
+#' @seealso
+#' +
+
+# +
+
+#' @tether keras.layers.Attention
+layer_attention <-
+function (object, use_scale = FALSE, score_mode = "dot", dropout = 0,
+    seed = NULL, ...)
+{
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$Attention, object, args)
+}
+
+
+#' Grouped Query Attention layer.
+#'
+#' @description
+#' This is an implementation of grouped-query attention introduced by
+#' [Ainslie et al., 2023](https://arxiv.org/abs/2305.13245). Here
+#' `num_key_value_heads` denotes the number of groups; setting
+#' `num_key_value_heads` to 1 is equivalent to multi-query attention, and
+#' when `num_key_value_heads` is equal to `num_query_heads` it is equivalent
+#' to multi-head attention.
+#'
+#' This layer first projects `query`, `key`, and `value` tensors. Then, `key`
+#' and `value` are repeated to match the number of heads of `query`.
+#'
+#' Then, the `query` is scaled and dot-producted with `key` tensors. These are
+#' softmaxed to obtain attention probabilities. The value tensors are then
+#' interpolated by these probabilities and concatenated back to a single
+#' tensor (see the example below).
+#'
+#' # Call Arguments
+#' - `query`: Query tensor of shape `(batch_dim, target_seq_len, feature_dim)`,
+#'   where `batch_dim` is the batch size, `target_seq_len` is the length of
+#'   the target sequence, and `feature_dim` is the dimension of features.
+#' - `value`: Value tensor of shape `(batch_dim, source_seq_len, feature_dim)`,
+#'   where `batch_dim` is the batch size, `source_seq_len` is the length of
+#'   the source sequence, and `feature_dim` is the dimension of features.
+#' - `key`: Optional key tensor of shape
+#'   `(batch_dim, source_seq_len, feature_dim)`. If not given, will use
+#'   `value` for both `key` and `value`, which is the most common case.
+#' - `attention_mask`: A boolean mask of shape
+#'   `(batch_dim, target_seq_len, source_seq_len)`, that prevents
+#'   attention to certain positions. The boolean mask specifies which
+#'   query elements can attend to which key elements, where 1 indicates
+#'   attention and 0 indicates no attention. Broadcasting can happen for
+#'   the missing batch dimensions and the head dimension.
+#' - `return_attention_scores`: A boolean to indicate whether the output
+#'   should be `(attention_output, attention_scores)` if `TRUE`, or
+#'   `attention_output` if `FALSE`. Defaults to `FALSE`.
+#' - `training`: Python boolean indicating whether the layer should behave in
+#'   training mode (adding dropout) or in inference mode (no dropout).
+#'   Will go with either using the training mode of the parent
+#'   layer/model or `FALSE` (inference) if there is no parent layer.
+#' - `use_causal_mask`: A boolean to indicate whether to apply a causal mask to
+#'   prevent tokens from attending to future tokens (e.g., used in a
+#'   decoder Transformer).
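+#'
+#' # Example
+#' A minimal sketch; the tensor shapes and head counts below are illustrative
+#' placeholders:
+#'
+#' ```r
+#' query <- random_uniform(c(2, 4, 16))  # (batch_dim, target_seq_len, feature_dim)
+#' value <- random_uniform(c(2, 6, 16))  # (batch_dim, source_seq_len, feature_dim)
+#'
+#' # 4 query heads shared across 2 key/value groups;
+#' # calling with `object` missing returns the layer instance
+#' gqa <- layer_group_query_attention(head_dim = 8, num_query_heads = 4,
+#'                                    num_key_value_heads = 2)
+#' y <- gqa(query, value)  # `value` is reused as `key`
+#' shape(y)  # shape(2, 4, 16): (batch_dim, target_seq_len, feature_dim)
+#' ```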
+#'
+#' @returns
+#' attention_output: Result of the computation, of shape
+#' `(batch_dim, target_seq_len, feature_dim)`, where `target_seq_len`
+#' is the target sequence length and `feature_dim` is the last
+#' dimension of the query input.
+#' attention_scores: (Optional) attention coefficients of shape
+#' `(batch_dim, num_query_heads, target_seq_len, source_seq_len)`.
+#'
+#' @param head_dim
+#' Size of each attention head.
+#'
+#' @param num_query_heads
+#' Number of query attention heads.
+#'
+#' @param num_key_value_heads
+#' Number of key and value attention heads.
+#'
+#' @param dropout
+#' Dropout probability.
+#'
+#' @param use_bias
+#' Boolean, whether the dense layers use bias vectors/matrices.
+#'
+#' @param kernel_initializer
+#' Initializer for dense layer kernels.
+#'
+#' @param bias_initializer
+#' Initializer for dense layer biases.
+#'
+#' @param kernel_regularizer
+#' Regularizer for dense layer kernels.
+#'
+#' @param bias_regularizer
+#' Regularizer for dense layer biases.
+#'
+#' @param activity_regularizer
+#' Regularizer for dense layer activity.
+#'
+#' @param kernel_constraint
+#' Constraint for dense layer kernels.
+#'
+#' @param bias_constraint
+#' Constraint for dense layer biases.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @export
+#' @family attention layers
+#' @family layers
+# @seealso
+# +
+
+#' @tether keras.layers.GroupQueryAttention
+layer_group_query_attention <-
+function (object, head_dim, num_query_heads, num_key_value_heads,
+    dropout = 0, use_bias = TRUE, kernel_initializer = "glorot_uniform",
+    bias_initializer = "zeros", kernel_regularizer = NULL, bias_regularizer = NULL,
+    activity_regularizer = NULL, kernel_constraint = NULL, bias_constraint = NULL,
+    ...)
+{
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$GroupQueryAttention, object, args)
+}
+
+
+#' Multi Head Attention layer.
+#'
+#' @description
+#' This is an implementation of multi-headed attention as described in the
+#' paper "Attention Is All You Need"
+#' [Vaswani et al., 2017](https://arxiv.org/abs/1706.03762).
+#' If `query`, `key`, and `value` are the same, then
+#' this is self-attention. Each timestep in `query` attends to the
+#' corresponding sequence in `key`, and returns a fixed-width vector.
+#'
+#' This layer first projects `query`, `key` and `value`. These are
+#' (effectively) a list of tensors of length `num_attention_heads`, where the
+#' corresponding shapes are `(batch_size, <query dimensions>, key_dim)`,
+#' `(batch_size, <key/value dimensions>, key_dim)`,
+#' `(batch_size, <key/value dimensions>, value_dim)`.
+#'
+#' Then, the query and key tensors are dot-producted and scaled. These are
+#' softmaxed to obtain attention probabilities. The value tensors are then
+#' interpolated by these probabilities, then concatenated back to a single
+#' tensor.
+#'
+#' Finally, the result tensor, whose last dimension is `value_dim`, can be
+#' passed through a final linear projection and returned (see the example
+#' below).
+#'
+#' # Call Arguments
+#' - `query`: Query tensor of shape `(B, T, dim)`, where `B` is the batch size,
+#'   `T` is the target sequence length, and `dim` is the feature dimension.
+#' - `value`: Value tensor of shape `(B, S, dim)`, where `B` is the batch size,
+#'   `S` is the source sequence length, and `dim` is the feature dimension.
+#' - `key`: Optional key tensor of shape `(B, S, dim)`. If not given, will
+#'   use `value` for both `key` and `value`, which is the most common
+#'   case.
+#' - `attention_mask`: a boolean mask of shape `(B, T, S)`, that prevents
+#'   attention to certain positions. The boolean mask specifies which
+#'   query elements can attend to which key elements, 1 indicates
+#'   attention and 0 indicates no attention. Broadcasting can happen for
+#'   the missing batch dimensions and the head dimension.
+#' - `return_attention_scores`: A boolean to indicate whether the output should
+#'   be `(attention_output, attention_scores)` if `TRUE`, or
+#'   `attention_output` if `FALSE`. Defaults to `FALSE`.
+#' - `training`: Python boolean indicating whether the layer should behave in
+#'   training mode (adding dropout) or in inference mode (no dropout).
+#'   Will go with either using the training mode of the parent
+#'   layer/model, or `FALSE` (inference) if there is no parent layer.
+#' - `use_causal_mask`: A boolean to indicate whether to apply a causal mask to
+#'   prevent tokens from attending to future tokens (e.g., used in a
+#'   decoder Transformer).
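+#'
+#' # Example
+#' A minimal sketch of self-attention on an eager tensor; the shapes and
+#' head counts below are illustrative placeholders:
+#'
+#' ```r
+#' x <- random_uniform(c(2, 8, 16))  # (B, T, dim)
+#'
+#' # calling with `inputs` missing returns the layer instance, which can
+#' # then be called with separate query and value tensors
+#' mha <- layer_multi_head_attention(num_heads = 2, key_dim = 16)
+#' y <- mha(x, x)   # query and value are the same: self-attention
+#' shape(y)         # shape(2, 8, 16)
+#' ```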
+#'
+#'
+#' # Call return
+#' - attention_output: The result of the computation, of shape `(B, T, E)`,
+#'   where `T` is the target sequence length and `E` is the query input's
+#'   last dimension if `output_shape` is `NULL`. Otherwise, the
+#'   multi-head outputs are projected to the shape specified by
+#'   `output_shape`.
+#' - attention_scores: (Optional) multi-head attention coefficients over
+#'   attention axes.
+#'
+#' # Properties
+#' A `MultiHeadAttention` `Layer` instance has the following additional read-only properties:
+#'
+#' - `attention_axes`
+#' - `dropout`
+#' - `key_dense`
+#' - `key_dim`
+#' - `num_heads`
+#' - `output_dense`
+#' - `output_shape`
+#' - `query_dense`
+#' - `use_bias`
+#' - `value_dense`
+#' - `value_dim`
+#'
+#' @param num_heads
+#' Number of attention heads.
+#'
+#' @param key_dim
+#' Size of each attention head for query and key.
+#'
+#' @param value_dim
+#' Size of each attention head for value.
+#'
+#' @param dropout
+#' Dropout probability.
+#'
+#' @param use_bias
+#' Boolean, whether the dense layers use bias vectors/matrices.
+#'
+#' @param output_shape
+#' The expected shape of an output tensor, besides the batch
+#' and sequence dims. If not specified, projects back to the query
+#' feature dim (the query input's last dimension).
+#'
+#' @param attention_axes
+#' Axes over which the attention is applied. `NULL` means
+#' attention over all axes except batch, heads, and features.
+#'
+#' @param kernel_initializer
+#' Initializer for dense layer kernels.
+#'
+#' @param bias_initializer
+#' Initializer for dense layer biases.
+#'
+#' @param kernel_regularizer
+#' Regularizer for dense layer kernels.
+#'
+#' @param bias_regularizer
+#' Regularizer for dense layer biases.
+#'
+#' @param activity_regularizer
+#' Regularizer for dense layer activity.
+#'
+#' @param kernel_constraint
+#' Constraint for dense layer kernels.
+#'
+#' @param bias_constraint
+#' Constraint for dense layer biases.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @param inputs
+#' See description.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family attention layers
+#' @family layers
+#' @seealso
+#' +
+
+# +
+
+#' @tether keras.layers.MultiHeadAttention
+layer_multi_head_attention <-
+function (inputs, num_heads, key_dim, value_dim = NULL, dropout = 0,
+    use_bias = TRUE, output_shape = NULL, attention_axes = NULL,
+    kernel_initializer = "glorot_uniform", bias_initializer = "zeros",
+    kernel_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL,
+    kernel_constraint = NULL, bias_constraint = NULL, ...)
+{
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape,
+        num_heads = as_integer, key_dim = as_integer, value_dim = as_integer,
+        attention_axes = as_integer), ignore = "inputs")
+    layer <- create_layer(keras$layers$MultiHeadAttention, NULL, args)
+    if (missing(inputs) || is.null(inputs))
+        return(layer)
+    if (!is.list(inputs))
+        inputs <- list(inputs)
+    do.call(layer, inputs)
+}
diff --git a/R/layers-backend-wrappers.R b/R/layers-backend-wrappers.R
new file mode 100644
index 0000000000..b55eac30cc
--- /dev/null
+++ b/R/layers-backend-wrappers.R
@@ -0,0 +1,479 @@
+
+
+#' Torch module wrapper layer.
+#'
+#' @description
+#' `layer_torch_module_wrapper` is a wrapper class that can turn any
+#' `torch.nn.Module` into a Keras layer, in particular by making its
+#' parameters trackable by Keras.
+#'
+#' # Example
+#' Here's an example of how [`layer_torch_module_wrapper()`] can be used with vanilla
+#' PyTorch modules.
+#'
+#' ```r
+#' # reticulate::py_install(
+#' #   packages = c("torch", "torchvision", "torchaudio"),
+#' #   envname = "r-keras",
+#' #   pip_options = c("--index-url https://download.pytorch.org/whl/cpu")
+#' # )
+#' library(keras3)
+#' use_backend("torch")
+#' torch <- reticulate::import("torch")
+#' nn <- reticulate::import("torch.nn")
+#' nnf <- reticulate::import("torch.nn.functional")
+#'
+#' Classifier(keras$Model) \%py_class\% {
+#'   initialize <- function(...) {
+#'     super$initialize(...)
+#'
+#'     self$conv1 <- layer_torch_module_wrapper(module = nn$Conv2d(
+#'       in_channels = 1L,
+#'       out_channels = 32L,
+#'       kernel_size = tuple(3L, 3L)
+#'     ))
+#'     self$conv2 <- layer_torch_module_wrapper(module = nn$Conv2d(
+#'       in_channels = 32L,
+#'       out_channels = 64L,
+#'       kernel_size = tuple(3L, 3L)
+#'     ))
+#'     self$pool <- nn$MaxPool2d(kernel_size = tuple(2L, 2L))
+#'     self$flatten <- nn$Flatten()
+#'     self$dropout <- nn$Dropout(p = 0.5)
+#'     self$fc <-
+#'       layer_torch_module_wrapper(module = nn$Linear(1600L, 10L))
+#'   }
+#'
+#'   call <- function(inputs) {
+#'     x <- nnf$relu(self$conv1(inputs))
+#'     x <- self$pool(x)
+#'     x <- nnf$relu(self$conv2(x))
+#'     x <- self$pool(x)
+#'     x <- self$flatten(x)
+#'     x <- self$dropout(x)
+#'     x <- self$fc(x)
+#'     nnf$softmax(x, dim = 1L)
+#'   }
+#' }
+#' model <- Classifier()
+#' model$build(shape(1, 28, 28))
+#' cat("Output shape:", format(shape(model(torch$ones(1L, 1L, 28L, 28L)))))
+#'
+#' model |> compile(loss = "sparse_categorical_crossentropy",
+#'                  optimizer = "adam",
+#'                  metrics = "accuracy")
+#' ```
+#' ```r
+#' model |> fit(train_loader, epochs = 5)
+#' ```
+#'
+#' @param module
+#' `torch.nn.Module` instance. If it's a `LazyModule`
+#' instance, then its parameters must be initialized before
+#' passing the instance to `layer_torch_module_wrapper` (e.g. by calling
+#' it once).
+#'
+#' @param name
+#' The name of the layer (string).
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family wrapping layers
+#' @family layers
+#' @tether keras.layers.TorchModuleWrapper
+layer_torch_module_wrapper <-
+function (object, module, name = NULL, ...)
+{
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$TorchModuleWrapper, object, args)
+}
+
+
+#' Keras Layer that wraps a [Flax](https://flax.readthedocs.io) module.
+#'
+#' @description
+#' This layer enables the use of Flax components in the form of
+#' [`flax.linen.Module`](
+#' https://flax.readthedocs.io/en/latest/api_reference/flax.linen/module.html)
+#' instances within Keras when using JAX as the backend for Keras.
+#'
+#' The module method to use for the forward pass can be specified via the
+#' `method` argument and is `__call__` by default. This method must take the
+#' following arguments with these exact names:
+#'
+#' - `self` if the method is bound to the module, which is the case for the
+#'   default of `__call__`, and `module` otherwise to pass the module.
+#' - `inputs`: the inputs to the model, a JAX array or a `PyTree` of arrays.
+#' - `training` *(optional)*: an argument specifying if we're in training mode
+#'   or inference mode, `TRUE` is passed in training mode.
+#'
+#' `FlaxLayer` handles the non-trainable state of your model and required RNGs
+#' automatically. Note that the `mutable` parameter of
+#' [`flax.linen.Module.apply()`](
+#' https://flax.readthedocs.io/en/latest/api_reference/flax.linen/module.html#flax.linen.apply)
+#' is set to `DenyList(["params"])`, therefore making the assumption that all
+#' the variables outside of the "params" collection are non-trainable weights.
+#'
+#' This example shows how to create a `FlaxLayer` from a Flax `Module` with
+#' the default `__call__` method and no training argument:
+#'
+#' ```{r}
+#' # keras3::use_backend("jax")
+#' # py_install("flax", "r-keras")
+#'
+#' if(config_backend() == "jax" &&
+#'    reticulate::py_module_available("flax")) {
+#'
+#' flax <- import("flax")
+#'
+#' MyFlaxModule(flax$linen$Module) %py_class% {
+#'   `__call__` <- flax$linen$compact(\(self, inputs) {
+#'     inputs |>
+#'       (flax$linen$Conv(features = 32L, kernel_size = tuple(3L, 3L)))() |>
+#'       flax$linen$relu() |>
+#'       flax$linen$avg_pool(window_shape = tuple(2L, 2L),
+#'                           strides = tuple(2L, 2L)) |>
+#'       # flatten all except batch_size axis
+#'       (\(x) x$reshape(tuple(x$shape[[1]], -1L)))() |>
+#'       (flax$linen$Dense(features = 200L))() |>
+#'       flax$linen$relu() |>
+#'       (flax$linen$Dense(features = 10L))() |>
+#'       flax$linen$softmax()
+#'   })
+#' }
+#'
+#' # typical usage:
+#' input <- keras_input(c(28, 28, 3))
+#' output <- input |>
+#'   layer_flax_module_wrapper(MyFlaxModule())
+#'
+#' model <- keras_model(input, output)
+#'
+#' # to instantiate the layer before composing:
+#' flax_module <- MyFlaxModule()
+#' keras_layer <- layer_flax_module_wrapper(module = flax_module)
+#'
+#' input <- keras_input(c(28, 28, 3))
+#' output <- input |>
+#'   keras_layer()
+#'
+#' model <- keras_model(input, output)
+#'
+#' }
+#' ```
+#'
+#' This example shows how to wrap the module method to conform to the required
+#' signature. This allows having multiple input arguments and a training
+#' argument that has a different name and values. This additionally shows how
+#' to use a function that is not bound to the module.
+#'
+#' ```r
+#' flax <- import("flax")
+#'
+#' MyFlaxModule(flax$linen$Module) \%py_class\% {
+#'   forward <-
+#'     flax$linen$compact(\(self, input1, input2, deterministic) {
+#'       # do work ....
+#'       outputs # return
+#'     })
+#' }
+#'
+#' my_flax_module_wrapper <- function(module, inputs, training) {
+#'   c(input1, input2) \%<-\% inputs
+#'   module$forward(input1, input2, !training)
+#' }
+#'
+#' flax_module <- MyFlaxModule()
+#' keras_layer <- layer_flax_module_wrapper(module = flax_module,
+#'                                          method = my_flax_module_wrapper)
+#' ```
+#'
+#' @param module
+#' An instance of `flax.linen.Module` or subclass.
+#'
+#' @param method
+#' The method to call the model. This is generally a method in the
+#' `Module`. If not provided, the `__call__` method is used. `method`
+#' can also be a function not defined in the `Module`, in which case it
+#' must take the `Module` as the first argument. It is used for both
+#' `Module.init` and `Module.apply`. Details are documented in the
+#' `method` argument of [`flax.linen.Module.apply()`](
+#' https://flax.readthedocs.io/en/latest/api_reference/flax.linen/module.html#flax.linen.apply).
+#'
+#' @param variables
+#' A `dict` (named R list) containing all the variables of the module in the
+#' same format as what is returned by [`flax.linen.Module.init()`](
+#' https://flax.readthedocs.io/en/latest/api_reference/flax.linen/module.html#flax.linen.init).
+#'
+#' It should contain a `"params"` key and, if applicable, other keys for
+#' collections of variables for non-trainable state. This allows
+#' passing trained parameters and learned non-trainable state or
+#' controlling the initialization. If `NULL` is passed, the module's
+#' `init` function is called at build time to initialize the variables
+#' of the model.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family wrapping layers
+#' @family layers
+#' @tether keras.layers.FlaxLayer
+#' @seealso
+#' +
+
+layer_flax_module_wrapper <-
+function (object, module, method = NULL, variables = NULL, ...)
+{
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$FlaxLayer, object, args)
+}
+
+
+#' Keras Layer that wraps a JAX model.
+#'
+#' @description
+#' This layer enables the use of JAX components within Keras when using JAX as
+#' the backend for Keras.
+#'
+#' # Model function
+#'
+#' This layer accepts JAX models in the form of a function, `call_fn()`, which
+#' must take the following arguments with these exact names:
+#'
+#' - `params`: trainable parameters of the model.
+#' - `state` (*optional*): non-trainable state of the model. Can be omitted if
+#'   the model has no non-trainable state.
+#' - `rng` (*optional*): a `jax.random.PRNGKey` instance. Can be omitted if the
+#'   model does not need RNGs during either training or inference.
+#' - `inputs`: inputs to the model, a JAX array or a `PyTree` of arrays.
+#' - `training` (*optional*): an argument specifying if we're in training mode
+#'   or inference mode, `TRUE` is passed in training mode. Can be omitted if
+#'   the model behaves the same in training mode and inference mode.
+#'
+#' The `inputs` argument is mandatory. Inputs to the model must be provided via
+#' a single argument. If the JAX model takes multiple inputs as separate
+#' arguments, they must be combined into a single structure, for instance in a
+#' `tuple()` or a `dict()`.
+#'
+#' ## Model weights initialization
+#'
+#' The initialization of the `params` and `state` of the model can be handled
+#' by this layer, in which case the `init_fn()` argument must be provided. This
+#' allows the model to be initialized dynamically with the right shape.
+#' Alternatively, and if the shape is known, the `params` argument and
+#' optionally the `state` argument can be used to create an already initialized
+#' model.
+#'
+#' The `init_fn()` function, if provided, must take the following arguments with
+#' these exact names:
+#'
+#' - `rng`: a `jax.random.PRNGKey` instance.
+#' - `inputs`: a JAX array or a `PyTree` of arrays with placeholder values to
+#'   provide the shape of the inputs.
+#' - `training` (*optional*): an argument specifying if we're in training mode
+#'   or inference mode. `TRUE` is always passed to `init_fn`. Can be omitted
+#'   regardless of whether `call_fn` has a `training` argument.
+#'
+#' ## Models with non-trainable state
+#'
+#' For JAX models that have non-trainable state:
+#'
+#' - `call_fn()` must have a `state` argument
+#' - `call_fn()` must return a `tuple()` containing the outputs of the model and
+#'   the new non-trainable state of the model
+#' - `init_fn()` must return a `tuple()` containing the initial trainable params of
+#'   the model and the initial non-trainable state of the model.
+#'
+#' This code shows a possible combination of `call_fn()` and `init_fn()` signatures
+#' for a model with non-trainable state. In this example, the model has a
+#' `training` argument and an `rng` argument in `call_fn()`.
+#'
+#' ```r
+#' stateful_call <- function(params, state, rng, inputs, training) {
+#'   outputs <- ....
+#'   new_state <- ....
+#'   tuple(outputs, new_state)
+#' }
+#'
+#' stateful_init <- function(rng, inputs) {
+#'   initial_params <- ....
+#'   initial_state <- ....
+#'   tuple(initial_params, initial_state)
+#' }
+#' ```
+#'
+#' ## Models without non-trainable state
+#'
+#' For JAX models with no non-trainable state:
+#'
+#' - `call_fn()` must not have a `state` argument
+#' - `call_fn()` must return only the outputs of the model
+#' - `init_fn()` must return only the initial trainable params of the model.
+#'
+#' This code shows a possible combination of `call_fn()` and `init_fn()` signatures
+#' for a model without non-trainable state. In this example, the model does not
+#' have a `training` argument and does not have an `rng` argument in `call_fn()`.
+#'
+#' ```r
+#' stateless_call <- function(params, inputs) {
+#'   outputs <- ....
+#'   outputs
+#' }
+#'
+#' stateless_init <- function(rng, inputs) {
+#'   initial_params <- ....
+#'   initial_params
+#' }
+#' ```
+#'
+#' ## Conforming to the required signature
+#'
+#' If a model has a different signature than the one required by `JaxLayer`,
+#' one can easily write a wrapper method to adapt the arguments. This example
+#' shows a model that has multiple inputs as separate arguments, expects
+#' multiple RNGs in a `dict`, and has a `deterministic` argument with the
+#' opposite meaning of `training`. To conform, the inputs are combined in a
+#' single structure using a `tuple`, the RNG is split and used to populate the
+#' expected `dict`, and the Boolean flag is negated:
+#'
+#' ```r
+#' jax <- import("jax")
+#' my_model_fn <- function(params, rngs, input1, input2, deterministic) {
+#'   ....
+#'   if (!deterministic) {
+#'     dropout_rng <- rngs$dropout
+#'     keep <- jax$random$bernoulli(dropout_rng, dropout_rate, x$shape)
+#'     x <- jax$numpy$where(keep, x / dropout_rate, 0)
+#'     ....
+#'   }
+#'   ....
+#'   return(outputs)
+#' }
+#'
+#' my_model_wrapper_fn <- function(params, rng, inputs, training) {
+#'   c(input1, input2) %<-% inputs
+#'   c(rng1, rng2) %<-% jax$random$split(rng)
+#'   rngs <- list(dropout = rng1, preprocessing = rng2)
+#'   deterministic <- !training
+#'   my_model_fn(params, rngs, input1, input2, deterministic)
+#' }
+#'
+#' keras_layer <- layer_jax_model_wrapper(call_fn = my_model_wrapper_fn,
+#'                                        params = initial_params)
+#' ```
+#'
+#' ## Usage with Haiku modules
+#'
+#' `JaxLayer` enables the use of [Haiku](https://dm-haiku.readthedocs.io)
+#' components in the form of
+#' [`haiku.Module`](https://dm-haiku.readthedocs.io/en/latest/api.html#module).
+#' This is achieved by transforming the module per the Haiku pattern and then
+#' passing `module.apply` in the `call_fn` parameter and `module.init` in the
+#' `init_fn` parameter if needed.
+#'
+#' If the model has non-trainable state, it should be transformed with
+#' [`haiku.transform_with_state`](
+#' https://dm-haiku.readthedocs.io/en/latest/api.html#haiku.transform_with_state).
+#' If the model has no non-trainable state, it should be transformed with
+#' [`haiku.transform`](
+#' https://dm-haiku.readthedocs.io/en/latest/api.html#haiku.transform).
+#' Additionally, and optionally, if the module does not use RNGs in "apply", it
+#' can be transformed with
+#' [`haiku.without_apply_rng`](
+#' https://dm-haiku.readthedocs.io/en/latest/api.html#without-apply-rng).
+#'
+#' The following example shows how to create a `JaxLayer` from a Haiku module
+#' that uses random number generators via `hk.next_rng_key()` and takes a
+#' training positional argument:
+#'
+#' ```r
+#' # reticulate::py_install("haiku", "r-keras")
+#' hk <- import("haiku")
+#' MyHaikuModule(hk$Module) \%py_class\% {
+#'
+#'   `__call__` <- \(self, x, training) {
+#'     x <- hk$Conv2D(32L, tuple(3L, 3L))(x)
+#'     x <- jax$nn$relu(x)
+#'     x <- hk$AvgPool(tuple(1L, 2L, 2L, 1L),
+#'                     tuple(1L, 2L, 2L, 1L), "VALID")(x)
+#'     x <- hk$Flatten()(x)
+#'     x <- hk$Linear(200L)(x)
+#'     if (training)
+#'       x <- hk$dropout(rng = hk$next_rng_key(), rate = 0.3, x = x)
+#'     x <- jax$nn$relu(x)
+#'     x <- hk$Linear(10L)(x)
+#'     x <- jax$nn$softmax(x)
+#'     x
+#'   }
+#'
+#' }
+#'
+#' my_haiku_module_fn <- function(inputs, training) {
+#'   module <- MyHaikuModule()
+#'   module(inputs, training)
+#' }
+#'
+#' transformed_module <- hk$transform(my_haiku_module_fn)
+#'
+#' keras_layer <-
+#'   layer_jax_model_wrapper(call_fn = transformed_module$apply,
+#'                           init_fn = transformed_module$init)
+#' ```
+#'
+#' @param call_fn
+#' The function to call the model. See description above for the
+#' list of arguments it takes and the outputs it returns.
+#'
+#' @param init_fn
+#' The function to call to initialize the model. See description
+#' above for the list of arguments it takes and the outputs it returns.
+#' If `NULL`, then `params` and/or `state` must be provided.
+#'
+#' @param params
+#' A `PyTree` containing all the model trainable parameters. This
+#' allows passing trained parameters or controlling the initialization.
+#' If both `params` and `state` are `NULL`, `init_fn()` is called at
+#' build time to initialize the trainable parameters of the model.
+#'
+#' @param state
+#' A `PyTree` containing all the model non-trainable state. This
+#' allows passing learned state or controlling the initialization. If
+#' both `params` and `state` are `NULL`, and `call_fn()` takes a `state`
+#' argument, then `init_fn()` is called at build time to initialize the
+#' non-trainable state of the model.
+#'
+#' @param seed
+#' Seed for random number generator. Optional.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family wrapping layers
+#' @family layers
+#' @tether keras.layers.JaxLayer
+layer_jax_model_wrapper <-
+function (object, call_fn, init_fn = NULL, params = NULL, state = NULL,
+    seed = NULL, ...)
+{ + args <- capture_args(list(seed = as_integer, input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape), + ignore = "object") + create_layer(keras$layers$JaxLayer, object, args) +} diff --git a/R/layers-convolutional.R b/R/layers-convolutional.R index 455abda857..3ecbdc63a8 100644 --- a/R/layers-convolutional.R +++ b/R/layers-convolutional.R @@ -1,1720 +1,1451 @@ -#' 1D convolution layer (e.g. temporal convolution). -#' -#' This layer creates a convolution kernel that is convolved with the layer -#' input over a single spatial (or temporal) dimension to produce a tensor of -#' outputs. If `use_bias` is TRUE, a bias vector is created and added to the -#' outputs. Finally, if `activation` is not `NULL`, it is applied to the outputs -#' as well. When using this layer as the first layer in a model, provide an -#' `input_shape` argument (list of integers or `NULL `, e.g. `(10, 128)` for -#' sequences of 10 vectors of 128-dimensional vectors, or `(NULL, 128)` for -#' variable-length sequences of 128-dimensional vectors. -#' -#' @inheritParams layer_dense -#' -#' @param filters Integer, the dimensionality of the output space (i.e. the -#' number of output filters in the convolution). -#' @param kernel_size An integer or list of a single integer, specifying the -#' length of the 1D convolution window. -#' @param strides An integer or list of a single integer, specifying the stride -#' length of the convolution. Specifying any stride value != 1 is incompatible -#' with specifying any `dilation_rate` value != 1. -#' @param padding One of `"valid"`, `"causal"` or `"same"` (case-insensitive). -#' `"valid"` means "no padding". -#' `"same"` results in padding the input such that the output has the same -#' length as the original input. -#' `"causal"` results in causal (dilated) convolutions, e.g. `output[t]` does -#' not depend on `input[t+1:]`. Useful when modeling temporal data where the -#' model should not violate the temporal order. See [WaveNet: A Generative -#' Model for Raw Audio, section 2.1](https://arxiv.org/abs/1609.03499). -#' @param data_format A string, one of `"channels_last"` (default) or `"channels_first"`. -#' The ordering of the dimensions in the inputs. `"channels_last"` corresponds -#' to inputs with shape `(batch, length, channels)` (default format for -#' temporal data in Keras) while `"channels_first"` corresponds to inputs -#' with shape `(batch, channels, length)`. -#' @param dilation_rate an integer or list of a single integer, specifying the -#' dilation rate to use for dilated convolution. Currently, specifying any -#' `dilation_rate` value != 1 is incompatible with specifying any `strides` -#' value != 1. -#' @param groups A positive integer specifying the number of groups in which the -#' input is split along the channel axis. Each group is convolved separately -#' with `filters / groups` filters. The output is the concatenation of all the -#' groups results along the channel axis. Input channels and `filters` must both -#' be divisible by `groups`. -#' @param activation Activation function to use. If you don't specify anything, -#' no activation is applied (ie. "linear" activation: `a(x) = x`). -#' @param use_bias Boolean, whether the layer uses a bias vector. -#' @param kernel_initializer Initializer for the `kernel` weights matrix. -#' @param bias_initializer Initializer for the bias vector. -#' @param kernel_regularizer Regularizer function applied to the `kernel` -#' weights matrix. 
-#' @param bias_regularizer Regularizer function applied to the bias vector. -#' @param activity_regularizer Regularizer function applied to the output of the -#' layer (its "activation").. -#' @param kernel_constraint Constraint function applied to the kernel matrix. -#' @param bias_constraint Constraint function applied to the bias vector. -#' -#' @section Input shape: 3D tensor with shape: `(batch_size, steps, input_dim)` -#' -#' @section Output shape: 3D tensor with shape: `(batch_size, new_steps, -#' filters)` `steps` value might have changed due to padding or strides. -#' -#' @family convolutional layers -#' -#' @export -layer_conv_1d <- function(object, filters, kernel_size, strides = 1L, padding = "valid", - data_format = "channels_last", - dilation_rate = 1L, groups = 1L, activation = NULL, use_bias = TRUE, - kernel_initializer = "glorot_uniform", bias_initializer = "zeros", - kernel_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL, - kernel_constraint = NULL, bias_constraint = NULL, input_shape = NULL, - batch_input_shape = NULL, batch_size = NULL, dtype = NULL, - name = NULL, trainable = NULL, weights = NULL) { - - args <- list( - filters = as.integer(filters), - kernel_size = as_integer_tuple(kernel_size), - strides = as_integer_tuple(strides), - padding = padding, - dilation_rate = as_integer_tuple(dilation_rate), - activation = activation, - use_bias = use_bias, - kernel_initializer = kernel_initializer, - bias_initializer = bias_initializer, - kernel_regularizer = kernel_regularizer, - bias_regularizer = bias_regularizer, - activity_regularizer = activity_regularizer, - kernel_constraint = kernel_constraint, - bias_constraint = bias_constraint, - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - dtype = dtype, - name = name, - trainable = trainable, - weights = weights - ) - if (tf_version() >= "2.3") - args$groups <- as.integer(groups) - if (keras_version() >= "2.2") - args$data_format <- data_format - create_layer(keras$layers$Conv1D, object, args) -} - - -#' 2D convolution layer (e.g. spatial convolution over images). -#' -#' This layer creates a convolution kernel that is convolved with the layer -#' input to produce a tensor of outputs. If `use_bias` is TRUE, a bias vector is -#' created and added to the outputs. Finally, if `activation` is not `NULL`, it -#' is applied to the outputs as well. When using this layer as the first layer -#' in a model, provide the keyword argument `input_shape` (list of integers, -#' does not include the sample axis), e.g. `input_shape=c(128, 128, 3)` for -#' 128x128 RGB pictures in `data_format="channels_last"`. -#' -#' @inheritParams layer_conv_1d -#' -#' @param filters Integer, the dimensionality of the output space (i.e. the -#' number of output filters in the convolution). -#' @param kernel_size An integer or list of 2 integers, specifying the width and -#' height of the 2D convolution window. Can be a single integer to specify the -#' same value for all spatial dimensions. -#' @param strides An integer or list of 2 integers, specifying the strides of -#' the convolution along the width and height. Can be a single integer to -#' specify the same value for all spatial dimensions. Specifying any stride -#' value != 1 is incompatible with specifying any `dilation_rate` value != 1. -#' @param padding one of `"valid"` or `"same"` (case-insensitive). 
Note that `"same"` -#' is slightly inconsistent across backends with `strides` != 1, as described -#' [here](https://github.com/keras-team/keras/pull/9473#issuecomment-372166860) -#' @param data_format A string, one of `channels_last` (default) or -#' `channels_first`. The ordering of the dimensions in the inputs. -#' `channels_last` corresponds to inputs with shape `(batch, height, width, -#' channels)` while `channels_first` corresponds to inputs with shape `(batch, -#' channels, height, width)`. It defaults to the `image_data_format` value -#' found in your Keras config file at `~/.keras/keras.json`. If you never set -#' it, then it will be "channels_last". -#' @param dilation_rate an integer or list of 2 integers, specifying the -#' dilation rate to use for dilated convolution. Can be a single integer to -#' specify the same value for all spatial dimensions. Currently, specifying -#' any `dilation_rate` value != 1 is incompatible with specifying any stride -#' value != 1. -#' @param groups A positive integer specifying the number of groups in which the -#' input is split along the channel axis. Each group is convolved separately -#' with `filters / groups` filters. The output is the concatenation of all the -#' groups results along the channel axis. Input channels and `filters` must both -#' be divisible by `groups`. -#' -#' @section Input shape: 4D tensor with shape: `(samples, channels, rows, cols)` -#' if data_format='channels_first' or 4D tensor with shape: `(samples, rows, -#' cols, channels)` if data_format='channels_last'. -#' -#' @section Output shape: 4D tensor with shape: `(samples, filters, new_rows, -#' new_cols)` if data_format='channels_first' or 4D tensor with shape: -#' `(samples, new_rows, new_cols, filters)` if data_format='channels_last'. -#' `rows` and `cols` values might have changed due to padding. -#' -#' @family convolutional layers -#' -#' @export -layer_conv_2d <- function(object, filters, kernel_size, strides = c(1L, 1L), padding = "valid", data_format = NULL, - dilation_rate = c(1L, 1L), groups = 1L, activation = NULL, use_bias = TRUE, - kernel_initializer = "glorot_uniform", bias_initializer = "zeros", - kernel_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL, - kernel_constraint = NULL, bias_constraint = NULL, input_shape = NULL, - batch_input_shape = NULL, batch_size = NULL, dtype = NULL, - name = NULL, trainable = NULL, weights = NULL) { - - args <- list( - filters = as.integer(filters), - kernel_size = as_integer_tuple(kernel_size), - strides = as_integer_tuple(strides), - padding = padding, - data_format = data_format, - dilation_rate = as_integer_tuple(dilation_rate), - activation = activation, - use_bias = use_bias, - kernel_initializer = kernel_initializer, - bias_initializer = bias_initializer, - kernel_regularizer = kernel_regularizer, - bias_regularizer = bias_regularizer, - activity_regularizer = activity_regularizer, - kernel_constraint = kernel_constraint, - bias_constraint = bias_constraint, - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - dtype = dtype, - name = name, - trainable = trainable, - weights = weights - ) - - if (tf_version() >= "2.3") - args$groups <- as.integer(groups) - - create_layer(keras$layers$Conv2D, object, args) -} - -#' 3D convolution layer (e.g. spatial convolution over volumes). +#' 1D convolution layer (e.g. temporal convolution). 
#' +#' @description #' This layer creates a convolution kernel that is convolved with the layer -#' input to produce a tensor of outputs. If `use_bias` is TRUE, a bias vector is -#' created and added to the outputs. Finally, if `activation` is not `NULL`, it -#' is applied to the outputs as well. When using this layer as the first layer -#' in a model, provide the keyword argument `input_shape` (list of integers, -#' does not include the sample axis), e.g. `input_shape=c(128L, 128L, 128L, 3L)` -#' for 128x128x128 volumes with a single channel, in -#' `data_format="channels_last"`. -#' -#' @inheritParams layer_conv_2d -#' -#' @param filters Integer, the dimensionality of the output space (i.e. the -#' number of output filters in the convolution). -#' @param kernel_size An integer or list of 3 integers, specifying the depth, -#' height, and width of the 3D convolution window. Can be a single integer -#' to specify the same value for all spatial dimensions. -#' @param strides An integer or list of 3 integers, specifying the strides of -#' the convolution along each spatial dimension. Can be a single integer to -#' specify the same value for all spatial dimensions. Specifying any stride -#' value != 1 is incompatible with specifying any `dilation_rate` value != 1. -#' @param padding one of `"valid"` or `"same"` (case-insensitive). -#' @param data_format A string, one of `channels_last` (default) or -#' `channels_first`. The ordering of the dimensions in the inputs. -#' `channels_last` corresponds to inputs with shape `(batch, spatial_dim1, -#' spatial_dim2, spatial_dim3, channels)` while `channels_first` corresponds -#' to inputs with shape `(batch, channels, spatial_dim1, spatial_dim2, -#' spatial_dim3)`. It defaults to the `image_data_format` value found in your -#' Keras config file at `~/.keras/keras.json`. If you never set it, then it -#' will be "channels_last". -#' @param dilation_rate an integer or list of 3 integers, specifying the -#' dilation rate to use for dilated convolution. Can be a single integer to -#' specify the same value for all spatial dimensions. Currently, specifying -#' any `dilation_rate` value != 1 is incompatible with specifying any stride -#' value != 1. -#' @param groups A positive integer specifying the number of groups in which the -#' input is split along the channel axis. Each group is convolved separately -#' with `filters / groups` filters. The output is the concatenation of all the -#' groups results along the channel axis. Input channels and `filters` must both -#' be divisible by `groups`. -#' -#' @section Input shape: 5D tensor with shape: `(samples, channels, conv_dim1, -#' conv_dim2, conv_dim3)` if data_format='channels_first' or 5D tensor with -#' shape: `(samples, conv_dim1, conv_dim2, conv_dim3, channels)` if -#' data_format='channels_last'. -#' -#' @section Output shape: 5D tensor with shape: `(samples, filters, -#' new_conv_dim1, new_conv_dim2, new_conv_dim3)` if -#' data_format='channels_first' or 5D tensor with shape: `(samples, -#' new_conv_dim1, new_conv_dim2, new_conv_dim3, filters)` if -#' data_format='channels_last'. `new_conv_dim1`, `new_conv_dim2` and -#' `new_conv_dim3` values might have changed due to padding. 
-#' -#' @family convolutional layers -#' -#' @export -layer_conv_3d <- function(object, filters, kernel_size, strides = c(1L, 1L, 1L), padding = "valid", - data_format = NULL, dilation_rate = c(1L, 1L, 1L), groups = 1L, - activation = NULL, use_bias = TRUE, - kernel_initializer = "glorot_uniform", bias_initializer = "zeros", - kernel_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL, - kernel_constraint = NULL, bias_constraint = NULL, input_shape = NULL, - batch_input_shape = NULL, batch_size = NULL, dtype = NULL, - name = NULL, trainable = NULL, weights = NULL) { - - args <- list( - filters = as.integer(filters), - kernel_size = as_integer_tuple(kernel_size), - strides = as_integer_tuple(strides), - padding = padding, - data_format = data_format, - dilation_rate = as_integer_tuple(dilation_rate), - activation = activation, - use_bias = use_bias, - kernel_initializer = kernel_initializer, - bias_initializer = bias_initializer, - kernel_regularizer = kernel_regularizer, - bias_regularizer = bias_regularizer, - activity_regularizer = activity_regularizer, - kernel_constraint = kernel_constraint, - bias_constraint = bias_constraint, - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - dtype = dtype, - name = name, - trainable = trainable, - weights = weights - ) - - if (tf_version() >= "2.3") - args$groups <- as.integer(groups) - - create_layer(keras$layers$Conv3D, object, args) -} - -#' Transposed 1D convolution layer (sometimes called Deconvolution). -#' -#' The need for transposed convolutions generally arises from the desire to use -#' a transformation going in the opposite direction of a normal convolution, -#' i.e., from something that has the shape of the output of some convolution to -#' something that has the shape of its input while maintaining a connectivity -#' pattern that is compatible with said convolution. -#' When using this layer as the first layer in a model, -#' provide the keyword argument `input_shape` -#' (tuple of integers, does not include the sample axis), -#' e.g. `input_shape=(128, 3)` for data with 128 time steps and 3 channels. -#' -#' @inheritParams layer_conv_1d -#' -#' @param padding one of `"valid"` or `"same"` (case-insensitive). -#' @param output_padding An integer specifying the amount of padding along -#' the time dimension of the output tensor. -#' The amount of output padding must be lower than the stride. -#' If set to `NULL` (default), the output shape is inferred. -#' -#' @section Input shape: 3D tensor with shape: `(batch, steps, channels)` -#' -#' @section Output shape: 3D tensor with shape: `(batch, new_steps, filters)` -#' If `output_padding` is specified: -#' ``` -#' new_timesteps = ((timesteps - 1) * strides + kernel_size - 2 * padding + output_padding) +#' input over a single spatial (or temporal) dimension to produce a tensor of +#' outputs. If `use_bias` is `TRUE`, a bias vector is created and added to the +#' outputs. Finally, if `activation` is not `NULL`, it is applied to the +#' outputs as well. 
+#'
+#' # Input Shape
+#' - If `data_format="channels_last"`:
+#'   A 3D tensor with shape: `(batch_shape, steps, channels)`
+#' - If `data_format="channels_first"`:
+#'   A 3D tensor with shape: `(batch_shape, channels, steps)`
+#'
+#' # Output Shape
+#' - If `data_format="channels_last"`:
+#'   A 3D tensor with shape: `(batch_shape, new_steps, filters)`
+#' - If `data_format="channels_first"`:
+#'   A 3D tensor with shape: `(batch_shape, filters, new_steps)`
+#'
+#' # Raises
+#' ValueError: when both `strides > 1` and `dilation_rate > 1`.
+#'
+#' # Example
+#' ```{r}
+#' # The inputs are 128-length vectors with 10 timesteps, and the
+#' # batch size is 4.
+#' x <- random_uniform(c(4, 10, 128))
+#' y <- x |> layer_conv_1d(32, 3, activation='relu')
+#' shape(y)
+#' ```
+#'
+#' @returns
+#' A 3D tensor representing `activation(conv1d(inputs, kernel) + bias)`.
+#'
+#' @param filters
+#' int, the dimension of the output space (the number of filters
+#' in the convolution).
+#'
+#' @param kernel_size
+#' int or list of 1 integer, specifying the size of the
+#' convolution window.
+#'
+#' @param strides
+#' int or list of 1 integer, specifying the stride length
+#' of the convolution. `strides > 1` is incompatible with
+#' `dilation_rate > 1`.
+#'
+#' @param padding
+#' string, `"valid"`, `"same"` or `"causal"` (case-insensitive).
+#' `"valid"` means no padding. `"same"` results in padding evenly to
+#' the left/right or up/down of the input. When `padding="same"` and
+#' `strides=1`, the output has the same size as the input.
+#' `"causal"` results in causal (dilated) convolutions, e.g. `output[t]`
+#' does not depend on `tail(input, t+1)`. Useful when modeling temporal data
+#' where the model should not violate the temporal order.
+#' See [WaveNet: A Generative Model for Raw Audio, section 2.1](
+#' https://arxiv.org/abs/1609.03499).
+#'
+#' @param data_format
+#' string, either `"channels_last"` or `"channels_first"`.
+#' The ordering of the dimensions in the inputs. `"channels_last"`
+#' corresponds to inputs with shape `(batch, steps, features)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch, features, steps)`. It defaults to the `image_data_format`
+#' value found in your Keras config file at `~/.keras/keras.json`.
+#' If you never set it, then it will be `"channels_last"`.
+#'
+#' @param dilation_rate
+#' int or list of 1 integer, specifying the dilation
+#' rate to use for dilated convolution.
+#'
+#' @param groups
+#' A positive int specifying the number of groups in which the
+#' input is split along the channel axis. Each group is convolved
+#' separately with `filters // groups` filters. The output is the
+#' concatenation of all the `groups` results along the channel axis.
+#' Input channels and `filters` must both be divisible by `groups`.
+#'
+#' @param activation
+#' Activation function. If `NULL`, no activation is applied.
+#'
+#' @param use_bias
+#' bool, if `TRUE`, bias will be added to the output.
+#'
+#' @param kernel_initializer
+#' Initializer for the convolution kernel. If `NULL`,
+#' the default initializer (`"glorot_uniform"`) will be used.
+#'
+#' @param bias_initializer
+#' Initializer for the bias vector. If `NULL`, the
+#' default initializer (`"zeros"`) will be used.
+#'
+#' @param kernel_regularizer
+#' Optional regularizer for the convolution kernel.
+#'
+#' @param bias_regularizer
+#' Optional regularizer for the bias vector.
+#'
+#' @param activity_regularizer
+#' Optional regularizer function for the output.
+#'
+#' @param kernel_constraint
+#' Optional projection function to be applied to the
+#' kernel after being updated by an `Optimizer` (e.g. used to implement
+#' norm constraints or value constraints for layer weights). The
+#' function must take as input the unprojected variable and must return
+#' the projected variable (which must have the same shape). Constraints
+#' are not safe to use when doing asynchronous distributed training.
+#'
+#' @param bias_constraint
+#' Optional projection function to be applied to the
+#' bias after being updated by an `Optimizer`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
 #'
 #' @export
-layer_conv_1d_transpose <- function(object, filters, kernel_size, strides = 1, padding = "valid", output_padding = NULL,
-                                    data_format = NULL, dilation_rate = 1, activation = NULL, use_bias = TRUE,
-                                    kernel_initializer = "glorot_uniform", bias_initializer = "zeros",
-                                    kernel_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL,
-                                    kernel_constraint = NULL, bias_constraint = NULL, input_shape = NULL,
-                                    batch_input_shape = NULL, batch_size = NULL, dtype = NULL,
-                                    name = NULL, trainable = NULL, weights = NULL) {
-
-  create_layer(keras$layers$Conv1DTranspose, object, list(
-    filters = as.integer(filters),
-    kernel_size = as.integer(kernel_size),
-    strides = as.integer(strides),
-    padding = padding,
-    output_padding = as_nullable_integer(output_padding),
-    data_format = data_format,
-    dilation_rate= as.integer(dilation_rate),
-    activation = activation,
-    use_bias = use_bias,
-    kernel_initializer = kernel_initializer,
-    bias_initializer = bias_initializer,
-    kernel_regularizer = kernel_regularizer,
-    bias_regularizer = bias_regularizer,
-    activity_regularizer = activity_regularizer,
-    kernel_constraint = kernel_constraint,
-    bias_constraint = bias_constraint,
-    input_shape = normalize_shape(input_shape),
-    batch_input_shape = normalize_shape(batch_input_shape),
-    batch_size = as_nullable_integer(batch_size),
-    dtype = dtype,
-    name = name,
-    trainable = trainable,
-    weights = weights
-  ))
-}
-
-
-#' Transposed 2D convolution layer (sometimes called Deconvolution).
-#'
-#' The need for transposed convolutions generally arises from the desire to use
-#' a transformation going in the opposite direction of a normal convolution,
-#' i.e., from something that has the shape of the output of some convolution to
-#' something that has the shape of its input while maintaining a connectivity
-#' pattern that is compatible with said convolution. When using this layer as
-#' the first layer in a model, provide the keyword argument `input_shape` (list
-#' of integers, does not include the sample axis), e.g. `input_shape=c(128L,
-#' 128L, 3L)` for 128x128 RGB pictures in `data_format="channels_last"`.
-#'
-#' @inheritParams layer_conv_2d
-#'
-#' @param filters Integer, the dimensionality of the output space (i.e. the
-#'   number of output filters in the convolution).
-#' @param kernel_size An integer or list of 2 integers, specifying the width and
-#'   height of the 2D convolution window. Can be a single integer to specify the
-#'   same value for all spatial dimensions.
-#' @param strides An integer or list of 2 integers, specifying the strides of
-#'   the convolution along the width and height.
Can be a single integer to -#' specify the same value for all spatial dimensions. Specifying any stride -#' value != 1 is incompatible with specifying any `dilation_rate` value != 1. -#' @param padding one of `"valid"` or `"same"` (case-insensitive). -#' @param output_padding An integer or list of 2 integers, -#' specifying the amount of padding along the height and width -#' of the output tensor. Can be a single integer to specify the same -#' value for all spatial dimensions. The amount of output padding along a -#' given dimension must be lower than the stride along that same dimension. -#' If set to `NULL` (default), the output shape is inferred. -#' @param dilation_rate Dialation rate. -#' -#' @section Input shape: 4D tensor with shape: `(batch, channels, rows, cols)` -#' if data_format='channels_first' or 4D tensor with shape: `(batch, rows, -#' cols, channels)` if data_format='channels_last'. -#' -#' @section Output shape: 4D tensor with shape: `(batch, filters, new_rows, -#' new_cols)` if data_format='channels_first' or 4D tensor with shape: -#' `(batch, new_rows, new_cols, filters)` if data_format='channels_last'. -#' `rows` and `cols` values might have changed due to padding. -#' -#' @section References: -#' - [A guide to convolution arithmetic for deep learning](https://arxiv.org/abs/1603.07285v1) -#' #' @family convolutional layers -#' -#' @export -layer_conv_2d_transpose <- function(object, filters, kernel_size, strides = c(1, 1), padding = "valid", output_padding = NULL, - data_format = NULL, dilation_rate = c(1, 1), activation = NULL, use_bias = TRUE, - kernel_initializer = "glorot_uniform", bias_initializer = "zeros", - kernel_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL, - kernel_constraint = NULL, bias_constraint = NULL, input_shape = NULL, - batch_input_shape = NULL, batch_size = NULL, dtype = NULL, - name = NULL, trainable = NULL, weights = NULL) { - - args <- list( - filters = as.integer(filters), - kernel_size = as_integer_tuple(kernel_size), - strides = as_integer_tuple(strides), - padding = padding, - data_format = data_format, - activation = activation, - use_bias = use_bias, - kernel_initializer = kernel_initializer, - bias_initializer = bias_initializer, - kernel_regularizer = kernel_regularizer, - bias_regularizer = bias_regularizer, - activity_regularizer = activity_regularizer, - kernel_constraint = kernel_constraint, - bias_constraint = bias_constraint, - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - dtype = dtype, - name = name, - trainable = trainable, - weights = weights - ) - - if (keras_version() >= "2.2.3") { - args$output_padding <- as_integer_tuple(output_padding) - args$dilation_rate <- as_integer_tuple(dilation_rate) - } - - create_layer(keras$layers$Conv2DTranspose, object, args) +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.Conv1D +layer_conv_1d <- +function (object, filters, kernel_size, strides = 1L, padding = "valid", + data_format = NULL, dilation_rate = 1L, groups = 1L, activation = NULL, + use_bias = TRUE, kernel_initializer = "glorot_uniform", bias_initializer = "zeros", + kernel_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL, + kernel_constraint = NULL, bias_constraint = NULL, ...) 
+{
+    args <- capture_args(list(filters = as_integer, kernel_size = as_integer_tuple,
+        strides = as_integer_tuple, dilation_rate = as_integer_tuple,
+        groups = as_integer, input_shape = normalize_shape, batch_size = as_integer,
+        batch_input_shape = normalize_shape), ignore = "object")
+    create_layer(keras$layers$Conv1D, object, args)
+}

-#' Transposed 3D convolution layer (sometimes called Deconvolution).
+#' 1D transposed convolution layer.
 #'
+#' @description
+#' The need for transposed convolutions generally arises from the desire to use
 #' a transformation going in the opposite direction of a normal convolution,
-#' i.e., from something that has the shape of the output of some convolution to
-#' something that has the shape of its input while maintaining a connectivity
-#' pattern that is compatible with said convolution.
-#'
-#' When using this layer as the first layer in a model, provide the keyword argument
-#' `input_shape` (list of integers, does not include the sample axis), e.g.
-#' `input_shape = list(128, 128, 128, 3)` for a 128x128x128 volume with 3 channels if
-#' `data_format="channels_last"`.
-#'
-#' @inheritParams layer_conv_2d
-#'
-#' @param filters Integer, the dimensionality of the output space (i.e. the
-#'   number of output filters in the convolution).
-#' @param kernel_size An integer or list of 3 integers, specifying the depth,
-#'   height, and width of the 3D convolution window. Can be a single integer
-#'   to specify the same value for all spatial dimensions.
-#' @param strides An integer or list of 3 integers, specifying the strides of
-#'   the convolution along the depth, height and width.. Can be a single integer
-#'   to specify the same value for all spatial dimensions. Specifying any stride
-#'   value != 1 is incompatible with specifying any `dilation_rate` value != 1.
-#' @param padding one of `"valid"` or `"same"` (case-insensitive).
-#' @param output_padding An integer or list of 3 integers,
-#'   specifying the amount of padding along the depth, height, and width
-#'   of the output tensor. Can be a single integer to specify the same
-#'   value for all spatial dimensions. The amount of output padding along a
-#'   given dimension must be lower than the stride along that same dimension.
-#'   If set to `NULL` (default), the output shape is inferred.
-#' @param data_format A string, one of `channels_last` (default) or
-#'   `channels_first`. The ordering of the dimensions in the inputs.
-#'   `channels_last` corresponds to inputs with shape `(batch, depth, height,
-#'   width, channels)` while `channels_first` corresponds to inputs with shape
-#'   `(batch, channels, depth, height, width)`. It defaults to the
-#'   `image_data_format` value found in your Keras config file at
-#'   `~/.keras/keras.json`. If you never set it, then it will be
-#'   "channels_last".
-#' @param dilation_rate An integer or vector of 3 integers, specifying the
-#'   dilation rate to use for dilated convolution. Can be a single integer to
-#'   specify the same value for all spatial dimensions.
-#' @param activation Activation function to use. If you don't specify anything, no
-#'   activation is applied (ie. "linear" activation: `a(x) = x`).
-#' @param use_bias Boolean, whether the layer uses a bias vector.
-#' @param kernel_initializer Initializer for the `kernel` weights matrix.
-#' @param bias_initializer Initializer for the bias vector.
-#' @param kernel_regularizer Regularizer function applied to the `kernel` -#' weights matrix, -#' @param bias_regularizer Regularizer function applied to the bias vector. -#' @param activity_regularizer Regularizer function applied to the output of the -#' layer (its "activation"). -#' @param kernel_constraint Constraint function applied to the kernel matrix. -#' @param bias_constraint Constraint function applied to the bias vector. -#' -#' @section References: -#' - [A guide to convolution arithmetic for deep learning](https://arxiv.org/abs/1603.07285v1) -#' -#' @family convolutional layers -#' -#' @export -layer_conv_3d_transpose <- function(object, filters, kernel_size, strides = c(1, 1, 1), - padding = "valid", output_padding = NULL, - data_format = NULL, dilation_rate = c(1L, 1L, 1L), - activation = NULL, use_bias = TRUE, - kernel_initializer = "glorot_uniform", bias_initializer = "zeros", - kernel_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL, - kernel_constraint = NULL, bias_constraint = NULL, input_shape = NULL, - batch_input_shape = NULL, batch_size = NULL, dtype = NULL, - name = NULL, trainable = NULL, weights = NULL) { - - args <- list( - filters = as.integer(filters), - kernel_size = as_integer_tuple(kernel_size), - strides = as_integer_tuple(strides), - padding = padding, - data_format = data_format, - activation = activation, - use_bias = use_bias, - kernel_initializer = kernel_initializer, - bias_initializer = bias_initializer, - kernel_regularizer = kernel_regularizer, - bias_regularizer = bias_regularizer, - activity_regularizer = activity_regularizer, - kernel_constraint = kernel_constraint, - bias_constraint = bias_constraint, - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - dtype = dtype, - name = name, - trainable = trainable, - weights = weights - ) - - if (keras_version() >= "2.2.3") - args$output_padding <- as_integer_tuple(output_padding) - - if (tf_version() >= "2.3") - args$dilation_rate <- as_integer_tuple(dilation_rate) - # TODO: warning should be issued if user supplied dilation_rate and we're - # ignoring it - - create_layer(keras$layers$Conv3DTranspose, object, args) -} - - - -#' Separable 2D convolution. -#' -#' Separable convolutions consist in first performing a depthwise spatial -#' convolution (which acts on each input channel separately) followed by a -#' pointwise convolution which mixes together the resulting output channels. The -#' `depth_multiplier` argument controls how many output channels are generated -#' per input channel in the depthwise step. Intuitively, separable convolutions -#' can be understood as a way to factorize a convolution kernel into two smaller -#' kernels, or as an extreme version of an Inception block. -#' -#' @inheritParams layer_conv_2d -#' -#' @param filters Integer, the dimensionality of the output space (i.e. the -#' number of output filters in the convolution). -#' @param kernel_size An integer or list of 2 integers, specifying the width and -#' height of the 2D convolution window. Can be a single integer to specify the -#' same value for all spatial dimensions. -#' @param strides An integer or list of 2 integers, specifying the strides of -#' the convolution along the width and height. Can be a single integer to -#' specify the same value for all spatial dimensions. Specifying any stride -#' value != 1 is incompatible with specifying any `dilation_rate` value != 1. 
-#' @param padding one of `"valid"` or `"same"` (case-insensitive). -#' @param depth_multiplier The number of depthwise convolution output channels -#' for each input channel. The total number of depthwise convolution output -#' channels will be equal to `filters_in * depth_multiplier`. -#' @param depthwise_initializer Initializer for the depthwise kernel matrix. -#' @param pointwise_initializer Initializer for the pointwise kernel matrix. -#' @param depthwise_regularizer Regularizer function applied to the depthwise -#' kernel matrix. -#' @param pointwise_regularizer Regularizer function applied to the pointwise -#' kernel matrix. -#' @param depthwise_constraint Constraint function applied to the depthwise -#' kernel matrix. -#' @param pointwise_constraint Constraint function applied to the pointwise -#' kernel matrix. -#' -#' @section Input shape: 4D tensor with shape: `(batch, channels, rows, cols)` -#' if data_format='channels_first' or 4D tensor with shape: `(batch, rows, -#' cols, channels)` if data_format='channels_last'. -#' -#' @section Output shape: 4D tensor with shape: `(batch, filters, new_rows, -#' new_cols)` if data_format='channels_first' or 4D tensor with shape: -#' `(batch, new_rows, new_cols, filters)` if data_format='channels_last'. -#' `rows` and `cols` values might have changed due to padding. -#' -#' @family convolutional layers -#' -#' @export -layer_separable_conv_2d <- function(object, filters, kernel_size, strides = c(1, 1), padding = "valid", data_format = NULL, - dilation_rate = 1, depth_multiplier = 1, activation = NULL, use_bias = TRUE, - depthwise_initializer = "glorot_uniform", pointwise_initializer = "glorot_uniform", bias_initializer = "zeros", - depthwise_regularizer = NULL, pointwise_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL, - depthwise_constraint = NULL, pointwise_constraint = NULL, bias_constraint = NULL, input_shape = NULL, - batch_input_shape = NULL, batch_size = NULL, dtype = NULL, - name = NULL, trainable = NULL, weights = NULL) { - - args <- list( - filters = as.integer(filters), - kernel_size = as_integer_tuple(kernel_size), - strides = as_integer_tuple(strides), - padding = padding, - data_format = data_format, - depth_multiplier = as.integer(depth_multiplier), - activation = activation, - use_bias = use_bias, - depthwise_initializer = depthwise_initializer, - pointwise_initializer = pointwise_initializer, - bias_initializer = bias_initializer, - depthwise_regularizer = depthwise_regularizer, - pointwise_regularizer = pointwise_regularizer, - bias_regularizer = bias_regularizer, - activity_regularizer = activity_regularizer, - depthwise_constraint = depthwise_constraint, - pointwise_constraint = pointwise_constraint, - bias_constraint = bias_constraint, - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - dtype = dtype, - name = name, - trainable = trainable, - weights = weights - ) - - if (keras_version() >= "2.1.6") - args$dilation_rate <- as.integer(dilation_rate) - - create_layer(keras$layers$SeparableConv2D, object, args) - -} - - -#' Depthwise 1D convolution -#' -#' @details -#' Depthwise convolution is a type of convolution in which each input channel is -#' convolved with a different kernel (called a depthwise kernel). You -#' can understand depthwise convolution as the first step in a depthwise -#' separable convolution. 
-#' -#' It is implemented via the following steps: -#' -#' - Split the input into individual channels. -#' - Convolve each channel with an individual depthwise kernel with -#' `depth_multiplier` output channels. -#' - Concatenate the convolved outputs along the channels axis. +#' i.e., from something that has the shape of the output of some convolution +#' to something that has the shape of its input while maintaining a +#' connectivity pattern that is compatible with said convolution. +#' +#' # Input Shape +#' - If `data_format="channels_last"`: +#' A 3D tensor with shape: `(batch_shape, steps, channels)` +#' - If `data_format="channels_first"`: +#' A 3D tensor with shape: `(batch_shape, channels, steps)` +#' +#' # Output Shape +#' - If `data_format="channels_last"`: +#' A 3D tensor with shape: `(batch_shape, new_steps, filters)` +#' - If `data_format="channels_first"`: +#' A 3D tensor with shape: `(batch_shape, filters, new_steps)` +#' +#' # Raises +#' ValueError: when both `strides > 1` and `dilation_rate > 1`. +#' +#' # References +#' - [A guide to convolution arithmetic for deep learning]( +#' https://arxiv.org/abs/1603.07285v1) +#' - [Deconvolutional Networks]( +#' https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf) +#' +#' # Example +#' ```{r} +#' x <- random_uniform(c(4, 10, 128)) +#' y <- x |> layer_conv_1d_transpose(32, 3, 2, activation='relu') +#' shape(y) +#' ``` #' -#' Unlike a regular 1D convolution, depthwise convolution does not mix -#' information across different input channels. +#' @returns +#' A 3D tensor representing +#' `activation(conv1d_transpose(inputs, kernel) + bias)`. #' -#' The `depth_multiplier` argument determines how many filter are applied to one -#' input channel. As such, it controls the amount of output channels that are -#' generated per input channel in the depthwise step. +#' @param filters +#' int, the dimension of the output space (the number of filters +#' in the transpose convolution). #' -#' @param kernel_size An integer, specifying the height and width of the 1D -#' convolution window. Can be a single integer to specify the same value for -#' all spatial dimensions. +#' @param kernel_size +#' int or list of 1 integer, specifying the size of the +#' transposed convolution window. #' -#' @param strides An integer, specifying the strides of the convolution along the -#' height and width. Can be a single integer to specify the same value for -#' all spatial dimensions. Specifying any stride value != 1 is incompatible -#' with specifying any `dilation_rate` value != 1. +#' @param strides +#' int or list of 1 integer, specifying the stride length +#' of the transposed convolution. `strides > 1` is incompatible with +#' `dilation_rate > 1`. #' -#' @param padding one of `'valid'` or `'same'` (case-insensitive). `"valid"` means no -#' padding. `"same"` results in padding with zeros evenly to the left/right -#' or up/down of the input such that output has the same height/width -#' dimension as the input. +#' @param padding +#' string, either `"valid"` or `"same"` (case-insensitive). +#' `"valid"` means no padding. `"same"` results in padding evenly to +#' the left/right or up/down of the input such that output has the same +#' height/width dimension as the input. #' -#' @param depth_multiplier The number of depthwise convolution output channels for -#' each input channel. The total number of depthwise convolution output -#' channels will be equal to `filters_in * depth_multiplier`. 
+#' @param data_format
+#' string, either `"channels_last"` or `"channels_first"`.
+#' The ordering of the dimensions in the inputs. `"channels_last"`
+#' corresponds to inputs with shape `(batch, steps, features)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch, features, steps)`. It defaults to the `image_data_format`
+#' value found in your Keras config file at `~/.keras/keras.json`.
+#' If you never set it, then it will be `"channels_last"`.
 #'
-#' @param data_format A string, one of `"channels_last"` (default) or `"channels_first"`.
-#'   The ordering of the dimensions in the inputs. `channels_last` corresponds
-#'   to inputs with shape `(batch_size, height, width, channels)` while
-#'   `channels_first` corresponds to inputs with shape `(batch_size, channels,
-#'   height, width)`. It defaults to the `image_data_format` value found in
-#'   your Keras config file at `~/.keras/keras.json`. If you never set it, then
-#'   it will be 'channels_last'.
+#' @param dilation_rate
+#' int or list of 1 integer, specifying the dilation
+#' rate to use for dilated transposed convolution.
 #'
-#' @param dilation_rate A single integer, specifying the dilation rate to use for
-#'   dilated convolution. Currently, specifying any `dilation_rate` value != 1
-#'   is incompatible with specifying any stride value != 1.
+#' @param activation
+#' Activation function. If `NULL`, no activation is applied.
 #'
-#' @param activation Activation function to use. If you don't specify anything, no
-#'   activation is applied (see `?activation_relu`).
+#' @param use_bias
+#' bool, if `TRUE`, bias will be added to the output.
 #'
-#' @param use_bias Boolean, whether the layer uses a bias vector.
+#' @param kernel_initializer
+#' Initializer for the convolution kernel. If `NULL`,
+#' the default initializer (`"glorot_uniform"`) will be used.
 #'
-#' @param depthwise_initializer Initializer for the depthwise kernel matrix (see
-#'   [`initializer_glorot_uniform`]). If NULL, the default initializer
-#'   (`"glorot_uniform"`) will be used.
+#' @param bias_initializer
+#' Initializer for the bias vector. If `NULL`, the
+#' default initializer (`"zeros"`) will be used.
 #'
-#' @param bias_initializer Initializer for the bias vector (see
-#'   `keras.initializers`). If NULL, the default initializer ('zeros') will be
-#'   used.
+#' @param kernel_regularizer
+#' Optional regularizer for the convolution kernel.
 #'
-#' @param depthwise_regularizer Regularizer function applied to the depthwise kernel
-#'   matrix (see [`regularizer_l1()`]).
+#' @param bias_regularizer
+#' Optional regularizer for the bias vector.
 #'
-#' @param bias_regularizer Regularizer function applied to the bias vector (see
-#'   [`regularizer_l1()`]).
+#' @param activity_regularizer
+#' Optional regularizer function for the output.
 #'
-#' @param activity_regularizer Regularizer function applied to the output of the
-#'   layer (its 'activation') (see [`regularizer_l1()`]).
+#' @param kernel_constraint
+#' Optional projection function to be applied to the
+#' kernel after being updated by an `Optimizer` (e.g. used to implement
+#' norm constraints or value constraints for layer weights). The
+#' function must take as input the unprojected variable and must return
+#' the projected variable (which must have the same shape). Constraints
+#' are not safe to use when doing asynchronous distributed training.
 #'
-#' @param depthwise_constraint Constraint function applied to the depthwise kernel
-#'   matrix (see [`constraint_maxnorm()`]).
+#' @param bias_constraint
+#' Optional projection function to be applied to the
+#' bias after being updated by an `Optimizer`.
 #'
-#' @param bias_constraint Constraint function applied to the bias vector (see
-#'   [`constraint_maxnorm()`]).
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
 #'
-#' @param ... standard layer arguments.
+#' @param ...
+#' For forward/backward compatibility.
 #'
-#' @inheritParams layer_depthwise_conv_2d
+#' @export
 #' @family convolutional layers
-#'
+#' @family layers
 #' @seealso
-#' +
-#' @export
-layer_depthwise_conv_1d <-
-function(object,
-         kernel_size,
-         strides = 1L,
-         padding = "valid",
-         depth_multiplier = 1L,
-         data_format = NULL,
-         dilation_rate = 1L,
-         activation = NULL,
-         use_bias = TRUE,
-         depthwise_initializer = "glorot_uniform",
-         bias_initializer = "zeros",
-         depthwise_regularizer = NULL,
-         bias_regularizer = NULL,
-         activity_regularizer = NULL,
-         depthwise_constraint = NULL,
-         bias_constraint = NULL,
-         ...) {
-  args <- capture_args(match.call(),
-    list(kernel_size = as.integer,
-         strides = as.integer,
-         depth_multiplier = as.integer,
-         dilation_rate = as.integer),
-    ignore = "object"
-  )
-  create_layer(keras$layers$DepthwiseConv1D, object, args)
+#' +
+# +
+#' @tether keras.layers.Conv1DTranspose
+layer_conv_1d_transpose <-
+function (object, filters, kernel_size, strides = 1L, padding = "valid",
+    data_format = NULL, dilation_rate = 1L, activation = NULL,
+    use_bias = TRUE, kernel_initializer = "glorot_uniform", bias_initializer = "zeros",
+    kernel_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL,
+    kernel_constraint = NULL, bias_constraint = NULL, ...)
+{
+    args <- capture_args(list(filters = as_integer, kernel_size = as_integer_tuple,
+        strides = as_integer_tuple, dilation_rate = as_integer_tuple,
+        input_shape = normalize_shape, batch_size = as_integer,
+        batch_input_shape = normalize_shape), ignore = "object")
+    create_layer(keras$layers$Conv1DTranspose, object, args)
 }

-#' Depthwise separable 2D convolution.
+
+#' 2D convolution layer.
 #'
-#' Depthwise Separable convolutions consists in performing just the first step
-#' in a depthwise spatial convolution (which acts on each input channel
-#' separately). The `depth_multiplier` argument controls how many output
-#' channels are generated per input channel in the depthwise step.
+#' @description
+#' This layer creates a convolution kernel that is convolved with the layer
+#' input over two spatial dimensions (height and width) to produce a tensor
+#' of outputs. If `use_bias` is `TRUE`, a bias vector is created and added to the
+#' outputs. Finally, if `activation` is not `NULL`, it is applied to the
+#' outputs as well.
+#'
+#' # Input Shape
+#' - If `data_format="channels_last"`:
+#'     A 4D tensor with shape: `(batch_size, height, width, channels)`
+#' - If `data_format="channels_first"`:
+#'     A 4D tensor with shape: `(batch_size, channels, height, width)`
+#'
+#' # Output Shape
+#' - If `data_format="channels_last"`:
+#'     A 4D tensor with shape: `(batch_size, new_height, new_width, filters)`
+#' - If `data_format="channels_first"`:
+#'     A 4D tensor with shape: `(batch_size, filters, new_height, new_width)`
+#'
+#' # Raises
+#' ValueError: when both `strides > 1` and `dilation_rate > 1`.
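+#'
+#' # Note
+#' With the default `strides = 1`, `padding = "valid"` shrinks each spatial
+#' dimension of the output to `input_size - kernel_size + 1`, while
+#' `padding = "same"` preserves it. A quick sketch of both, using the same
+#' helpers as the example below:
+#' ```{r}
+#' x <- random_uniform(c(4, 10, 10, 128))
+#' shape(x |> layer_conv_2d(32, 3, padding = "valid")) # 10 - 3 + 1 = 8
+#' shape(x |> layer_conv_2d(32, 3, padding = "same"))  # spatial dims stay 10
+#' ```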
+#'
+#' # Example
+#' ```{r}
+#' x <- random_uniform(c(4, 10, 10, 128))
+#' y <- x |> layer_conv_2d(32, 3, activation='relu')
+#' shape(y)
+#' ```
 #'
-#' @inheritParams layer_separable_conv_2d
-#'
-#' @family convolutional layers
+#' @returns
+#' A 4D tensor representing `activation(conv2d(inputs, kernel) + bias)`.
+#'
+#' @param filters
+#' int, the dimension of the output space (the number of filters
+#' in the convolution).
+#'
+#' @param kernel_size
+#' int or list of 2 integers, specifying the size of the
+#' convolution window.
+#'
+#' @param strides
+#' int or list of 2 integers, specifying the stride length
+#' of the convolution. `strides > 1` is incompatible with
+#' `dilation_rate > 1`.
+#'
+#' @param padding
+#' string, either `"valid"` or `"same"` (case-insensitive).
+#' `"valid"` means no padding. `"same"` results in padding evenly to
+#' the left/right or up/down of the input. When `padding="same"` and
+#' `strides=1`, the output has the same size as the input.
+#'
+#' @param data_format
+#' string, either `"channels_last"` or `"channels_first"`.
+#' The ordering of the dimensions in the inputs. `"channels_last"`
+#' corresponds to inputs with shape
+#' `(batch_size, height, width, channels)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch_size, channels, height, width)`. It defaults to the
+#' `image_data_format` value found in your Keras config file at
+#' `~/.keras/keras.json`. If you never set it, then it will be
+#' `"channels_last"`.
+#'
+#' @param dilation_rate
+#' int or list of 2 integers, specifying the dilation
+#' rate to use for dilated convolution.
+#'
+#' @param groups
+#' A positive int specifying the number of groups in which the
+#' input is split along the channel axis. Each group is convolved
+#' separately with `filters %/% groups` filters. The output is the
+#' concatenation of all the `groups` results along the channel axis.
+#' Input channels and `filters` must both be divisible by `groups`.
+#'
+#' @param activation
+#' Activation function. If `NULL`, no activation is applied.
+#'
+#' @param use_bias
+#' bool, if `TRUE`, bias will be added to the output.
+#'
+#' @param kernel_initializer
+#' Initializer for the convolution kernel. If `NULL`,
+#' the default initializer (`"glorot_uniform"`) will be used.
+#'
+#' @param bias_initializer
+#' Initializer for the bias vector. If `NULL`, the
+#' default initializer (`"zeros"`) will be used.
+#'
+#' @param kernel_regularizer
+#' Optional regularizer for the convolution kernel.
+#'
+#' @param bias_regularizer
+#' Optional regularizer for the bias vector.
+#'
+#' @param activity_regularizer
+#' Optional regularizer function for the output.
+#'
+#' @param kernel_constraint
+#' Optional projection function to be applied to the
+#' kernel after being updated by an `Optimizer` (e.g. used to implement
+#' norm constraints or value constraints for layer weights). The
+#' function must take as input the unprojected variable and must return
+#' the projected variable (which must have the same shape). Constraints
+#' are not safe to use when doing asynchronous distributed training.
+#'
+#' @param bias_constraint
+#' Optional projection function to be applied to the
+#' bias after being updated by an `Optimizer`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
#' #' @export -layer_depthwise_conv_2d <- function(object, kernel_size, strides = c(1, 1), padding = "valid", depth_multiplier = 1, - data_format = NULL, dilation_rate = c(1, 1), activation = NULL, use_bias = TRUE, - depthwise_initializer = "glorot_uniform", bias_initializer = "zeros", - depthwise_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL, - depthwise_constraint = NULL, bias_constraint = NULL, input_shape = NULL, - batch_input_shape = NULL, batch_size = NULL, dtype = NULL, - name = NULL, trainable = NULL, weights = NULL) { - - args <- list( - kernel_size = as_integer_tuple(kernel_size), - strides = as_integer_tuple(strides), - padding = padding, - depth_multiplier = as.integer(depth_multiplier), - data_format = data_format, - activation = activation, - use_bias = use_bias, - depthwise_initializer = depthwise_initializer, - bias_initializer = bias_initializer, - depthwise_regularizer = depthwise_regularizer, - bias_regularizer = bias_regularizer, - activity_regularizer = activity_regularizer, - depthwise_constraint = depthwise_constraint, - bias_constraint = bias_constraint, - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - dtype = dtype, - name = name, - trainable = trainable, - weights = weights - ) - - if(tf_version() >= "2.3") - args$dilation_rate <- as_integer_tuple(dilation_rate) - - create_layer(keras$layers$DepthwiseConv2D, object, args) +#' @family convolutional layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.Conv2D +layer_conv_2d <- +function (object, filters, kernel_size, strides = list(1L, 1L), + padding = "valid", data_format = NULL, dilation_rate = list( + 1L, 1L), groups = 1L, activation = NULL, use_bias = TRUE, + kernel_initializer = "glorot_uniform", bias_initializer = "zeros", + kernel_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL, + kernel_constraint = NULL, bias_constraint = NULL, ...) +{ + args <- capture_args(list(filters = as_integer, kernel_size = as_integer_tuple, + strides = as_integer_tuple, dilation_rate = as_integer_tuple, + groups = as_integer, input_shape = normalize_shape, batch_size = as_integer, + batch_input_shape = normalize_shape), ignore = "object") + create_layer(keras$layers$Conv2D, object, args) } -#' Depthwise separable 1D convolution. -#' -#' Separable convolutions consist in first performing a depthwise spatial -#' convolution (which acts on each input channel separately) followed by a -#' pointwise convolution which mixes together the resulting output channels. The -#' `depth_multiplier` argument controls how many output channels are generated -#' per input channel in the depthwise step. Intuitively, separable convolutions -#' can be understood as a way to factorize a convolution kernel into two smaller -#' kernels, or as an extreme version of an Inception block. -#' -#' @inheritParams layer_conv_2d -#' -#' @param filters Integer, the dimensionality of the output space (i.e. the -#' number of output filters in the convolution). -#' @param kernel_size An integer or list of 2 integers, specifying the width and -#' height of the 2D convolution window. Can be a single integer to specify the -#' same value for all spatial dimensions. -#' @param strides An integer or list of 2 integers, specifying the strides of -#' the convolution along the width and height. Can be a single integer to -#' specify the same value for all spatial dimensions. 
Specifying any stride
-#'   value != 1 is incompatible with specifying any `dilation_rate` value != 1.
-#' @param padding one of `"valid"` or `"same"` (case-insensitive).
-#' @param depth_multiplier The number of depthwise convolution output channels
-#'   for each input channel. The total number of depthwise convolution output
-#'   channels will be equal to `filters_in * depth_multiplier`.
-#' @param depthwise_initializer Initializer for the depthwise kernel matrix.
-#' @param pointwise_initializer Initializer for the pointwise kernel matrix.
-#' @param depthwise_regularizer Regularizer function applied to the depthwise
-#'   kernel matrix.
-#' @param pointwise_regularizer Regularizer function applied to the pointwise
-#'   kernel matrix.
-#' @param depthwise_constraint Constraint function applied to the depthwise
-#'   kernel matrix.
-#' @param pointwise_constraint Constraint function applied to the pointwise
-#'   kernel matrix.
-#'
-#' @section Input shape: 3D tensor with shape: `(batch, channels, steps)`
-#'   if data_format='channels_first' or 3D tensor with shape: `(batch, steps, channels)`
-#'   if data_format='channels_last'.
-#'
-#' @section Output shape: 3D tensor with shape: `(batch, filters, new_steps)`
-#'   if data_format='channels_first' or 3D tensor with shape:
-#'   `(batch, new_steps, filters)` if data_format='channels_last'.
-#'   `new_steps` values might have changed due to padding or strides.
+#' 2D transposed convolution layer.
 #'
-#' @family convolutional layers
+#' @description
+#' The need for transposed convolutions generally arises from the desire to use
+#' a transformation going in the opposite direction of a normal convolution,
+#' i.e., from something that has the shape of the output of some convolution
+#' to something that has the shape of its input while maintaining a
+#' connectivity pattern that is compatible with said convolution.
+#'
+#' # Input Shape
+#' - If `data_format="channels_last"`:
+#'     A 4D tensor with shape: `(batch_size, height, width, channels)`
+#' - If `data_format="channels_first"`:
+#'     A 4D tensor with shape: `(batch_size, channels, height, width)`
+#'
+#' # Output Shape
+#' - If `data_format="channels_last"`:
+#'     A 4D tensor with shape: `(batch_size, new_height, new_width, filters)`
+#' - If `data_format="channels_first"`:
+#'     A 4D tensor with shape: `(batch_size, filters, new_height, new_width)`
+#'
+#' # Raises
+#' ValueError: when both `strides > 1` and `dilation_rate > 1`.
+#'
+#' # References
+#' - [A guide to convolution arithmetic for deep learning](
+#'     https://arxiv.org/abs/1603.07285)
+#' - [Deconvolutional Networks](
+#'     https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf)
+#'
+#' # Example
+#' ```{r}
+#' x <- random_uniform(c(4, 10, 8, 128))
+#' y <- x |> layer_conv_2d_transpose(32, 2, 2, activation='relu')
+#' shape(y)
+#' # (4, 20, 16, 32)
+#' ```
+#'
+#' @returns
+#' A 4D tensor representing
+#' `activation(conv2d_transpose(inputs, kernel) + bias)`.
+#'
+#' @param filters
+#' int, the dimension of the output space (the number of filters
+#' in the transposed convolution).
+#'
+#' @param kernel_size
+#' int or list of 2 integers, specifying the size of the
+#' transposed convolution window.
+#'
+#' @param strides
+#' int or list of 2 integers, specifying the stride length
+#' of the transposed convolution. `strides > 1` is incompatible with
+#' `dilation_rate > 1`.
+#'
+#' @param padding
+#' string, either `"valid"` or `"same"` (case-insensitive).
+#' `"valid"` means no padding. `"same"` results in padding evenly to
`"same"` results in padding evenly to +#' the left/right or up/down of the input. When `padding="same"` and +#' `strides=1`, the output has the same size as the input. +#' +#' @param data_format +#' string, either `"channels_last"` or `"channels_first"`. +#' The ordering of the dimensions in the inputs. `"channels_last"` +#' corresponds to inputs with shape +#' `(batch_size, height, width, channels)` +#' while `"channels_first"` corresponds to inputs with shape +#' `(batch_size, channels, height, width)`. It defaults to the +#' `image_data_format` value found in your Keras config file at +#' `~/.keras/keras.json`. If you never set it, then it will be +#' `"channels_last"`. +#' +#' @param dilation_rate +#' int or list of 1 integers, specifying the dilation +#' rate to use for dilated transposed convolution. +#' +#' @param activation +#' Activation function. If `NULL`, no activation is applied. +#' +#' @param use_bias +#' bool, if `TRUE`, bias will be added to the output. +#' +#' @param kernel_initializer +#' Initializer for the convolution kernel. If `NULL`, +#' the default initializer (`"glorot_uniform"`) will be used. +#' +#' @param bias_initializer +#' Initializer for the bias vector. If `NULL`, the +#' default initializer (`"zeros"`) will be used. +#' +#' @param kernel_regularizer +#' Optional regularizer for the convolution kernel. +#' +#' @param bias_regularizer +#' Optional regularizer for the bias vector. +#' +#' @param activity_regularizer +#' Optional regularizer function for the output. +#' +#' @param kernel_constraint +#' Optional projection function to be applied to the +#' kernel after being updated by an `Optimizer` (e.g. used to implement +#' norm constraints or value constraints for layer weights). The +#' function must take as input the unprojected variable and must return +#' the projected variable (which must have the same shape). Constraints +#' are not safe to use when doing asynchronous distributed training. +#' +#' @param bias_constraint +#' Optional projection function to be applied to the +#' bias after being updated by an `Optimizer`. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @param ... +#' For forward/backward compatability. 
 #'
 #' @export
-layer_separable_conv_1d <- function(object, filters, kernel_size, strides = 1, padding = "valid", data_format = "channels_last",
-                                    dilation_rate = 1, depth_multiplier = 1, activation = NULL, use_bias = TRUE,
-                                    depthwise_initializer = "glorot_uniform", pointwise_initializer = "glorot_uniform", bias_initializer = "zeros",
-                                    depthwise_regularizer = NULL, pointwise_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL,
-                                    depthwise_constraint = NULL, pointwise_constraint = NULL, bias_constraint = NULL, input_shape = NULL,
-                                    batch_input_shape = NULL, batch_size = NULL, dtype = NULL,
-                                    name = NULL, trainable = NULL, weights = NULL) {
-
-  args <- list(
-    filters = as.integer(filters),
-    kernel_size = as_integer_tuple(kernel_size),
-    strides = as_integer_tuple(strides),
-    padding = padding,
-    data_format = data_format,
-    depth_multiplier = as.integer(depth_multiplier),
-    activation = activation,
-    use_bias = use_bias,
-    depthwise_initializer = depthwise_initializer,
-    pointwise_initializer = pointwise_initializer,
-    bias_initializer = bias_initializer,
-    depthwise_regularizer = depthwise_regularizer,
-    pointwise_regularizer = pointwise_regularizer,
-    bias_regularizer = bias_regularizer,
-    activity_regularizer = activity_regularizer,
-    depthwise_constraint = depthwise_constraint,
-    pointwise_constraint = pointwise_constraint,
-    bias_constraint = bias_constraint,
-    input_shape = normalize_shape(input_shape),
-    batch_input_shape = normalize_shape(batch_input_shape),
-    batch_size = as_nullable_integer(batch_size),
-    dtype = dtype,
-    name = name,
-    trainable = trainable,
-    weights = weights
-  )
-
-  if (keras_version() >= "2.1.6")
-    args$dilation_rate <- as.integer(dilation_rate)
-
-  create_layer(keras$layers$SeparableConv1D, object, args)
-
+#' @family convolutional layers
+#' @family layers
+#' @seealso
+#' +
+# +
+#' @tether keras.layers.Conv2DTranspose
+layer_conv_2d_transpose <-
+function (object, filters, kernel_size, strides = list(1L, 1L),
+    padding = "valid", data_format = NULL, dilation_rate = list(
+    1L, 1L), activation = NULL, use_bias = TRUE, kernel_initializer = "glorot_uniform",
+    bias_initializer = "zeros", kernel_regularizer = NULL, bias_regularizer = NULL,
+    activity_regularizer = NULL, kernel_constraint = NULL, bias_constraint = NULL,
+    ...)
+{
+    args <- capture_args(list(filters = as_integer, kernel_size = as_integer_tuple,
+        strides = as_integer_tuple, dilation_rate = as_integer_tuple,
+        input_shape = normalize_shape, batch_size = as_integer,
+        batch_input_shape = normalize_shape), ignore = "object")
+    create_layer(keras$layers$Conv2DTranspose, object, args)
 }

-#' Upsampling layer for 1D inputs.
-#'
-#' Repeats each temporal step `size` times along the time axis.
-#'
-#' @inheritParams layer_dense
-#'
-#' @param size integer. Upsampling factor.
-#'
-#' @section Input shape: 3D tensor with shape: `(batch, steps, features)`.
+#' 3D convolution layer.
 #'
-#' @section Output shape: 3D tensor with shape: `(batch, upsampled_steps,
-#'   features)`.
+#' @description
+#' This layer creates a convolution kernel that is convolved with the layer
+#' input over three spatial dimensions to produce a tensor of
+#' outputs. If `use_bias` is `TRUE`, a bias vector is created and added to the
+#' outputs. Finally, if `activation` is not `NULL`, it is applied to the
+#' outputs as well.
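+#'
+#' As with the 1D and 2D variants, the default `padding = "valid"` with
+#' `strides = 1` shrinks each spatial dimension by `kernel_size - 1`; the
+#' example below maps a `(10, 10, 10)` volume to `(8, 8, 8)`.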
+#'
+#' # Input Shape
+#' - If `data_format="channels_last"`:
+#'     5D tensor with shape:
+#'     `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
+#' - If `data_format="channels_first"`:
+#'     5D tensor with shape:
+#'     `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`
+#'
+#' # Output Shape
+#' - If `data_format="channels_last"`:
+#'     5D tensor with shape:
+#'     `(batch_size, new_spatial_dim1, new_spatial_dim2, new_spatial_dim3,
+#'     filters)`
+#' - If `data_format="channels_first"`:
+#'     5D tensor with shape:
+#'     `(batch_size, filters, new_spatial_dim1, new_spatial_dim2,
+#'     new_spatial_dim3)`
+#'
+#' # Raises
+#' ValueError: when both `strides > 1` and `dilation_rate > 1`.
+#'
+#' # Example
+#' ```{r}
+#' x <- random_uniform(c(4, 10, 10, 10, 128))
+#' y <- x |> layer_conv_3d(32, 3, activation = 'relu')
+#' shape(y)
+#' ```
 #'
-#' @family convolutional layers
+#' @returns
+#' A 5D tensor representing `activation(conv3d(inputs, kernel) + bias)`.
+#'
+#' @param filters
+#' int, the dimension of the output space (the number of filters
+#' in the convolution).
+#'
+#' @param kernel_size
+#' int or list of 3 integers, specifying the size of the
+#' convolution window.
+#'
+#' @param strides
+#' int or list of 3 integers, specifying the stride length
+#' of the convolution. `strides > 1` is incompatible with
+#' `dilation_rate > 1`.
+#'
+#' @param padding
+#' string, either `"valid"` or `"same"` (case-insensitive).
+#' `"valid"` means no padding. `"same"` results in padding evenly to
+#' the left/right or up/down of the input. When `padding="same"` and
+#' `strides=1`, the output has the same size as the input.
+#'
+#' @param data_format
+#' string, either `"channels_last"` or `"channels_first"`.
+#' The ordering of the dimensions in the inputs. `"channels_last"`
+#' corresponds to inputs with shape
+#' `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.
+#' It defaults to the `image_data_format` value found in your Keras
+#' config file at `~/.keras/keras.json`. If you never set it, then it
+#' will be `"channels_last"`.
+#'
+#' @param dilation_rate
+#' int or list of 3 integers, specifying the dilation
+#' rate to use for dilated convolution.
+#'
+#' @param groups
+#' A positive int specifying the number of groups in which the
+#' input is split along the channel axis. Each group is convolved
+#' separately with `filters %/% groups` filters. The output is the
+#' concatenation of all the `groups` results along the channel axis.
+#' Input channels and `filters` must both be divisible by `groups`.
+#'
+#' @param activation
+#' Activation function. If `NULL`, no activation is applied.
+#'
+#' @param use_bias
+#' bool, if `TRUE`, bias will be added to the output.
+#'
+#' @param kernel_initializer
+#' Initializer for the convolution kernel. If `NULL`,
+#' the default initializer (`"glorot_uniform"`) will be used.
+#'
+#' @param bias_initializer
+#' Initializer for the bias vector. If `NULL`, the
+#' default initializer (`"zeros"`) will be used.
+#'
+#' @param kernel_regularizer
+#' Optional regularizer for the convolution kernel.
+#'
+#' @param bias_regularizer
+#' Optional regularizer for the bias vector.
+#'
+#' @param activity_regularizer
+#' Optional regularizer function for the output.
+#'
+#' @param kernel_constraint
+#' Optional projection function to be applied to the
+#' kernel after being updated by an `Optimizer` (e.g. used to implement
+#' norm constraints or value constraints for layer weights). The
+#' function must take as input the unprojected variable and must return
+#' the projected variable (which must have the same shape). Constraints
+#' are not safe to use when doing asynchronous distributed training.
+#'
+#' @param bias_constraint
+#' Optional projection function to be applied to the
+#' bias after being updated by an `Optimizer`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
 #'
 #' @export
-layer_upsampling_1d <- function(object, size = 2L,
-                                batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) {
-
-  create_layer(keras$layers$UpSampling1D, object, list(
-    size = as.integer(size),
-    batch_size = as_nullable_integer(batch_size),
-    name = name,
-    trainable = trainable,
-    weights = weights
-  ))
-
+#' @family convolutional layers
+#' @family layers
+#' @seealso
+#' +
+# +
+#' @tether keras.layers.Conv3D
+layer_conv_3d <-
+function (object, filters, kernel_size, strides = list(1L, 1L,
+    1L), padding = "valid", data_format = NULL, dilation_rate = list(
+    1L, 1L, 1L), groups = 1L, activation = NULL, use_bias = TRUE,
+    kernel_initializer = "glorot_uniform", bias_initializer = "zeros",
+    kernel_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL,
+    kernel_constraint = NULL, bias_constraint = NULL, ...)
+{
+    args <- capture_args(list(filters = as_integer, kernel_size = as_integer_tuple,
+        strides = as_integer_tuple, dilation_rate = as_integer_tuple,
+        groups = as_integer, input_shape = normalize_shape, batch_size = as_integer,
+        batch_input_shape = normalize_shape), ignore = "object")
+    create_layer(keras$layers$Conv3D, object, args)
 }

-#' Upsampling layer for 2D inputs.
+#' 3D transposed convolution layer.
 #'
-#' Repeats the rows and columns of the data by `size[[0]]` and `size[[1]]` respectively.
+#' @description
+#' The need for transposed convolutions generally arises from the desire to use
+#' a transformation going in the opposite direction of a normal convolution,
+#' i.e., from something that has the shape of the output of some convolution
+#' to something that has the shape of its input while maintaining a
+#' connectivity pattern that is compatible with said convolution.
+#'
+#' # Input Shape
+#' - If `data_format="channels_last"`:
+#'     5D tensor with shape:
+#'     `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
+#' - If `data_format="channels_first"`:
+#'     5D tensor with shape:
+#'     `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`
+#'
+#' # Output Shape
+#' - If `data_format="channels_last"`:
+#'     5D tensor with shape:
+#'     `(batch_size, new_spatial_dim1, new_spatial_dim2, new_spatial_dim3,
+#'     filters)`
+#' - If `data_format="channels_first"`:
+#'     5D tensor with shape:
+#'     `(batch_size, filters, new_spatial_dim1, new_spatial_dim2,
+#'     new_spatial_dim3)`
+#'
+#' # Raises
+#' ValueError: when both `strides > 1` and `dilation_rate > 1`.
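+#'
+#' # Note
+#' A transposed convolution inverts the shape arithmetic of the corresponding
+#' forward convolution: with `padding = "valid"`, each new spatial dimension
+#' is `(input_size - 1) * strides + kernel_size`, and with `padding = "same"`
+#' it is `input_size * strides`. In the example below, `(10 - 1) * 2 + 2 = 20`,
+#' `(8 - 1) * 2 + 2 = 16`, and `(12 - 1) * 2 + 2 = 24`.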
+#'
+#' # References
+#' - [A guide to convolution arithmetic for deep learning](
+#'     https://arxiv.org/abs/1603.07285v1)
+#' - [Deconvolutional Networks](
+#'     https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf)
+#'
+#' # Example
+#' ```{r}
+#' x <- random_uniform(c(4, 10, 8, 12, 128))
+#' y <- x |> layer_conv_3d_transpose(32, 2, 2, activation = 'relu')
+#' shape(y)
+#' ```
 #'
-#' @inheritParams layer_conv_2d
+#' @returns
+#' A 5D tensor representing `activation(conv3d_transpose(inputs, kernel) + bias)`.
 #'
-#' @param size int, or list of 2 integers. The upsampling factors for rows and
-#'   columns.
-#' @param interpolation A string, one of `nearest` or `bilinear`.
-#'   Note that CNTK does not support yet the `bilinear` upscaling
-#'   and that with Theano, only `size=(2, 2)` is possible.
+#' @param filters
+#' int, the dimension of the output space (the number of filters
+#' in the transposed convolution).
 #'
+#' @param kernel_size
+#' int or list of 3 integers, specifying the size of the
+#' transposed convolution window.
 #'
-#' @section Input shape:
-#' 4D tensor with shape:
-#' - If `data_format` is `"channels_last"`: `(batch, rows, cols, channels)`
-#' - If `data_format` is `"channels_first"`: `(batch, channels, rows, cols)`
+#' @param strides
+#' int or list of 3 integers, specifying the stride length
+#' of the transposed convolution. `strides > 1` is incompatible with
+#' `dilation_rate > 1`.
 #'
-#' @section Output shape:
-#' 4D tensor with shape:
-#' - If `data_format` is `"channels_last"`: `(batch, upsampled_rows, upsampled_cols, channels)`
-#' - If `data_format` is `"channels_first"`: `(batch, channels, upsampled_rows, upsampled_cols)`
+#' @param padding
+#' string, either `"valid"` or `"same"` (case-insensitive).
+#' `"valid"` means no padding. `"same"` results in padding evenly to
+#' the left/right or up/down of the input. When `padding="same"` and
+#' `strides=1`, the output has the same size as the input.
 #'
-#' @family convolutional layers
+#' @param data_format
+#' string, either `"channels_last"` or `"channels_first"`.
+#' The ordering of the dimensions in the inputs. `"channels_last"`
+#' corresponds to inputs with shape
+#' `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.
+#' It defaults to the `image_data_format` value found in your Keras
+#' config file at `~/.keras/keras.json`. If you never set it, then it
+#' will be `"channels_last"`.
 #'
-#' @export
-layer_upsampling_2d <- function(object, size = c(2L, 2L), data_format = NULL, interpolation = "nearest",
-                                batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) {
-
-  args <- list(
-    size = as.integer(size),
-    data_format = data_format,
-    batch_size = as_nullable_integer(batch_size),
-    name = name,
-    trainable = trainable,
-    weights = weights
-  )
-
-  if (keras_version() >= "2.2.3")
-    args$interpolation <- interpolation
-
-  create_layer(keras$layers$UpSampling2D, object, args)
-
-}
-
-
-#' Upsampling layer for 3D inputs.
-#'
-#' Repeats the 1st, 2nd and 3rd dimensions of the data by `size[[0]]`, `size[[1]]` and
-#' `size[[2]]` respectively.
-#'
-#' @inheritParams layer_upsampling_1d
-#'
-#' @param size int, or list of 3 integers. The upsampling factors for dim1, dim2
-#'   and dim3.
-#' @param data_format A string, one of `channels_last` (default) or
-#'   `channels_first`. The ordering of the dimensions in the inputs.
-#'   `channels_last` corresponds to inputs with shape `(batch, spatial_dim1,
-#'   spatial_dim2, spatial_dim3, channels)` while `channels_first` corresponds
-#'   to inputs with shape `(batch, channels, spatial_dim1, spatial_dim2,
-#'   spatial_dim3)`. It defaults to the `image_data_format` value found in your
-#'   Keras config file at `~/.keras/keras.json`. If you never set it, then it
-#'   will be "channels_last".
+#' @param dilation_rate
+#' int or list of 3 integers, specifying the dilation
+#' rate to use for dilated transposed convolution.
 #'
-#' @section Input shape:
-#' 5D tensor with shape:
-#' - If `data_format` is `"channels_last"`: `(batch, dim1, dim2, dim3, channels)`
-#' - If `data_format` is `"channels_first"`: `(batch, channels, dim1, dim2, dim3)`
+#' @param activation
+#' Activation function. If `NULL`, no activation is applied.
 #'
-#' @section Output shape:
-#' 5D tensor with shape:
-#' - If `data_format` is `"channels_last"`: `(batch, upsampled_dim1, upsampled_dim2, upsampled_dim3, channels)`
-#' - If `data_format` is `"channels_first"`: `(batch, channels, upsampled_dim1, upsampled_dim2, upsampled_dim3)`
+#' @param use_bias
+#' bool, if `TRUE`, bias will be added to the output.
 #'
-#' @family convolutional layers
+#' @param kernel_initializer
+#' Initializer for the convolution kernel. If `NULL`,
+#' the default initializer (`"glorot_uniform"`) will be used.
 #'
-#' @export
-layer_upsampling_3d <- function(object, size= c(2L, 2L, 2L), data_format = NULL,
-                                batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) {
-
-  create_layer(keras$layers$UpSampling3D, object, list(
-    size = as.integer(size),
-    data_format = data_format,
-    batch_size = as_nullable_integer(batch_size),
-    name = name,
-    trainable = trainable,
-    weights = weights
-  ))
-
-}
-
-#' Zero-padding layer for 1D input (e.g. temporal sequence).
+#' @param bias_initializer
+#' Initializer for the bias vector. If `NULL`, the
+#' default initializer (`"zeros"`) will be used.
 #'
-#' @inheritParams layer_conv_2d
+#' @param kernel_regularizer
+#' Optional regularizer for the convolution kernel.
 #'
-#' @param padding int, or list of int (length 2)
-#'   - If int: How many zeros to add at the beginning and end of the padding dimension (axis 1).
-#'   - If list of int (length 2): How many zeros to add at the beginning and at the end of the padding dimension (`(left_pad, right_pad)`).
-#'
-#' @section Input shape:
-#' 3D tensor with shape `(batch, axis_to_pad, features)`
-#'
-#' @section Output shape:
-#' 3D tensor with shape `(batch, padded_axis, features)`
-#'
-#' @family convolutional layers
+#' @param bias_regularizer
+#' Optional regularizer for the bias vector.
 #'
-#' @export
-layer_zero_padding_1d <- function(object, padding = 1L,
-                                  batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) {
-  create_layer(keras$layers$ZeroPadding1D, object, list(
-    padding = as.integer(padding),
-    batch_size = as_nullable_integer(batch_size),
-    name = name,
-    trainable = trainable,
-    weights = weights
-  ))
-}
-
-#' Zero-padding layer for 2D input (e.g. picture).
+#' @param activity_regularizer
+#' Optional regularizer function for the output.
 #'
-#' This layer can add rows and columns of zeros at the top, bottom, left and
-#' right side of an image tensor.
-#'
-#' @inheritParams layer_conv_2d
-#' @inheritParams layer_zero_padding_1d
-#'
-#' @param padding int, or list of 2 ints, or list of 2 lists of 2 ints.
-#' - If int: the same symmetric padding is applied to width and height.
-#' - If list of 2 ints: interpreted as two different symmetric padding values for height
-#'   and width: `(symmetric_height_pad, symmetric_width_pad)`.
-#' - If list of 2 lists of 2 ints: interpreted as `((top_pad, bottom_pad), (left_pad,
-#'   right_pad))`
+#' @param kernel_constraint
+#' Optional projection function to be applied to the
+#' kernel after being updated by an `Optimizer` (e.g. used to implement
+#' norm constraints or value constraints for layer weights). The
+#' function must take as input the unprojected variable and must return
+#' the projected variable (which must have the same shape). Constraints
+#' are not safe to use when doing asynchronous distributed training.
 #'
-#' @section Input shape: 4D tensor with shape:
-#' - If `data_format` is `"channels_last"`: `(batch, rows, cols, channels)`
-#' - If `data_format` is `"channels_first"`: `(batch, channels, rows, cols)`
+#' @param bias_constraint
+#' Optional projection function to be applied to the
+#' bias after being updated by an `Optimizer`.
 #'
-#' @section Output shape: 4D tensor with shape:
-#' - If `data_format` is `"channels_last"`: `(batch, padded_rows, padded_cols, channels)`
-#' - If `data_format` is `"channels_first"`: `(batch, channels, padded_rows, padded_cols)`
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
 #'
-#' @family convolutional layers
+#' @param ...
+#' For forward/backward compatibility.
 #'
 #' @export
-layer_zero_padding_2d <- function(object, padding = c(1L, 1L), data_format = NULL,
-                                  batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) {
-
-  create_layer(keras$layers$ZeroPadding2D, object, list(
-    padding = normalize_padding(padding, 2L),
-    data_format = data_format,
-    batch_size = as_nullable_integer(batch_size),
-    name = name,
-    trainable = trainable,
-    weights = weights
-  ))
-
-}
-
-#' Zero-padding layer for 3D data (spatial or spatio-temporal).
-#'
-#' @inheritParams layer_zero_padding_1d
-#'
-#' @param padding int, or list of 3 ints, or list of 3 lists of 2 ints.
-#' - If int: the same symmetric padding is applied to width and height.
-#' - If list of 3 ints: interpreted as three different symmetric padding values:
-#'   `(symmetric_dim1_pad, symmetric_dim2_pad, symmetric_dim3_pad)`.
-#' - If list of 3 lists of 2 ints: interpreted as `((left_dim1_pad,
-#'   right_dim1_pad), (left_dim2_pad, right_dim2_pad), (left_dim3_pad,
-#'   right_dim3_pad))`
-#' @param data_format A string, one of `channels_last` (default) or
-#'   `channels_first`. The ordering of the dimensions in the inputs.
-#'   `channels_last` corresponds to inputs with shape `(batch, spatial_dim1,
-#'   spatial_dim2, spatial_dim3, channels)` while `channels_first` corresponds
-#'   to inputs with shape `(batch, channels, spatial_dim1, spatial_dim2,
-#'   spatial_dim3)`. It defaults to the `image_data_format` value found in your
-#'   Keras config file at `~/.keras/keras.json`. If you never set it, then it
-#'   will be "channels_last".
-#'
-#' @section Input shape: 5D tensor with shape:
-#' - If `data_format` is `"channels_last"`: `(batch, first_axis_to_pad, second_axis_to_pad,
-#'   third_axis_to_pad, depth)`
-#' - If `data_format` is `"channels_first"`: `(batch, depth, first_axis_to_pad, second_axis_to_pad, third_axis_to_pad)`
-#'
-#' @section Output shape: 5D tensor with shape:
-#' - If `data_format` is `"channels_last"`: `(batch, first_padded_axis, second_padded_axis,
-#'   third_axis_to_pad, depth)`
-#' - If `data_format` is `"channels_first"`: `(batch, depth, first_padded_axis, second_padded_axis, third_axis_to_pad)`
-#'
 #' @family convolutional layers
-#'
-#' @export
-layer_zero_padding_3d <- function(object, padding = c(1L, 1L, 1L), data_format = NULL,
-                                  batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) {
-
-  create_layer(keras$layers$ZeroPadding3D, object, list(
-    padding = normalize_padding(padding, 3L),
-    data_format = data_format,
-    batch_size = as_nullable_integer(batch_size),
-    name = name,
-    trainable = trainable,
-    weights = weights
-  ))
-
+#' @family layers
+#' @seealso
+#' +
+# +
+#' @tether keras.layers.Conv3DTranspose
+layer_conv_3d_transpose <-
+function (object, filters, kernel_size, strides = list(1L, 1L,
+    1L), padding = "valid", data_format = NULL, dilation_rate = list(
+    1L, 1L, 1L), activation = NULL, use_bias = TRUE, kernel_initializer = "glorot_uniform",
+    bias_initializer = "zeros", kernel_regularizer = NULL, bias_regularizer = NULL,
+    activity_regularizer = NULL, kernel_constraint = NULL, bias_constraint = NULL,
+    ...)
+{
+    args <- capture_args(list(filters = as_integer, kernel_size = as_integer_tuple,
+        strides = as_integer_tuple, dilation_rate = as_integer_tuple,
+        input_shape = normalize_shape, batch_size = as_integer,
+        batch_input_shape = normalize_shape), ignore = "object")
+    create_layer(keras$layers$Conv3DTranspose, object, args)
 }

-#' Cropping layer for 1D input (e.g. temporal sequence).
+#' 1D depthwise convolution layer.
 #'
-#' It crops along the time dimension (axis 1).
+#' @description
+#' Depthwise convolution is a type of convolution in which each input channel
+#' is convolved with a different kernel (called a depthwise kernel). You can
+#' understand depthwise convolution as the first step in a depthwise separable
+#' convolution.
 #'
-#' @inheritParams layer_dense
+#' It is implemented via the following steps:
 #'
-#' @param cropping int or list of int (length 2) How many units should be
-#'   trimmed off at the beginning and end of the cropping dimension (axis 1). If
-#'   a single int is provided, the same value will be used for both.
+#' - Split the input into individual channels.
+#' - Convolve each channel with an individual depthwise kernel with
+#'   `depth_multiplier` output channels.
+#' - Concatenate the convolved outputs along the channels axis.
 #'
-#' @section Input shape: 3D tensor with shape `(batch, axis_to_crop, features)`
+#' Unlike a regular 1D convolution, depthwise convolution does not mix
+#' information across different input channels.
 #'
-#' @section Output shape: 3D tensor with shape `(batch, cropped_axis, features)`
+#' The `depth_multiplier` argument determines how many filters are applied to
+#' one input channel. As such, it controls the number of output channels that
+#' are generated per input channel in the depthwise step.
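+#'
+#' For example, a 12-channel input processed with `depth_multiplier = 3`
+#' yields `12 * 3 = 36` output channels, as the example below illustrates.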
+#'
+#' # Input Shape
+#' - If `data_format="channels_last"`:
+#'     A 3D tensor with shape: `(batch_shape, steps, channels)`
+#' - If `data_format="channels_first"`:
+#'     A 3D tensor with shape: `(batch_shape, channels, steps)`
+#'
+#' # Output Shape
+#' - If `data_format="channels_last"`:
+#'     A 3D tensor with shape:
+#'     `(batch_shape, new_steps, channels * depth_multiplier)`
+#' - If `data_format="channels_first"`:
+#'     A 3D tensor with shape:
+#'     `(batch_shape, channels * depth_multiplier, new_steps)`
+#'
+#' # Raises
+#' ValueError: when both `strides > 1` and `dilation_rate > 1`.
+#'
+#' # Example
+#' ```{r}
+#' x <- random_uniform(c(4, 10, 12))
+#' y <- x |> layer_depthwise_conv_1d(
+#'   kernel_size = 3,
+#'   depth_multiplier = 3,
+#'   activation = 'relu'
+#' )
+#' shape(y)
+#' ```
+#'
+#' @returns
+#' A 3D tensor representing
+#' `activation(depthwise_conv1d(inputs, kernel) + bias)`.
+#'
+#' @param kernel_size
+#' int or list of 1 integer, specifying the size of the
+#' depthwise convolution window.
+#'
+#' @param strides
+#' int or list of 1 integer, specifying the stride length
+#' of the convolution. `strides > 1` is incompatible with
+#' `dilation_rate > 1`.
+#'
+#' @param padding
+#' string, either `"valid"` or `"same"` (case-insensitive).
+#' `"valid"` means no padding. `"same"` results in padding evenly to
+#' the left/right or up/down of the input. When `padding="same"` and
+#' `strides=1`, the output has the same size as the input.
+#'
+#' @param depth_multiplier
+#' The number of depthwise convolution output channels
+#' for each input channel. The total number of depthwise convolution
+#' output channels will be equal to `input_channel * depth_multiplier`.
+#'
+#' @param data_format
+#' string, either `"channels_last"` or `"channels_first"`.
+#' The ordering of the dimensions in the inputs. `"channels_last"`
+#' corresponds to inputs with shape `(batch, steps, features)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch, features, steps)`. It defaults to the `image_data_format`
+#' value found in your Keras config file at `~/.keras/keras.json`.
+#' If you never set it, then it will be `"channels_last"`.
+#'
+#' @param dilation_rate
+#' int or list of 1 integer, specifying the dilation
+#' rate to use for dilated convolution.
+#'
+#' @param activation
+#' Activation function. If `NULL`, no activation is applied.
+#'
+#' @param use_bias
+#' bool, if `TRUE`, bias will be added to the output.
+#'
+#' @param depthwise_initializer
+#' Initializer for the convolution kernel.
+#' If `NULL`, the default initializer (`"glorot_uniform"`)
+#' will be used.
+#'
+#' @param bias_initializer
+#' Initializer for the bias vector. If `NULL`, the
+#' default initializer (`"zeros"`) will be used.
+#'
+#' @param depthwise_regularizer
+#' Optional regularizer for the convolution kernel.
+#'
+#' @param bias_regularizer
+#' Optional regularizer for the bias vector.
+#'
+#' @param activity_regularizer
+#' Optional regularizer function for the output.
+#'
+#' @param depthwise_constraint
+#' Optional projection function to be applied to the
+#' kernel after being updated by an `Optimizer` (e.g. used to implement
+#' norm constraints or value constraints for layer weights). The
+#' function must take as input the unprojected variable and must return
+#' the projected variable (which must have the same shape). Constraints
+#' are not safe to use when doing asynchronous distributed training.
+#'
+#' @param bias_constraint
+#' Optional projection function to be applied to the
+#' bias after being updated by an `Optimizer`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
 #'
 #' @export
-layer_cropping_1d <- function(object, cropping = c(1L, 1L),
-                              batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) {
-  create_layer(keras$layers$Cropping1D, object, list(
-    cropping = as.integer(cropping),
-    batch_size = as_nullable_integer(batch_size),
-    name = name,
-    trainable = trainable,
-    weights = weights
-  ))
+#' @family convolutional layers
+#' @family layers
# @seealso
# +
+#' @tether keras.layers.DepthwiseConv1D
+layer_depthwise_conv_1d <-
+function (object, kernel_size, strides = 1L, padding = "valid",
+    depth_multiplier = 1L, data_format = NULL, dilation_rate = 1L,
+    activation = NULL, use_bias = TRUE, depthwise_initializer = "glorot_uniform",
+    bias_initializer = "zeros", depthwise_regularizer = NULL,
+    bias_regularizer = NULL, activity_regularizer = NULL, depthwise_constraint = NULL,
+    bias_constraint = NULL, ...)
+{
+    args <- capture_args(list(kernel_size = as_integer, strides = as_integer,
+        depth_multiplier = as_integer, dilation_rate = as_integer,
+        input_shape = normalize_shape, batch_size = as_integer,
+        batch_input_shape = normalize_shape), ignore = "object")
+    create_layer(keras$layers$DepthwiseConv1D, object, args)
}

-#' Cropping layer for 2D input (e.g. picture).
+#' 2D depthwise convolution layer.
 #'
-#' It crops along spatial dimensions, i.e. width and height.
+#' @description
+#' Depthwise convolution is a type of convolution in which each input channel
+#' is convolved with a different kernel (called a depthwise kernel). You can
+#' understand depthwise convolution as the first step in a depthwise separable
+#' convolution.
 #'
-#' @inheritParams layer_conv_2d
-#' @inheritParams layer_cropping_1d
+#' It is implemented via the following steps:
 #'
-#' @param cropping int, or list of 2 ints, or list of 2 lists of 2 ints.
-#'   - If int: the same symmetric cropping is applied to width and height.
-#'   - If list of 2 ints: interpreted as two different symmetric cropping values for
-#'     height and width: `(symmetric_height_crop, symmetric_width_crop)`.
-#'   - If list of 2 lists of 2 ints: interpreted as `((top_crop, bottom_crop), (left_crop,
-#'     right_crop))`
+#' - Split the input into individual channels.
+#' - Convolve each channel with an individual depthwise kernel with
+#'   `depth_multiplier` output channels.
+#' - Concatenate the convolved outputs along the channels axis.
 #'
-#' @section Input shape: 4D tensor with shape:
-#'   - If `data_format` is `"channels_last"`: `(batch, rows, cols, channels)`
-#'   - If `data_format` is `"channels_first"`: `(batch, channels, rows, cols)`
+#' Unlike a regular 2D convolution, depthwise convolution does not mix
+#' information across different input channels.
 #'
-#' @section Output shape: 4D tensor with shape:
-#'   - If `data_format` is `"channels_last"`: `(batch, cropped_rows, cropped_cols, channels)`
-#'   - If `data_format` is `"channels_first"`: `(batch, channels, cropped_rows, cropped_cols)`
+#' The `depth_multiplier` argument determines how many filters are applied to
+#' one input channel. As such, it controls the amount of output channels that
+#' are generated per input channel in the depthwise step.
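+#'
+#' As a small illustrative sketch (an addition, not from the upstream Keras
+#' docs), the "one kernel per channel" structure shows up in the parameter
+#' counts: a depthwise layer needs far fewer weights than a regular
+#' convolution over the same window, because its kernels do not span the
+#' channel axis.
+#'
+#' ```{r}
+#' inp <- layer_input(shape = c(32, 32, 16))
+#' regular   <- keras_model(inp, inp |> layer_conv_2d(16, 3))
+#' depthwise <- keras_model(inp, inp |> layer_depthwise_conv_2d(3))
+#' count_params(regular)   # 16 * (3 * 3 * 16) + 16 biases
+#' count_params(depthwise) # 16 * (3 * 3)      + 16 biases
+#' ```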
+#' +#' # Input Shape +#' - If `data_format="channels_last"`: +#' A 4D tensor with shape: `(batch_size, height, width, channels)` +#' - If `data_format="channels_first"`: +#' A 4D tensor with shape: `(batch_size, channels, height, width)` +#' +#' # Output Shape +#' - If `data_format="channels_last"`: +#' A 4D tensor with shape: +#' `(batch_size, new_height, new_width, channels * depth_multiplier)` +#' - If `data_format="channels_first"`: +#' A 4D tensor with shape: +#' `(batch_size, channels * depth_multiplier, new_height, new_width)` +#' +#' # Raises +#' ValueError: when both `strides > 1` and `dilation_rate > 1`. +#' +#' # Example +#' ```{r} +#' x <- random_uniform(c(4, 10, 10, 12)) +#' y <- x |> layer_depthwise_conv_2d(3, 3, activation = 'relu') +#' shape(y) +#' ``` #' -#' @family convolutional layers +#' @returns +#' A 4D tensor representing +#' `activation(depthwise_conv2d(inputs, kernel) + bias)`. +#' +#' @param kernel_size +#' int or list of 2 integer, specifying the size of the +#' depthwise convolution window. +#' +#' @param strides +#' int or list of 2 integer, specifying the stride length +#' of the depthwise convolution. `strides > 1` is incompatible with +#' `dilation_rate > 1`. +#' +#' @param padding +#' string, either `"valid"` or `"same"` (case-insensitive). +#' `"valid"` means no padding. `"same"` results in padding evenly to +#' the left/right or up/down of the input. When `padding="same"` and +#' `strides=1`, the output has the same size as the input. +#' +#' @param depth_multiplier +#' The number of depthwise convolution output channels +#' for each input channel. The total number of depthwise convolution +#' output channels will be equal to `input_channel * depth_multiplier`. +#' +#' @param data_format +#' string, either `"channels_last"` or `"channels_first"`. +#' The ordering of the dimensions in the inputs. `"channels_last"` +#' corresponds to inputs with shape `(batch, height, width, channels)` +#' while `"channels_first"` corresponds to inputs with shape +#' `(batch, channels, height, width)`. It defaults to the +#' `image_data_format` value found in your Keras config file +#' at `~/.keras/keras.json`. +#' If you never set it, then it will be `"channels_last"`. +#' +#' @param dilation_rate +#' int or list of 2 integers, specifying the dilation +#' rate to use for dilated convolution. +#' +#' @param activation +#' Activation function. If `NULL`, no activation is applied. +#' +#' @param use_bias +#' bool, if `TRUE`, bias will be added to the output. +#' +#' @param depthwise_initializer +#' Initializer for the convolution kernel. +#' If `NULL`, the default initializer (`"glorot_uniform"`) +#' will be used. +#' +#' @param bias_initializer +#' Initializer for the bias vector. If `NULL`, the +#' default initializer (`"zeros"`) will be used. +#' +#' @param depthwise_regularizer +#' Optional regularizer for the convolution kernel. +#' +#' @param bias_regularizer +#' Optional regularizer for the bias vector. +#' +#' @param activity_regularizer +#' Optional regularizer function for the output. +#' +#' @param depthwise_constraint +#' Optional projection function to be applied to the +#' kernel after being updated by an `Optimizer` (e.g. used to implement +#' norm constraints or value constraints for layer weights). The +#' function must take as input the unprojected variable and must return +#' the projected variable (which must have the same shape). Constraints +#' are not safe to use when doing asynchronous distributed training. 
+#'
+#' @param bias_constraint
+#' Optional projection function to be applied to the
+#' bias after being updated by an `Optimizer`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
 #'
 #' @export
-layer_cropping_2d <- function(object, cropping = list(c(0L, 0L), c(0L, 0L)), data_format = NULL,
-                              batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) {
-
-  create_layer(keras$layers$Cropping2D, object, list(
-    cropping = normalize_cropping(cropping, 2L),
-    data_format = data_format,
-    batch_size = as_nullable_integer(batch_size),
-    name = name,
-    trainable = trainable,
-    weights = weights
-  ))
-
-}
-
-
-#' Cropping layer for 3D data (e.g. spatial or spatio-temporal).
-#'
-#' @inheritParams layer_cropping_1d
-#'
-#' @param cropping int, or list of 3 ints, or list of 3 lists of 2 ints.
-#'   - If int: the same symmetric cropping
-#'     is applied to depth, height, and width.
-#'   - If list of 3 ints:
-#'     interpreted as two different
-#'     symmetric cropping values for depth, height, and width:
-#'     `(symmetric_dim1_crop, symmetric_dim2_crop, symmetric_dim3_crop)`.
-#'   - If list of 3 list of 2 ints:
-#'     interpreted as
-#'     `((left_dim1_crop, right_dim1_crop), (left_dim2_crop, right_dim2_crop), (left_dim3_crop, right_dim3_crop))`
-#' @param data_format A string, one of `channels_last` (default) or
-#'   `channels_first`. The ordering of the dimensions in the inputs.
-#'   `channels_last` corresponds to inputs with shape `(batch, spatial_dim1,
-#'   spatial_dim2, spatial_dim3, channels)` while `channels_first` corresponds
-#'   to inputs with shape `(batch, channels, spatial_dim1, spatial_dim2,
-#'   spatial_dim3)`. It defaults to the `image_data_format` value found in your
-#'   Keras config file at `~/.keras/keras.json`. If you never set it, then it
-#'   will be "channels_last".
-#'
-#' @section Input shape: 5D tensor with shape:
-#'   - If `data_format` is `"channels_last"`: `(batch, first_axis_to_crop, second_axis_to_crop,
-#'     third_axis_to_crop, depth)`
-#'   - If `data_format` is `"channels_first"`:
-#'     `(batch, depth, first_axis_to_crop, second_axis_to_crop,
-#'     third_axis_to_crop)`
-#'
-#' @section Output shape: 5D tensor with shape:
-#'   - If `data_format` is `"channels_last"`: `(batch, first_cropped_axis, second_cropped_axis,
-#'     third_cropped_axis, depth)`
-#'   - If `data_format` is `"channels_first"`: `(batch, depth, first_cropped_axis, second_cropped_axis,
-#'     third_cropped_axis)`
-#'
 #' @family convolutional layers
-#'
-#' @export
-layer_cropping_3d <- function(object, cropping = list(c(1L, 1L), c(1L, 1L), c(1L, 1L)), data_format = NULL,
-                              batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) {
-  create_layer(keras$layers$Cropping3D, object, list(
-    cropping = normalize_cropping(cropping, 3L),
-    data_format = data_format,
-    batch_size = as_nullable_integer(batch_size),
-    name = name,
-    trainable = trainable,
-    weights = weights
-  ))
+#' @family layers
+#' @seealso
+#' +
# +
+#' @tether keras.layers.DepthwiseConv2D
+layer_depthwise_conv_2d <-
+function (object, kernel_size, strides = list(1L, 1L), padding = "valid",
+    depth_multiplier = 1L, data_format = NULL, dilation_rate = list(
+        1L, 1L), activation = NULL, use_bias = TRUE, depthwise_initializer = "glorot_uniform",
+    bias_initializer = "zeros", depthwise_regularizer = NULL,
+    bias_regularizer = NULL, activity_regularizer = NULL, depthwise_constraint = NULL,
+    bias_constraint = NULL, ...)
+{ + args <- capture_args(list(kernel_size = as_integer, strides = as_integer, + depth_multiplier = as_integer, dilation_rate = as_integer, + input_shape = normalize_shape, batch_size = as_integer, + batch_input_shape = normalize_shape), ignore = "object") + create_layer(keras$layers$DepthwiseConv2D, object, args) } -#' Convolutional LSTM. -#' -#' It is similar to an LSTM layer, but the input transformations and recurrent -#' transformations are both convolutional. -#' -#' @inheritParams layer_conv_2d -#' -#' @param filters Integer, the dimensionality of the output space (i.e. the -#' number of output filters in the convolution). -#' @param kernel_size An integer or list of n integers, specifying the -#' dimensions of the convolution window. -#' @param strides An integer or list of n integers, specifying the strides of -#' the convolution. Specifying any stride value != 1 is incompatible with -#' specifying any `dilation_rate` value != 1. -#' @param padding One of `"valid"` or `"same"` (case-insensitive). -#' @param data_format A string, one of `channels_last` (default) or -#' `channels_first`. The ordering of the dimensions in the inputs. -#' `channels_last` corresponds to inputs with shape `(batch, time, ..., -#' channels)` while `channels_first` corresponds to inputs with shape `(batch, -#' time, channels, ...)`. It defaults to the `image_data_format` value found -#' in your Keras config file at `~/.keras/keras.json`. If you never set it, -#' then it will be "channels_last". -#' @param dilation_rate An integer or list of n integers, specifying the -#' dilation rate to use for dilated convolution. Currently, specifying any -#' `dilation_rate` value != 1 is incompatible with specifying any `strides` -#' value != 1. -#' @param activation Activation function to use. If you don't specify anything, -#' no activation is applied (ie. "linear" activation: `a(x) = x`). -#' @param recurrent_activation Activation function to use for the recurrent -#' step. -#' @param use_bias Boolean, whether the layer uses a bias vector. -#' @param kernel_initializer Initializer for the `kernel` weights matrix, used -#' for the linear transformation of the inputs.. -#' @param recurrent_initializer Initializer for the `recurrent_kernel` weights -#' matrix, used for the linear transformation of the recurrent state.. -#' @param bias_initializer Initializer for the bias vector. -#' @param unit_forget_bias Boolean. If TRUE, add 1 to the bias of the forget -#' gate at initialization. Use in combination with `bias_initializer="zeros"`. -#' This is recommended in [Jozefowicz et -#' al.](https://proceedings.mlr.press/v37/jozefowicz15.pdf) -#' @param kernel_regularizer Regularizer function applied to the `kernel` -#' weights matrix. -#' @param recurrent_regularizer Regularizer function applied to the -#' `recurrent_kernel` weights matrix. -#' @param bias_regularizer Regularizer function applied to the bias vector. -#' @param activity_regularizer Regularizer function applied to the output of the -#' layer (its "activation").. -#' @param kernel_constraint Constraint function applied to the `kernel` weights -#' matrix. -#' @param recurrent_constraint Constraint function applied to the -#' `recurrent_kernel` weights matrix. -#' @param bias_constraint Constraint function applied to the bias vector. -#' @param return_sequences Boolean. Whether to return the last output in the -#' output sequence, or the full sequence. -#' @param return_state Boolean. Whether to return the last state in addition to the output. 
-#' @param go_backwards Boolean (default FALSE). If TRUE, rocess the input -#' sequence backwards. -#' @param stateful Boolean (default FALSE). If TRUE, the last state for each -#' sample at index i in a batch will be used as initial state for the sample -#' of index i in the following batch. -#' @param dropout Float between 0 and 1. Fraction of the units to drop for the -#' linear transformation of the inputs. -#' @param recurrent_dropout Float between 0 and 1. Fraction of the units to drop -#' for the linear transformation of the recurrent state. -#' -#' @section Input shape: -#' - if data_format='channels_first' 5D tensor with shape: -#' `(samples,time, channels, rows, cols)` -#' - if data_format='channels_last' 5D -#' tensor with shape: `(samples,time, rows, cols, channels)` -#' -#' @section References: -#' - [Convolutional LSTM Network: A Machine Learning Approach for Precipitation Nowcasting](https://arxiv.org/abs/1506.04214v1) -#' The current implementation does not include the feedback loop on the cells -#' output +#' 1D separable convolution layer. +#' +#' @description +#' This layer performs a depthwise convolution that acts separately on +#' channels, followed by a pointwise convolution that mixes channels. +#' If `use_bias` is TRUE and a bias initializer is provided, +#' it adds a bias vector to the output. It then optionally applies an +#' activation function to produce the final output. +#' +#' # Input Shape +#' - If `data_format="channels_last"`: +#' A 3D tensor with shape: `(batch_shape, steps, channels)` +#' - If `data_format="channels_first"`: +#' A 3D tensor with shape: `(batch_shape, channels, steps)` +#' +#' # Output Shape +#' - If `data_format="channels_last"`: +#' A 3D tensor with shape: `(batch_shape, new_steps, filters)` +#' - If `data_format="channels_first"`: +#' A 3D tensor with shape: `(batch_shape, filters, new_steps)` +#' +#' # Example +#' ```{r} +#' x <- random_uniform(c(4, 10, 12)) +#' y <- layer_separable_conv_1d(x, 3, 2, 2, activation='relu') +#' shape(y) +#' ``` #' -#' @family convolutional layers +#' @returns +#' A 3D tensor representing +#' `activation(separable_conv1d(inputs, kernel) + bias)`. +#' +#' @param filters +#' int, the dimensionality of the output space (i.e. the number +#' of filters in the pointwise convolution). +#' +#' @param kernel_size +#' int or list of 1 integers, specifying the size of the +#' depthwise convolution window. +#' +#' @param strides +#' int or list of 1 integers, specifying the stride length +#' of the depthwise convolution. If only one int is specified, the same +#' stride size will be used for all dimensions. `strides > 1` is +#' incompatible with `dilation_rate > 1`. +#' +#' @param padding +#' string, either `"valid"` or `"same"` (case-insensitive). +#' `"valid"` means no padding. `"same"` results in padding evenly to +#' the left/right or up/down of the input. When `padding="same"` and +#' `strides=1`, the output has the same size as the input. +#' +#' @param data_format +#' string, either `"channels_last"` or `"channels_first"`. +#' The ordering of the dimensions in the inputs. `"channels_last"` +#' corresponds to inputs with shape `(batch, steps, features)` +#' while `"channels_first"` corresponds to inputs with shape +#' `(batch, features, steps)`. It defaults to the `image_data_format` +#' value found in your Keras config file at `~/.keras/keras.json`. +#' If you never set it, then it will be `"channels_last"`. 
+#'
+#' @param dilation_rate
+#' int or list of 1 integers, specifying the dilation
+#' rate to use for dilated convolution. If only one int is specified,
+#' the same dilation rate will be used for all dimensions.
+#'
+#' @param depth_multiplier
+#' The number of depthwise convolution output channels
+#' for each input channel. The total number of depthwise convolution
+#' output channels will be equal to `input_channel * depth_multiplier`.
+#'
+#' @param activation
+#' Activation function. If `NULL`, no activation is applied.
+#'
+#' @param use_bias
+#' bool, if `TRUE`, bias will be added to the output.
+#'
+#' @param depthwise_initializer
+#' An initializer for the depthwise convolution
+#' kernel. If NULL, then the default initializer (`"glorot_uniform"`)
+#' will be used.
+#'
+#' @param pointwise_initializer
+#' An initializer for the pointwise convolution
+#' kernel. If NULL, then the default initializer (`"glorot_uniform"`)
+#' will be used.
+#'
+#' @param bias_initializer
+#' An initializer for the bias vector. If NULL, the
+#' default initializer (`"zeros"`) will be used.
+#'
+#' @param depthwise_regularizer
+#' Optional regularizer for the depthwise
+#' convolution kernel.
+#'
+#' @param pointwise_regularizer
+#' Optional regularizer for the pointwise
+#' convolution kernel.
+#'
+#' @param bias_regularizer
+#' Optional regularizer for the bias vector.
+#'
+#' @param activity_regularizer
+#' Optional regularizer function for the output.
+#'
+#' @param depthwise_constraint
+#' Optional projection function to be applied to the
+#' depthwise kernel after being updated by an `Optimizer` (e.g. used
+#' for norm constraints or value constraints for layer weights). The
+#' function must take as input the unprojected variable and must return
+#' the projected variable (which must have the same shape).
+#'
+#' @param pointwise_constraint
+#' Optional projection function to be applied to the
+#' pointwise kernel after being updated by an `Optimizer`.
+#'
+#' @param bias_constraint
+#' Optional projection function to be applied to the
+#' bias after being updated by an `Optimizer`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
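+#'
+#' @details
+#' As an illustrative sketch (an addition, not from the upstream Keras
+#' docs), the parameter savings over a regular convolution with the same
+#' receptive field can be checked directly:
+#'
+#' ```{r}
+#' inp <- layer_input(shape = c(64, 32))
+#' regular   <- keras_model(inp, inp |> layer_conv_1d(64, 9))
+#' separable <- keras_model(inp, inp |> layer_separable_conv_1d(64, 9))
+#' count_params(regular)   # 32 * 9 * 64 + 64
+#' count_params(separable) # 32 * 9 + 32 * 64 + 64
+#' ```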
#' #' @export -layer_conv_lstm_2d <- function(object, filters, kernel_size, strides = c(1L, 1L), padding = "valid", data_format = NULL, - dilation_rate = c(1L, 1L), activation = "tanh", recurrent_activation = "hard_sigmoid", use_bias = TRUE, - kernel_initializer = "glorot_uniform", recurrent_initializer = "orthogonal", bias_initializer = "zeros", - unit_forget_bias = TRUE, kernel_regularizer = NULL, recurrent_regularizer = NULL, bias_regularizer = NULL, - activity_regularizer = NULL, kernel_constraint = NULL, recurrent_constraint = NULL, bias_constraint = NULL, - return_sequences = FALSE, return_state = FALSE, go_backwards = FALSE, stateful = FALSE, dropout = 0.0, recurrent_dropout = 0.0, - batch_size = NULL, name = NULL, trainable = NULL, weights = NULL, input_shape = NULL) { - args <- list( - filters = as.integer(filters), - kernel_size = as_integer_tuple(kernel_size), - strides = as_integer_tuple(strides), - padding = padding, - data_format = data_format, - dilation_rate = as.integer(dilation_rate), - activation = activation, - recurrent_activation = recurrent_activation, - use_bias = use_bias, - kernel_initializer = kernel_initializer, - recurrent_initializer = recurrent_initializer, - bias_initializer = bias_initializer, - unit_forget_bias = unit_forget_bias, - kernel_regularizer = kernel_regularizer, - recurrent_regularizer = recurrent_regularizer, - bias_regularizer = bias_regularizer, - activity_regularizer = activity_regularizer, - kernel_constraint = kernel_constraint, - recurrent_constraint = recurrent_constraint, - bias_constraint = bias_constraint, - return_sequences = return_sequences, - go_backwards = go_backwards, - stateful = stateful, - dropout = dropout, - recurrent_dropout = recurrent_dropout, - batch_size = as_nullable_integer(batch_size), - name = name, - trainable = trainable, - weights = weights, - input_shape = normalize_shape(input_shape) - ) - - if(tf_version() >= "2.3") - args$return_state <- return_state - - create_layer(keras$layers$ConvLSTM2D, object, args) - -} - - -#' 1D Convolutional LSTM -#' -#' @details -#' Similar to an LSTM layer, but the input transformations -#' and recurrent transformations are both convolutional. -#' -#' @inheritParams layer_dense -#' -#' @param filters Integer, the dimensionality of the output space (i.e. the number of -#' output filters in the convolution). -#' -#' @param kernel_size An integer or list of n integers, specifying the -#' dimensions of the convolution window. -#' -#' @param strides An integer or list of n integers, specifying the strides of -#' the convolution. Specifying any stride value != 1 is incompatible with -#' specifying any `dilation_rate` value != 1. -#' -#' @param padding One of `"valid"` or `"same"` (case-insensitive). `"valid"` means no -#' padding. `"same"` results in padding evenly to the left/right or up/down -#' of the input such that output has the same height/width dimension as the -#' input. -#' -#' @param data_format A string, one of `channels_last` (default) or `channels_first`. -#' The ordering of the dimensions in the inputs. `channels_last` corresponds -#' to inputs with shape `(batch, time, ..., channels)` while `channels_first` -#' corresponds to inputs with shape `(batch, time, channels, ...)`. It -#' defaults to the `image_data_format` value found in your Keras config file -#' at `~/.keras/keras.json`. If you never set it, then it will be -#' "channels_last". -#' -#' @param dilation_rate An integer or list of n integers, specifying the -#' dilation rate to use for dilated convolution. 
Currently, specifying any -#' `dilation_rate` value != 1 is incompatible with specifying any `strides` -#' value != 1. -#' -#' @param activation Activation function to use. By default hyperbolic tangent -#' activation function is applied (`tanh(x)`). -#' -#' @param recurrent_activation Activation function to use for the recurrent step. -#' -#' @param use_bias Boolean, whether the layer uses a bias vector. -#' -#' @param kernel_initializer Initializer for the `kernel` weights matrix, used for -#' the linear transformation of the inputs. -#' -#' @param recurrent_initializer Initializer for the `recurrent_kernel` weights -#' matrix, used for the linear transformation of the recurrent state. -#' -#' @param bias_initializer Initializer for the bias vector. -#' -#' @param unit_forget_bias Boolean. If TRUE, add 1 to the bias of the forget gate at -#' initialization. Use in combination with `bias_initializer="zeros"`. This -#' is recommended in [Jozefowicz et al., 2015]( -#' https://proceedings.mlr.press/v37/jozefowicz15.pdf) -#' -#' @param kernel_regularizer Regularizer function applied to the `kernel` weights -#' matrix. -#' -#' @param recurrent_regularizer Regularizer function applied to the -#' `recurrent_kernel` weights matrix. -#' -#' @param bias_regularizer Regularizer function applied to the bias vector. -#' -#' @param activity_regularizer Regularizer function applied to. -#' -#' @param kernel_constraint Constraint function applied to the `kernel` weights -#' matrix. -#' -#' @param recurrent_constraint Constraint function applied to the `recurrent_kernel` -#' weights matrix. -#' -#' @param bias_constraint Constraint function applied to the bias vector. -#' -#' @param return_sequences Boolean. Whether to return the last output in the output -#' sequence, or the full sequence. (default FALSE) -#' -#' @param return_state Boolean Whether to return the last state in addition to the -#' output. (default FALSE) -#' -#' @param go_backwards Boolean (default FALSE). If TRUE, process the input sequence -#' backwards. -#' -#' @param stateful Boolean (default FALSE). If TRUE, the last state for each sample -#' at index i in a batch will be used as initial state for the sample of -#' index i in the following batch. -#' -#' @param dropout Float between 0 and 1. Fraction of the units to drop for the linear -#' transformation of the inputs. -#' -#' @param recurrent_dropout Float between 0 and 1. Fraction of the units to drop for -#' the linear transformation of the recurrent state. -#' @param ... standard layer arguments. -#' +#' @family convolutional layers +#' @family layers #' @seealso -#' + -#' -#' @export -layer_conv_lstm_1d <- -function(object, - filters, - kernel_size, - strides = 1L, - padding = "valid", - data_format = NULL, - dilation_rate = 1L, - activation = "tanh", - recurrent_activation = "hard_sigmoid", - use_bias = TRUE, - kernel_initializer = "glorot_uniform", - recurrent_initializer = "orthogonal", - bias_initializer = "zeros", - unit_forget_bias = TRUE, - kernel_regularizer = NULL, - recurrent_regularizer = NULL, - bias_regularizer = NULL, - activity_regularizer = NULL, - kernel_constraint = NULL, - recurrent_constraint = NULL, - bias_constraint = NULL, - return_sequences = FALSE, - return_state = FALSE, - go_backwards = FALSE, - stateful = FALSE, - dropout = 0, - recurrent_dropout = 0, - ...) 
+#' + +# + +#' +#' @tether keras.layers.SeparableConv1D +layer_separable_conv_1d <- +function (object, filters, kernel_size, strides = 1L, padding = "valid", + data_format = NULL, dilation_rate = 1L, depth_multiplier = 1L, + activation = NULL, use_bias = TRUE, depthwise_initializer = "glorot_uniform", + pointwise_initializer = "glorot_uniform", bias_initializer = "zeros", + depthwise_regularizer = NULL, pointwise_regularizer = NULL, + bias_regularizer = NULL, activity_regularizer = NULL, depthwise_constraint = NULL, + pointwise_constraint = NULL, bias_constraint = NULL, ...) { - require_tf_version("2.6", "layer_conv_lstm_1d") - args <- capture_args( - match.call(), - list( - filters = as.integer, - kernel_size = as_integer_tuple, - strides = as_integer_tuple, - dilation_rate = as.integer, - batch_size = as_nullable_integer, - input_shape = normalize_shape - ), - ignore = "object" - ) - create_layer(keras$layers$ConvLSTM1D, object, args) + args <- capture_args(list(filters = as_integer, kernel_size = as_integer, + strides = as_integer, dilation_rate = as_integer, depth_multiplier = as_integer, + input_shape = normalize_shape, batch_size = as_integer, + batch_input_shape = normalize_shape), ignore = "object") + create_layer(keras$layers$SeparableConv1D, object, args) } -#' 3D Convolutional LSTM -#' -#' @details -#' Similar to an LSTM layer, but the input transformations -#' and recurrent transformations are both convolutional. -#' -#' @inheritParams layer_dense -#' -#' @param filters Integer, the dimensionality of the output space (i.e. the number of -#' output filters in the convolution). -#' -#' @param kernel_size An integer or list of n integers, specifying the -#' dimensions of the convolution window. -#' -#' @param strides An integer or list of n integers, specifying the strides of -#' the convolution. Specifying any stride value != 1 is incompatible with -#' specifying any `dilation_rate` value != 1. -#' -#' @param padding One of `"valid"` or `"same"` (case-insensitive). `"valid"` means no -#' padding. `"same"` results in padding evenly to the left/right or up/down -#' of the input such that output has the same height/width dimension as the -#' input. -#' -#' @param data_format A string, one of `channels_last` (default) or `channels_first`. -#' The ordering of the dimensions in the inputs. `channels_last` corresponds -#' to inputs with shape `(batch, time, ..., channels)` while `channels_first` -#' corresponds to inputs with shape `(batch, time, channels, ...)`. It -#' defaults to the `image_data_format` value found in your Keras config file -#' at `~/.keras/keras.json`. If you never set it, then it will be -#' "channels_last". -#' -#' @param dilation_rate An integer or list of n integers, specifying the -#' dilation rate to use for dilated convolution. Currently, specifying any -#' `dilation_rate` value != 1 is incompatible with specifying any `strides` -#' value != 1. -#' -#' @param activation Activation function to use. By default hyperbolic tangent -#' activation function is applied (`tanh(x)`). -#' -#' @param recurrent_activation Activation function to use for the recurrent step. -#' -#' @param use_bias Boolean, whether the layer uses a bias vector. -#' -#' @param kernel_initializer Initializer for the `kernel` weights matrix, used for -#' the linear transformation of the inputs. -#' -#' @param recurrent_initializer Initializer for the `recurrent_kernel` weights -#' matrix, used for the linear transformation of the recurrent state. 
-#' -#' @param bias_initializer Initializer for the bias vector. -#' -#' @param unit_forget_bias Boolean. If TRUE, add 1 to the bias of the forget gate at -#' initialization. Use in combination with `bias_initializer="zeros"`. This -#' is recommended in [Jozefowicz et al., 2015]( -#' https://proceedings.mlr.press/v37/jozefowicz15.pdf) -#' -#' @param kernel_regularizer Regularizer function applied to the `kernel` weights -#' matrix. -#' -#' @param recurrent_regularizer Regularizer function applied to the -#' `recurrent_kernel` weights matrix. -#' -#' @param bias_regularizer Regularizer function applied to the bias vector. -#' -#' @param activity_regularizer Regularizer function applied to. -#' -#' @param kernel_constraint Constraint function applied to the `kernel` weights -#' matrix. -#' -#' @param recurrent_constraint Constraint function applied to the `recurrent_kernel` -#' weights matrix. -#' -#' @param bias_constraint Constraint function applied to the bias vector. -#' -#' @param return_sequences Boolean. Whether to return the last output in the output -#' sequence, or the full sequence. (default FALSE) -#' -#' @param return_state Boolean Whether to return the last state in addition to the -#' output. (default FALSE) -#' -#' @param go_backwards Boolean (default FALSE). If TRUE, process the input sequence -#' backwards. -#' -#' @param stateful Boolean (default FALSE). If TRUE, the last state for each sample -#' at index i in a batch will be used as initial state for the sample of -#' index i in the following batch. -#' -#' @param dropout Float between 0 and 1. Fraction of the units to drop for the linear -#' transformation of the inputs. -#' -#' @param recurrent_dropout Float between 0 and 1. Fraction of the units to drop for -#' the linear transformation of the recurrent state. -#' @param ... standard layer arguments. +#' 2D separable convolution layer. +#' +#' @description +#' This layer performs a depthwise convolution that acts separately on +#' channels, followed by a pointwise convolution that mixes channels. +#' If `use_bias` is TRUE and a bias initializer is provided, +#' it adds a bias vector to the output. It then optionally applies an +#' activation function to produce the final output. +#' +#' # Input Shape +#' - If `data_format="channels_last"`: +#' A 4D tensor with shape: `(batch_size, height, width, channels)` +#' - If `data_format="channels_first"`: +#' A 4D tensor with shape: `(batch_size, channels, height, width)` +#' +#' # Output Shape +#' - If `data_format="channels_last"`: +#' A 4D tensor with shape: `(batch_size, new_height, new_width, filters)` +#' - If `data_format="channels_first"`: +#' A 4D tensor with shape: `(batch_size, filters, new_height, new_width)` +#' +#' # Example +#' ```{r} +#' x <- random_uniform(c(4, 10, 10, 12)) +#' y <- layer_separable_conv_2d(x, 3, c(4, 3), 2, activation='relu') +#' shape(y) +#' ``` #' -#' @seealso -#' + +#' @returns +#' A 4D tensor representing +#' `activation(separable_conv2d(inputs, kernel) + bias)`. +#' +#' @param filters +#' int, the dimensionality of the output space (i.e. the number +#' of filters in the pointwise convolution). +#' +#' @param kernel_size +#' int or list of 2 integers, specifying the size of the +#' depthwise convolution window. +#' +#' @param strides +#' int or list of 2 integers, specifying the stride length +#' of the depthwise convolution. If only one int is specified, the same +#' stride size will be used for all dimensions. `strides > 1` is +#' incompatible with `dilation_rate > 1`. 
+#'
+#' @param padding
+#' string, either `"valid"` or `"same"` (case-insensitive).
+#' `"valid"` means no padding. `"same"` results in padding evenly to
+#' the left/right or up/down of the input. When `padding="same"` and
+#' `strides=1`, the output has the same size as the input.
+#'
+#' @param data_format
+#' string, either `"channels_last"` or `"channels_first"`.
+#' The ordering of the dimensions in the inputs. `"channels_last"`
+#' corresponds to inputs with shape `(batch, height, width, channels)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch, channels, height, width)`. It defaults to the
+#' `image_data_format` value found in your Keras config file
+#' at `~/.keras/keras.json`.
+#' If you never set it, then it will be `"channels_last"`.
+#'
+#' @param dilation_rate
+#' int or list of 2 integers, specifying the dilation
+#' rate to use for dilated convolution. If only one int is specified,
+#' the same dilation rate will be used for all dimensions.
+#'
+#' @param depth_multiplier
+#' The number of depthwise convolution output channels
+#' for each input channel. The total number of depthwise convolution
+#' output channels will be equal to `input_channel * depth_multiplier`.
+#'
+#' @param activation
+#' Activation function. If `NULL`, no activation is applied.
+#'
+#' @param use_bias
+#' bool, if `TRUE`, bias will be added to the output.
+#'
+#' @param depthwise_initializer
+#' An initializer for the depthwise convolution
+#' kernel. If NULL, then the default initializer (`"glorot_uniform"`)
+#' will be used.
+#'
+#' @param pointwise_initializer
+#' An initializer for the pointwise convolution
+#' kernel. If NULL, then the default initializer (`"glorot_uniform"`)
+#' will be used.
+#'
+#' @param bias_initializer
+#' An initializer for the bias vector. If NULL, the
+#' default initializer (`"zeros"`) will be used.
+#'
+#' @param depthwise_regularizer
+#' Optional regularizer for the depthwise
+#' convolution kernel.
+#'
+#' @param pointwise_regularizer
+#' Optional regularizer for the pointwise
+#' convolution kernel.
+#'
+#' @param bias_regularizer
+#' Optional regularizer for the bias vector.
+#'
+#' @param activity_regularizer
+#' Optional regularizer function for the output.
+#'
+#' @param depthwise_constraint
+#' Optional projection function to be applied to the
+#' depthwise kernel after being updated by an `Optimizer` (e.g. used
+#' for norm constraints or value constraints for layer weights). The
+#' function must take as input the unprojected variable and must return
+#' the projected variable (which must have the same shape).
+#'
+#' @param pointwise_constraint
+#' Optional projection function to be applied to the
+#' pointwise kernel after being updated by an `Optimizer`.
+#'
+#' @param bias_constraint
+#' Optional projection function to be applied to the
+#' bias after being updated by an `Optimizer`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
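+#'
+#' @details
+#' A minimal sketch (illustrative, not from the upstream Keras docs) of
+#' the two-stage structure described above: a separable convolution
+#' produces the same output shape as an explicit depthwise stage followed
+#' by a pointwise (`kernel_size = 1`) convolution.
+#'
+#' ```{r}
+#' x <- random_uniform(c(4, 10, 10, 3))
+#' y1 <- x |> layer_separable_conv_2d(16, 3)
+#' y2 <- x |> layer_depthwise_conv_2d(3) |> layer_conv_2d(16, 1)
+#' shape(y1)
+#' shape(y2)
+#' ```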
#' #' @export -layer_conv_lstm_3d <- -function(object, - filters, - kernel_size, - strides = c(1L, 1L, 1L), - padding = "valid", - data_format = NULL, - dilation_rate = c(1L, 1L, 1L), - activation = "tanh", - recurrent_activation = "hard_sigmoid", - use_bias = TRUE, - kernel_initializer = "glorot_uniform", - recurrent_initializer = "orthogonal", - bias_initializer = "zeros", - unit_forget_bias = TRUE, - kernel_regularizer = NULL, - recurrent_regularizer = NULL, - bias_regularizer = NULL, - activity_regularizer = NULL, - kernel_constraint = NULL, - recurrent_constraint = NULL, - bias_constraint = NULL, - return_sequences = FALSE, - return_state = FALSE, - go_backwards = FALSE, - stateful = FALSE, - dropout = 0, - recurrent_dropout = 0, - ...) +#' @family convolutional layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.SeparableConv2D +layer_separable_conv_2d <- +function (object, filters, kernel_size, strides = list(1L, 1L), + padding = "valid", data_format = NULL, dilation_rate = list( + 1L, 1L), depth_multiplier = 1L, activation = NULL, use_bias = TRUE, + depthwise_initializer = "glorot_uniform", pointwise_initializer = "glorot_uniform", + bias_initializer = "zeros", depthwise_regularizer = NULL, + pointwise_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL, + depthwise_constraint = NULL, pointwise_constraint = NULL, + bias_constraint = NULL, ...) { - require_tf_version("2.6", "layer_conv_lstm_3d") - args <- capture_args( - match.call(), - list( - filters = as.integer, - kernel_size = as_integer_tuple, - strides = as_integer_tuple, - dilation_rate = as.integer, - batch_size = as_nullable_integer, - input_shape = normalize_shape - ), - ignore = "object" - ) - create_layer(keras$layers$ConvLSTM3D, object, args) -} - - - - - -normalize_padding <- function(padding, dims) { - normalize_scale("padding", padding, dims) -} - -normalize_cropping <- function(cropping, dims) { - normalize_scale("cropping", cropping, dims) -} - -normalize_scale <- function(name, scale, dims) { - - # validate and marshall scale argument - throw_invalid_scale <- function() { - stop(name, " must be a list of ", dims, " integers or list of ", dims, " lists of 2 integers", - call. = FALSE) - } - - # if all of the individual items are numeric then cast to integer vector - if (all(sapply(scale, function(x) length(x) == 1 && is.numeric(x)))) { - as.integer(scale) - } else if (is.list(scale)) { - lapply(scale, function(x) { - if (length(x) != 2) - throw_invalid_scale() - as.integer(x) - }) - } else { - throw_invalid_scale() - } + args <- capture_args(list(filters = as_integer, kernel_size = as_integer, + strides = as_integer, dilation_rate = as_integer, depth_multiplier = as_integer, + input_shape = normalize_shape, batch_size = as_integer, + batch_input_shape = normalize_shape), ignore = "object") + create_layer(keras$layers$SeparableConv2D, object, args) } diff --git a/R/layers-core.R b/R/layers-core.R index 9aee657cfb..5407a3a112 100644 --- a/R/layers-core.R +++ b/R/layers-core.R @@ -1,564 +1,579 @@ -#' Input layer -#' -#' Layer to be used as an entry point into a graph. -#' -#' @param shape Shape, not including the batch size. For instance, -#' `shape=c(32)` indicates that the expected input will be batches -#' of 32-dimensional vectors. -#' @param batch_shape Shape, including the batch size. For instance, -#' `shape = c(10,32)` indicates that the expected input will be batches -#' of 10 32-dimensional vectors. 
`batch_shape = list(NULL, 32)` indicates -#' batches of an arbitrary number of 32-dimensional vectors. -#' @param name An optional name string for the layer. Should be unique in a -#' model (do not reuse the same name twice). It will be autogenerated if it -#' isn't provided. -#' @param dtype The data type expected by the input, as a string (`float32`, -#' `float64`, `int32`...) -#' @param sparse Boolean, whether the placeholder created is meant to be sparse. -#' @param tensor Existing tensor to wrap into the `Input` layer. If set, the -#' layer will not create a placeholder tensor. -#' @param ragged A boolean specifying whether the placeholder to be created is -#' ragged. Only one of 'ragged' and 'sparse' can be `TRUE` In this case, values -#' of 'NULL' in the 'shape' argument represent ragged dimensions. -#' -#' @return A tensor -#' -#' @family core layers -#' + +#' Just your regular densely-connected NN layer. +#' +#' @description +#' `Dense` implements the operation: +#' `output = activation(dot(input, kernel) + bias)` +#' where `activation` is the element-wise activation function +#' passed as the `activation` argument, `kernel` is a weights matrix +#' created by the layer, and `bias` is a bias vector created by the layer +#' (only applicable if `use_bias` is `TRUE`). +#' +#' # Note +#' If the input to the layer has a rank greater than 2, `Dense` +#' computes the dot product between the `inputs` and the `kernel` along the +#' last axis of the `inputs` and axis 0 of the `kernel` (using `tf.tensordot`). +#' For example, if input has dimensions `(batch_size, d0, d1)`, then we create +#' a `kernel` with shape `(d1, units)`, and the `kernel` operates along axis 2 +#' of the `input`, on every sub-tensor of shape `(1, 1, d1)` (there are +#' `batch_size * d0` such sub-tensors). The output in this case will have +#' shape `(batch_size, d0, units)`. +#' +#' # Input Shape +#' N-D tensor with shape: `(batch_size, ..., input_dim)`. +#' The most common situation would be +#' a 2D input with shape `(batch_size, input_dim)`. +#' +#' # Output Shape +#' N-D tensor with shape: `(batch_size, ..., units)`. +#' For instance, for a 2D input with shape `(batch_size, input_dim)`, +#' the output would have shape `(batch_size, units)`. +#' +#' # Methods +#' - ```r +#' enable_lora( +#' rank, +#' a_initializer = 'he_uniform', +#' b_initializer = 'zeros' +#' ) +#' ``` +#' +#' # Readonly properties: +#' +#' - `kernel` +#' +#' @param units +#' Positive integer, dimensionality of the output space. +#' +#' @param activation +#' Activation function to use. +#' If you don't specify anything, no activation is applied +#' (ie. "linear" activation: `a(x) = x`). +#' +#' @param use_bias +#' Boolean, whether the layer uses a bias vector. +#' +#' @param kernel_initializer +#' Initializer for the `kernel` weights matrix. +#' +#' @param bias_initializer +#' Initializer for the bias vector. +#' +#' @param kernel_regularizer +#' Regularizer function applied to +#' the `kernel` weights matrix. +#' +#' @param bias_regularizer +#' Regularizer function applied to the bias vector. +#' +#' @param activity_regularizer +#' Regularizer function applied to +#' the output of the layer (its "activation"). +#' +#' @param kernel_constraint +#' Constraint function applied to +#' the `kernel` weights matrix. +#' +#' @param bias_constraint +#' Constraint function applied to the bias vector. +#' +#' @param lora_rank +#' Optional integer. If set, the layer's forward pass +#' will implement LoRA (Low-Rank Adaptation) +#' with the provided rank. 
LoRA sets the layer's kernel
+#' to non-trainable and replaces it with a delta over the
+#' original kernel, obtained via multiplying two lower-rank
+#' trainable matrices. This can be useful to reduce the
+#' computation cost of fine-tuning large dense layers.
+#' You can also enable LoRA on an existing
+#' `Dense` layer by calling `layer$enable_lora(rank)`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @returns The return value depends on the value provided for the first argument.
+#' If `object` is:
+#' - a `keras_model_sequential()`, then the layer is added to the sequential model
+#' (which is modified in place). To enable piping, the sequential model is also
+#' returned, invisibly.
+#' - a `keras_input()`, then the output tensor from calling `layer(input)` is returned.
+#' - `NULL` or missing, then a `Layer` instance is returned.
 #' @export
-layer_input <-
-function(shape = NULL, batch_shape = NULL, name = NULL,
-         dtype = NULL, sparse = FALSE, tensor = NULL,
-         ragged = FALSE) {
-  args <- capture_args(match.call(),
-                       list(shape = normalize_shape,
-                            batch_shape = normalize_shape))
-  do.call(keras$layers$Input, args)
+#' @family core layers
+#' @family layers
+#' @seealso
+#' +
# +
+#' @tether keras.layers.Dense
+layer_dense <-
+function (object, units, activation = NULL, use_bias = TRUE,
+    kernel_initializer = "glorot_uniform", bias_initializer = "zeros",
+    kernel_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL,
+    kernel_constraint = NULL, bias_constraint = NULL, lora_rank = NULL,
+    ...)
+{
+    args <- capture_args(list(units = as_integer, lora_rank = as_integer,
+        input_shape = normalize_shape, batch_size = as_integer,
+        batch_input_shape = normalize_shape), ignore = "object")
+    create_layer(keras$layers$Dense, object, args)
}

-#' Add a densely-connected NN layer to an output
-#'
-#' Implements the operation: `output = activation(dot(input, kernel) + bias)`
-#' where `activation` is the element-wise activation function passed as the
-#' `activation` argument, `kernel` is a weights matrix created by the layer, and
-#' `bias` is a bias vector created by the layer (only applicable if `use_bias`
-#' is `TRUE`). Note: if the input to the layer has a rank greater than 2, then
-#' it is flattened prior to the initial dot product with `kernel`.
-#'
-#' @inheritParams layer_input
-#'
-#' @param object What to compose the new `Layer` instance with. Typically a
-#'   Sequential model or a Tensor (e.g., as returned by `layer_input()`).
-#'   The return value depends on `object`. If `object` is:
-#'
-#'   - missing or `NULL`, the `Layer` instance is returned.
-#'   - a `Sequential` model, the model with an additional layer is returned.
-#'   - a Tensor, the output tensor from `layer_instance(object)` is returned.
-#'
-#' @param units Positive integer, dimensionality of the output space.
-#' @param activation Name of activation function to use. If you don't specify
-#'   anything, no activation is applied (ie. "linear" activation: a(x) = x).
-#' @param use_bias Whether the layer uses a bias vector.
-#' @param kernel_initializer Initializer for the `kernel` weights matrix.
-#' @param bias_initializer Initializer for the bias vector.
-#' @param kernel_regularizer Regularizer function applied to the `kernel`
-#'   weights matrix.
-#' @param bias_regularizer Regularizer function applied to the bias vector.
-#' @param activity_regularizer Regularizer function applied to the output of the
-#'   layer (its "activation")..
-#' @param kernel_constraint Constraint function applied to the `kernel` weights
-#'   matrix.
-#' @param bias_constraint Constraint function applied to the bias vector.
-#' @param input_shape Dimensionality of the input (integer) not including the
-#'   samples axis. This argument is required when using this layer as the first
-#'   layer in a model.
-#' @param batch_input_shape Shapes, including the batch size. For instance,
-#'   `batch_input_shape=c(10, 32)` indicates that the expected input will be
-#'   batches of 10 32-dimensional vectors. `batch_input_shape=list(NULL, 32)`
-#'   indicates batches of an arbitrary number of 32-dimensional vectors.
-#' @param batch_size Fixed batch size for layer
-#' @param trainable Whether the layer weights will be updated during training.
-#' @param weights Initial weights for layer.
-#'
-#' @section Input and Output Shapes:
-#'
-#' Input shape: nD tensor with shape: `(batch_size, ..., input_dim)`. The most
-#' common situation would be a 2D input with shape `(batch_size, input_dim)`.
-#'
-#' Output shape: nD tensor with shape: `(batch_size, ..., units)`. For
-#' instance, for a 2D input with shape `(batch_size, input_dim)`, the output
-#' would have shape `(batch_size, unit)`.
-#'
-#' @family core layers
-#'
+#' A layer that uses `einsum` as the backing computation.
+#'
+#' @description
+#' This layer can perform einsum calculations of arbitrary dimensionality.
+#'
+#' # Examples
+#' **Biased dense layer with einsums**
+#'
+#' This example shows how to instantiate a standard Keras dense layer using
+#' einsum operations. This example is equivalent to
+#' `layer_dense(units = 64, use_bias = TRUE)`.
+#'
+#' ```{r}
+#' input <- layer_input(shape = c(32))
+#' output <- input |>
+#'   layer_einsum_dense("ab,bc->ac",
+#'                      output_shape = 64,
+#'                      bias_axes = "c")
+#' output # shape(NA, 64)
+#' ```
+#'
+#' **Applying a dense layer to a sequence**
+#'
+#' This example shows how to instantiate a layer that applies the same dense
+#' operation to every element in a sequence. Here, the `output_shape` has two
+#' values (since there are two non-batch dimensions in the output); the first
+#' dimension in the `output_shape` is `NA`, because the sequence dimension
+#' `b` has an unknown shape.
+#'
+#' ```{r}
+#' input <- layer_input(shape = c(32, 128))
+#' output <- input |>
+#'   layer_einsum_dense("abc,cd->abd",
+#'                      output_shape = c(NA, 64),
+#'                      bias_axes = "d")
+#' output # shape(NA, 32, 64)
+#' ```
+#'
+#' **Applying a dense layer to a sequence using ellipses**
+#'
+#' This example shows how to instantiate a layer that applies the same dense
+#' operation to every element in a sequence, but uses the ellipsis notation
+#' instead of specifying the batch and sequence dimensions.
+#'
+#' Because we are using ellipsis notation and have specified only one axis, the
+#' `output_shape` arg is a single value. When instantiated in this way, the
+#' layer can handle any number of sequence dimensions - including the case
+#' where no sequence dimension exists.
+#' +#' ```{r} +#' input <- layer_input(shape = c(32, 128)) +#' output <- input |> +#' layer_einsum_dense("...x,xy->...y", +#' output_shape = 64, +#' bias_axes = "y") +#' +#' output # shape(NA, 32, 64) +#' ``` +#' +#' # Methods +#' - ```r +#' enable_lora( +#' rank, +#' a_initializer = 'he_uniform', +#' b_initializer = 'zeros' +#' ) +#' ``` +#' +#' - ```r +#' quantize(mode) +#' ``` +#' +#' # Readonly properties: +#' +#' - `kernel` +#' +#' @param equation +#' An equation describing the einsum to perform. +#' This equation must be a valid einsum string of the form +#' `ab,bc->ac`, `...ab,bc->...ac`, or +#' `ab...,bc->ac...` where 'ab', 'bc', and 'ac' can be any valid einsum +#' axis expression sequence. +#' +#' @param output_shape +#' The expected shape of the output tensor +#' (excluding the batch dimension and any dimensions +#' represented by ellipses). You can specify `NA` or `NULL` for any dimension +#' that is unknown or can be inferred from the input shape. +#' +#' @param activation +#' Activation function to use. If you don't specify anything, +#' no activation is applied +#' (that is, a "linear" activation: `a(x) = x`). +#' +#' @param bias_axes +#' A string containing the output dimension(s) +#' to apply a bias to. Each character in the `bias_axes` string +#' should correspond to a character in the output portion +#' of the `equation` string. +#' +#' @param kernel_initializer +#' Initializer for the `kernel` weights matrix. +#' +#' @param bias_initializer +#' Initializer for the bias vector. +#' +#' @param kernel_regularizer +#' Regularizer function applied to the `kernel` weights +#' matrix. +#' +#' @param bias_regularizer +#' Regularizer function applied to the bias vector. +#' +#' @param kernel_constraint +#' Constraint function applied to the `kernel` weights +#' matrix. +#' +#' @param bias_constraint +#' Constraint function applied to the bias vector. +#' +#' @param lora_rank +#' Optional integer. If set, the layer's forward pass +#' will implement LoRA (Low-Rank Adaptation) +#' with the provided rank. LoRA sets the layer's kernel +#' to non-trainable and replaces it with a delta over the +#' original kernel, obtained via multiplying two lower-rank +#' trainable matrices +#' (the factorization happens on the last dimension). +#' This can be useful to reduce the +#' computation cost of fine-tuning large dense layers. +#' You can also enable LoRA on an existing +#' `EinsumDense` layer by calling `layer$enable_lora(rank)`. +#' +#' @param ... +#' Base layer keyword arguments, such as `name` and `dtype`. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. 
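+#'
+#' @details
+#' A hypothetical usage sketch for the `enable_lora()` method listed above
+#' (the rank chosen here is illustrative only): LoRA can be switched on
+#' once the layer has been built, e.g. after its first call.
+#'
+#' ```{r}
+#' lyr <- layer_einsum_dense(equation = "ab,bc->ac", output_shape = 64)
+#' y <- lyr(random_uniform(c(8, 32))) # first call builds the kernel
+#' lyr$enable_lora(2L)                # adds low-rank trainable A/B factors
+#' ```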
+#' +#' @inherit layer_dense return #' @export -layer_dense <- function(object, units, activation = NULL, use_bias = TRUE, - kernel_initializer = 'glorot_uniform', bias_initializer = 'zeros', - kernel_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL, - kernel_constraint = NULL, bias_constraint = NULL, input_shape = NULL, - batch_input_shape = NULL, batch_size = NULL, dtype = NULL, - name = NULL, trainable = NULL, weights = NULL - ) { - - create_layer(keras$layers$Dense, object, list( - units = as.integer(units), - activation = activation, - use_bias = use_bias, - kernel_initializer = kernel_initializer, - bias_initializer = bias_initializer, - kernel_regularizer = kernel_regularizer, - bias_regularizer = bias_regularizer, - activity_regularizer = activity_regularizer, - kernel_constraint = kernel_constraint, - bias_constraint = bias_constraint, - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - dtype = dtype, - name = name, - trainable = trainable, - weights = weights - )) - -} - -#' Reshapes an output to a certain shape. -#' -#' @inheritParams layer_activation -#' -#' @param target_shape List of integers, does not include the samples dimension -#' (batch size). -#' -#' @section Input and Output Shapes: -#' -#' Input shape: Arbitrary, although all dimensions in the input shaped must be -#' fixed. -#' -#' Output shape: `(batch_size,) + target_shape`. -#' #' @family core layers -#' -#' @export -layer_reshape <- function(object, target_shape, input_shape = NULL, - batch_input_shape = NULL, batch_size = NULL, dtype = NULL, - name = NULL, trainable = NULL, weights = NULL) { - - create_layer(keras$layers$Reshape, object, list( - target_shape = normalize_shape(target_shape), - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - dtype = dtype, - name = name, - trainable = trainable, - weights = weights - )) - +#' @family layers +# @seealso +# + +#' +#' @tether keras.layers.EinsumDense +layer_einsum_dense <- +function (object, equation, output_shape, activation = NULL, + bias_axes = NULL, kernel_initializer = "glorot_uniform", + bias_initializer = "zeros", kernel_regularizer = NULL, bias_regularizer = NULL, + kernel_constraint = NULL, bias_constraint = NULL, lora_rank = NULL, + ...) +{ + args <- capture_args(list(lora_rank = as_integer, input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape, + output_shape = normalize_shape), ignore = "object") + create_layer(keras$layers$EinsumDense, object, args) } -#' Permute the dimensions of an input according to a given pattern +#' Turns positive integers (indexes) into dense vectors of fixed size. +#' +#' @description +#' e.g. `rbind(4L, 20L)` \eqn{\rightarrow}{->} `rbind(c(0.25, 0.1), c(0.6, -0.2))` #' -#' @param dims List of integers. Permutation pattern, does not include the -#' samples dimension. Indexing starts at 1. For instance, `(2, 1)` permutes -#' the first and second dimension of the input. +#' This layer can only be used on positive integer inputs of a fixed range. +#' +#' # Example #' -#' @inheritParams layer_activation +#' ```{r} +#' model <- keras_model_sequential() |> +#' layer_embedding(1000, 64) #' -#' @section Input and Output Shapes: +#' # The model will take as input an integer matrix of size (batch,input_length), +#' # and the largest integer (i.e. 
word index) in the input +#' # should be no larger than 999 (vocabulary size). +#' # Now model$output_shape is (NA, 10, 64), where `NA` is the batch +#' # dimension. #' -#' Input shape: Arbitrary +#' input_array <- random_integer(shape = c(32, 10), minval = 0, maxval = 1000) +#' model |> compile('rmsprop', 'mse') +#' output_array <- model |> predict(input_array, verbose = 0) +#' dim(output_array) # (32, 10, 64) +#' ``` #' -#' Output shape: Same as the input shape, but with the dimensions re-ordered -#' according to the specified pattern. +#' # Input Shape +#' 2D tensor with shape: `(batch_size, input_length)`. #' -#' @note Useful for e.g. connecting RNNs and convnets together. +#' # Output Shape +#' 3D tensor with shape: `(batch_size, input_length, output_dim)`. #' -#' @family core layers +#' # Methods +#' - ```r +#' enable_lora( +#' rank, +#' a_initializer = 'he_uniform', +#' b_initializer = 'zeros' +#' ) +#' ``` #' -#' @export -layer_permute <- function(object, dims, input_shape = NULL, - batch_input_shape = NULL, batch_size = NULL, dtype = NULL, - name = NULL, trainable = NULL, weights = NULL) { - - create_layer(keras$layers$Permute, object, list( - dims = as_integer_tuple(dims, force_tuple = TRUE), - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - dtype = dtype, - name = name, - trainable = trainable, - weights = weights - )) - -} - -#' Repeats the input n times. +#' - ```r +#' quantize(mode) +#' ``` #' -#' @inheritParams layer_dense +#' - ```r +#' quantized_build(input_shape, mode) +#' ``` #' -#' @param n integer, repetition factor. +#' - ```r +#' quantized_call(inputs) +#' ``` #' -#' @section Input shape: 2D tensor of shape `(num_samples, features)`. +#' # Readonly properties: #' -#' @section Output shape: 3D tensor of shape `(num_samples, n, features)`. +#' - `embeddings` #' -#' @family core layers +#' @param input_dim +#' Integer. Size of the vocabulary, +#' i.e. maximum integer index + 1. #' -#' @export -layer_repeat_vector <- function(object, n, - batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) { - - create_layer(keras$layers$RepeatVector, object, list( - n = as.integer(n), - batch_size = as_nullable_integer(batch_size), - name = name, - trainable = trainable, - weights = weights - )) - -} - -#' Wraps arbitrary expression as a layer +#' @param output_dim +#' Integer. Dimension of the dense embedding. #' -#' @inheritParams layer_dense +#' @param embeddings_initializer +#' Initializer for the `embeddings` +#' matrix (see `keras3::initializer_*`). #' -#' @param f The function to be evaluated. Takes input tensor as first -#' argument. -#' @param output_shape Expected output shape from the function (not required -#' when using TensorFlow back-end). -#' @param mask mask -#' @param arguments optional named list of keyword arguments to be passed to the -#' function. +#' @param embeddings_regularizer +#' Regularizer function applied to +#' the `embeddings` matrix (see `keras3::regularizer_*`). #' -#' @section Input shape: Arbitrary. Use the keyword argument input_shape (list -#' of integers, does not include the samples axis) when using this layer as -#' the first layer in a model. +#' @param embeddings_constraint +#' Constraint function applied to +#' the `embeddings` matrix (see `keras3::constraint_*`). 
 #'
-#' @family core layers
+#' @param input_dim
+#' Integer. Size of the vocabulary,
+#' i.e. maximum integer index + 1.
 #'
-#' @export
-layer_repeat_vector <- function(object, n,
-                                batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) {
-
-  create_layer(keras$layers$RepeatVector, object, list(
-    n = as.integer(n),
-    batch_size = as_nullable_integer(batch_size),
-    name = name,
-    trainable = trainable,
-    weights = weights
-  ))
-
-}
-
-#' Wraps arbitrary expression as a layer
+#' @param output_dim
+#' Integer. Dimension of the dense embedding.
 #'
-#' @inheritParams layer_dense
+#' @param embeddings_initializer
+#' Initializer for the `embeddings`
+#' matrix (see `keras3::initializer_*`).
 #'
-#' @param f The function to be evaluated. Takes input tensor as first
-#'   argument.
-#' @param output_shape Expected output shape from the function (not required
-#'   when using TensorFlow back-end).
-#' @param mask mask
-#' @param arguments optional named list of keyword arguments to be passed to the
-#'   function.
+#' @param embeddings_regularizer
+#' Regularizer function applied to
+#' the `embeddings` matrix (see `keras3::regularizer_*`).
 #'
-#' @section Input shape: Arbitrary. Use the keyword argument input_shape (list
-#'   of integers, does not include the samples axis) when using this layer as
-#'   the first layer in a model.
+#' @param embeddings_constraint
+#' Constraint function applied to
+#' the `embeddings` matrix (see `keras3::constraint_*`).
 #'
-#' @section Output shape: Arbitrary (based on tensor returned from the function)
+#' @param mask_zero
+#' Boolean, whether or not the input value 0 is a special
+#' "padding" value that should be masked out.
+#' This is useful when using recurrent layers which
+#' may take variable length input. If this is `TRUE`,
+#' then all subsequent layers in the model need
+#' to support masking or an exception will be raised.
+#' If `mask_zero` is set to `TRUE`, as a consequence,
+#' index 0 cannot be used in the vocabulary (`input_dim` should
+#' equal size of vocabulary + 1).
 #'
-#' @family core layers
+#' @param weights
+#' Optional floating-point matrix of size
+#' `(input_dim, output_dim)`. The initial embeddings values
+#' to use.
 #'
+#' @param lora_rank
+#' Optional integer. If set, the layer's forward pass
+#' will implement LoRA (Low-Rank Adaptation)
+#' with the provided rank. LoRA sets the layer's embeddings
+#' matrix to non-trainable and replaces it with a delta over the
+#' original matrix, obtained via multiplying two lower-rank
+#' trainable matrices. This can be useful to reduce the
+#' computation cost of fine-tuning large embedding layers.
+#' You can also enable LoRA on an existing
+#' `Embedding` layer instance by calling `layer$enable_lora(rank)`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
 #' @export
-layer_lambda <- function(object, f, output_shape = NULL, mask = NULL, arguments = NULL,
-                         input_shape = NULL, batch_input_shape = NULL, batch_size = NULL, dtype = NULL,
-                         name = NULL, trainable = NULL, weights = NULL) {
-
-  args <- list(
-    `function` = f,
-    mask = mask,
-    arguments = arguments,
-    input_shape = normalize_shape(input_shape),
-    batch_input_shape = normalize_shape(batch_input_shape),
-    batch_size = as_nullable_integer(batch_size),
-    dtype = dtype,
-    name = name,
-    trainable = trainable,
-    weights = weights
-  )
-
-  if (backend()$backend() %in% c("theano", "cntk"))
-    args$output_shape <- as_integer_tuple(output_shape, force_tuple = TRUE)
-  else if(!is.null(output_shape))
-    args$output_shape <- normalize_shape(output_shape)
-
-
-  create_layer(keras$layers$Lambda, object, args)
-
+#' @family core layers
+#' @family layers
+#' @seealso
+#' +
+# +
+#' @tether keras.layers.Embedding
+layer_embedding <-
+function (object, input_dim, output_dim, embeddings_initializer = "uniform",
+    embeddings_regularizer = NULL, embeddings_constraint = NULL,
+    mask_zero = FALSE, weights = NULL, lora_rank = NULL, ...)
+{
+    args <- capture_args(list(input_dim = as_integer, output_dim = as_integer,
+        input_shape = normalize_shape, batch_size = as_integer,
+        batch_input_shape = normalize_shape, input_length = as_integer),
+        ignore = "object")
+    create_layer(keras$layers$Embedding, object, args)
 }
 
-#' Layer that applies an update to the cost function based input activity.
+#' Identity layer.
 #'
-#' @inheritParams layer_dense
+#' @description
+#' This layer should be used as a placeholder when no operation is to be
+#' performed. The layer just returns its `inputs` argument as output.
 #'
-#' @param l1 L1 regularization factor (positive float).
-#' @param l2 L2 regularization factor (positive float).
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
 #'
-#' @section Input shape: Arbitrary. Use the keyword argument `input_shape` (list
-#'   of integers, does not include the samples axis) when using this layer as
-#'   the first layer in a model.
-#'
-#' @section Output shape: Same shape as input.
-#'
-#' @family core layers
+#' @param ...
+#' For forward/backward compatibility.
 #'
+#' @inherit layer_dense return
 #' @export
-layer_activity_regularization <- function(object, l1 = 0.0, l2 = 0.0, input_shape = NULL,
-                                          batch_input_shape = NULL, batch_size = NULL,
-                                          dtype = NULL, name = NULL, trainable = NULL,
-                                          weights = NULL) {
+#' @family core layers
+#' @family layers
+# @seealso
+# +
+#' @tether keras.layers.Identity
+layer_identity <-
+function (object, ...)
+{
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$Identity, object, args)
+}
 
-  create_layer(keras$layers$ActivityRegularization, object, list(
-    l1 = l1,
-    l2 = l2,
-    input_shape = normalize_shape(input_shape),
-    batch_input_shape = normalize_shape(batch_input_shape),
-    batch_size = as_nullable_integer(batch_size),
-    dtype = dtype,
-    name = name,
-    trainable = trainable,
-    weights = weights
-  ))
+#' Wraps arbitrary expressions as a `Layer` object.
+#'
+#' @description
+#' The `layer_lambda()` layer exists so that arbitrary expressions can be used
+#' as a `Layer` when constructing Sequential
+#' and Functional API models. `Lambda` layers are best suited for simple
+#' operations or quick experimentation. For more advanced use cases,
+#' prefer writing new subclasses of `Layer` using [`new_layer_class()`].
+#'
+#'
+#' # Examples
+#' ```{r}
+#' # add a x -> x^2 layer
+#' model <- keras_model_sequential()
+#' model |> layer_lambda(\(x) x^2)
+#' ```
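+#'
+#' A further sketch (an assumption: both forms below are passed through to
+#' the backend `Lambda` layer unchanged): declaring `output_shape`
+#' explicitly, first as a fixed shape and then as a function of the input
+#' shape.
+#'
+#' ```{r}
+#' lyr1 <- layer_lambda(f = \(x) x^2, output_shape = c(NA, 32))
+#' lyr2 <- layer_lambda(f = \(x) x^2, output_shape = \(input_shape) input_shape)
+#' ```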
+#'
+#' @param f
+#' The function to be evaluated. Takes input tensor as first
+#' argument.
+#'
+#' @param output_shape
+#' Expected output shape from function. This argument
+#' can usually be inferred if not explicitly provided.
+#' Can be a list or function. If a list, it only specifies
+#' the first dimension onward; sample dimension is assumed
+#' either the same as the input:
+#' `output_shape = c(input_shape[1], output_shape)` or,
+#' the input is `NULL` and the sample dimension is also `NULL`:
+#' `output_shape = c(NA, output_shape)`.
+#' If a function, it specifies the
+#' entire shape as a function of the input shape:
+#' `output_shape = f(input_shape)`.
+#'
+#' @param mask
+#' Either `NULL` (indicating no masking) or a callable with the same
+#' signature as the `compute_mask` layer method, or a tensor
+#' that will be returned as output mask regardless
+#' of what the input is.
+#'
+#' @param arguments
+#' Optional named list of arguments to be passed to the
+#' function.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family core layers
+#' @family layers
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.layers.Lambda
+layer_lambda <-
+function (object, f, output_shape = NULL, mask = NULL, arguments = NULL,
+    ...)
+{
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape,
+        output_shape = normalize_shape), ignore = "object")
+    names(args)[match("f", names(args))] <- "function"
+    create_layer(keras$layers$Lambda, object, args)
 }
+
 #' Masks a sequence by using a mask value to skip timesteps.
 #'
-#' For each timestep in the input tensor (dimension #1 in the tensor), if all
-#' values in the input tensor at that timestep are equal to `mask_value`, then
-#' the timestep will be masked (skipped) in all downstream layers (as long as
-#' they support masking). If any downstream layer does not support masking yet
-#' receives such an input mask, an exception will be raised.
+#' @description
+#' For each timestep in the input tensor (dimension #1 in the tensor),
+#' if all values in the input tensor at that timestep
+#' are equal to `mask_value`, then the timestep will be masked (skipped)
+#' in all downstream layers (as long as they support masking).
 #'
-#' @inheritParams layer_dense
+#' If any downstream layer does not support masking yet receives such
+#' an input mask, an exception will be raised.
 #'
-#' @param mask_value float, mask value
+#' # Examples
+#' Consider an array `x` of shape `c(samples, timesteps, features)`,
+#' to be fed to an LSTM layer. You want to mask timestep #3 and #5 because you
+#' lack data for these timesteps. You can:
 #'
-#' @family core layers
+#' - Set `x[, 3, ] <- 0.` and `x[, 5, ] <- 0.`
+#' - Insert a `layer_masking()` layer with `mask_value = 0.` before the LSTM layer:
 #'
-#' @export
-layer_masking <- function(object, mask_value = 0.0, input_shape = NULL,
-                          batch_input_shape = NULL, batch_size = NULL, dtype = NULL,
-                          name = NULL, trainable = NULL, weights = NULL) {
-
-  create_layer(keras$layers$Masking, object, list(
-    mask_value = mask_value,
-    input_shape = normalize_shape(input_shape),
-    batch_input_shape = normalize_shape(batch_input_shape),
-    batch_size = as_nullable_integer(batch_size),
-    dtype = dtype,
-    name = name,
-    trainable = trainable,
-    weights = weights
-  ))
-
-}
-
-
-
-#' Flattens an input
+#' ```{r}
+#' c(samples, timesteps, features) %<-% c(32, 10, 8)
+#' inputs <- c(samples, timesteps, features) %>% { array(runif(prod(.)), dim = .) }
+#' inputs[, 3, ] <- 0
+#' inputs[, 5, ] <- 0
 #'
-#' Flatten a given input, does not affect the batch size.
+#' model <- keras_model_sequential() %>%
+#'   layer_masking(mask_value = 0) %>%
+#'   layer_lstm(32)
 #'
-#' @inheritParams layer_activation
+#' output <- model(inputs)
+#' # Time steps 3 and 5 will be skipped in the LSTM calculation.
+#' ```
 #'
-#' @param data_format A string. one of `channels_last` (default) or
-#'   `channels_first`. The ordering of the dimensions in the inputs. The purpose
-#'   of this argument is to preserve weight ordering when switching a model from
-#'   one data format to another. `channels_last` corresponds to inputs with
-#'   shape `(batch, ..., channels)` while `channels_first` corresponds to inputs
-#'   with shape `(batch, channels, ...)`. It defaults to the `image_data_format`
-#'   value found in your Keras config file at `~/.keras/keras.json`. If you
-#'   never set it, then it will be "channels_last".
+#' # Note
+#' In the Keras masking convention, a masked timestep is denoted by
+#' a mask value of `FALSE`, while a non-masked (i.e. usable) timestep
+#' is denoted by a mask value of `TRUE`.
 #'
-#' @family core layers
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
 #'
-#' @export
-layer_flatten <- function(object, data_format = NULL, input_shape = NULL, dtype = NULL,
-                          name = NULL, trainable = NULL, weights = NULL) {
-
-  args <- list(
-    input_shape = normalize_shape(input_shape),
-    dtype = dtype,
-    name = name,
-    trainable = trainable,
-    weights = weights
-  )
-
-  if (keras_version() >= "2.2.0") {
-    args$data_format <- data_format
-  } else if (keras_version() >= "2.1.6") {
-    if (is.null(data_format))
-      data_format <- "channels_last"
-    args$data_format <- data_format
-  }
-
-  create_layer(keras$layers$Flatten, object, args)
-
-}
-
-
-
-
-
-as_integer <- function(x) {
-  if (is.numeric(x))
-    as.integer(x)
-  else
-    x
-}
-
-as_integer_tuple <- function(x, force_tuple = FALSE) {
-  if (is.null(x))
-    x
-  else if (is.list(x) || force_tuple)
-    tuple(as.list(as.integer(x)))
-  else
-    as.integer(x)
-}
-
-as_nullable_integer <- function(x) {
-  if (is.null(x))
-    x
-  else
-    as.integer(x)
-}
-
-as_layer_index <- function(x) {
-  if (is.null(x))
-    return(x)
-
-  x <- as.integer(x)
-
-  if (x == 0L)
-    stop("`index` for get_layer() is 1-based (0 was passed as the index)")
-
-  if (x > 0L)
-    x - 1L
-  else
-    x
-}
-
-# Helper function to normalize paths
-normalize_path <- function(path) {
-  if (is.null(path))
-    NULL
-  else
-    normalizePath(path.expand(path), mustWork = FALSE)
-}
-
-
-
-# Helper function to coerce shape arguments to tuple
-# tf$reshape()/k_reshape() doesn't accept a tf.TensorShape object
-normalize_shape <- function(shape) {
-
-  # reflect NULL back
-  if (is.null(shape))
-    return(shape)
-
-  # if it's a list or a numeric vector then convert to integer
-  # NA's in are accepted as NULL
-  # also accept c(NA), as if it was a numeric
-  if (is.list(shape) || is.numeric(shape) ||
-      (is.logical(shape) && all(is.na(shape)))) {
-
-    shape <- lapply(shape, function(value) {
-      # Pass through python objects unmodified, only coerce R objects
-      # supplied shapes, e.g., to tf$random$normal, can be a list that's a mix
-      # of scalar integer tensors and regular integers
-      if (inherits(value, "python.builtin.object"))
-        return(value)
-
-      # accept NA,NA_integer_,NA_real_ as NULL
-      if ((is_scalar(value) && is.na(value)))
-        return(NULL)
-
-      if (!is.null(value))
-        as.integer(value)
-      else
-        NULL
-    })
-  }
-
-  if (inherits(shape, "tensorflow.python.framework.tensor_shape.TensorShape"))
-    shape <- as.list(shape$as_list()) # unpack for tuple()
-
-  # coerce to tuple so it's iterable
-  tuple(shape)
-}
-
-# @export
-# format.python.builtin.object <- function(x, ...) {
-#   capture.output(print(x, ...))
-# }
-
-as_shape <- function(x) {
-  lapply(x, function(d) {
-    if (is.null(d))
-      NULL
-    else
-      as.integer(d)
-  })
-}
-
-#' Create a Keras Layer
-#'
-#' @param layer_class Python layer class or R6 class of type KerasLayer
-#' @param object Object to compose layer with. This is either a
-#'   [keras_model_sequential()] to add the layer to, or another Layer which
-#'   this layer will call.
-#' @param args List of arguments to layer constructor function
-#'
-#' @return A Keras layer
-#'
-#' @note The `object` parameter can be missing, in which case the
-#'   layer is created without a connection to an existing graph.
+#' @param mask_value
+#' see description
 #'
+#' @inherit layer_dense return
 #' @export
-create_layer <- function(layer_class, object, args = list()) {
-
-  safe_to_drop_nulls <- c(
-    "input_shape",
-    "batch_input_shape",
-    "batch_size",
-    "dtype",
-    "name",
-    "trainable",
-    "weights"
-  )
-  for (nm in safe_to_drop_nulls)
-    args[[nm]] <- args[[nm]]
-
-  # convert custom constraints
-  constraint_args <- grepl("^.*_constraint$", names(args))
-  constraint_args <- names(args)[constraint_args]
-  for (arg in constraint_args)
-    args[[arg]] <- as_constraint(args[[arg]])
-
-  if (inherits(layer_class, "R6ClassGenerator")) {
-
-    if (identical(layer_class$get_inherit(), KerasLayer)) {
-      # old-style custom class, inherits KerasLayer
-      c(layer, args) %<-% compat_custom_KerasLayer_handler(layer_class, args)
-      layer_class <- function(...) layer
-    } else {
-      # new-style custom class, inherits anything else, typically keras$layers$Layer
-      layer_class <- r_to_py(layer_class, convert = TRUE)
-    }
-  }
-
-  # create layer from class
-  layer <- do.call(layer_class, args)
-
-  # compose if we have an x
-  if (missing(object) || is.null(object))
-    layer
-  else
-    invisible(compose_layer(object, layer))
-}
-
-
-# Helper function to compose a layer with an object of type Model or Layer
-
-compose_layer <- function(object, layer, ...) {
-  UseMethod("compose_layer")
-}
-
-compose_layer.default <- function(object, layer, ...) {
-  layer(object, ...)
-}
-
-compose_layer.keras.models.Sequential <- function(object, layer, ...) {
-  if(length(list(...)) > 0) warning("arguments passed via ellipsis will be ignored")
-
-  object$add(layer)
-  object
+#' @family core layers
+#' @family layers
+#' @seealso
+#' +
+# +
+#' @tether keras.layers.Masking
+layer_masking <-
+function (object, mask_value = 0, ...)
+{
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$Masking, object, args)
 }
-
-compose_layer.keras.engine.sequential.Sequential <- compose_layer.keras.models.Sequential
diff --git a/R/layers-dropout.R b/R/layers-dropout.R
deleted file mode 100644
index fd36185e7c..0000000000
--- a/R/layers-dropout.R
+++ /dev/null
@@ -1,163 +0,0 @@
-
-#' Applies Dropout to the input.
-#'
-#' Dropout consists in randomly setting a fraction `rate` of input units to 0 at
-#' each update during training time, which helps prevent overfitting.
-#'
-#' @inheritParams layer_dense
-#'
-#' @param rate float between 0 and 1. Fraction of the input units to drop.
-#' @param noise_shape 1D integer tensor representing the shape of the binary
-#'   dropout mask that will be multiplied with the input. For instance, if your
-#'   inputs have shape `(batch_size, timesteps, features)` and you want the
-#'   dropout mask to be the same for all timesteps, you can use
-#'   `noise_shape=c(batch_size, 1, features)`.
-#' @param seed integer to use as random seed.
-#' -#' @family core layers -#' @family dropout layers -#' -#' @export -layer_dropout <- function(object, rate, noise_shape = NULL, seed = NULL, - input_shape = NULL, batch_input_shape = NULL, - batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) { - - create_layer(keras$layers$Dropout, object, list( - rate = rate, - noise_shape = normalize_shape(noise_shape), - seed = seed, - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - name = name, - trainable = trainable, - weights = weights - )) - -} - -#' Spatial 1D version of Dropout. -#' -#' This version performs the same function as Dropout, however it drops entire -#' 1D feature maps instead of individual elements. If adjacent frames within -#' feature maps are strongly correlated (as is normally the case in early -#' convolution layers) then regular dropout will not regularize the activations -#' and will otherwise just result in an effective learning rate decrease. In -#' this case, `layer_spatial_dropout_1d` will help promote independence between -#' feature maps and should be used instead. -#' -#' @inheritParams layer_dropout -#' -#' @section Input shape: 3D tensor with shape: `(samples, timesteps, channels)` -#' -#' @section Output shape: Same as input -#' -#' @section References: - [Efficient Object Localization Using Convolutional -#' Networks](https://arxiv.org/abs/1411.4280) -#' -#' @family dropout layers -#' -#' @export -layer_spatial_dropout_1d <- function(object, rate, - batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) { - - create_layer(keras$layers$SpatialDropout1D, object, list( - rate = rate, - batch_size = as_nullable_integer(batch_size), - name = name, - trainable = trainable, - weights = weights - )) - -} - -#' Spatial 2D version of Dropout. -#' -#' This version performs the same function as Dropout, however it drops entire -#' 2D feature maps instead of individual elements. If adjacent pixels within -#' feature maps are strongly correlated (as is normally the case in early -#' convolution layers) then regular dropout will not regularize the activations -#' and will otherwise just result in an effective learning rate decrease. In -#' this case, `layer_spatial_dropout_2d` will help promote independence between -#' feature maps and should be used instead. -#' -#' @inheritParams layer_spatial_dropout_1d -#' -#' @param rate float between 0 and 1. Fraction of the input units to drop. -#' @param data_format 'channels_first' or 'channels_last'. In 'channels_first' -#' mode, the channels dimension (the depth) is at index 1, in 'channels_last' -#' mode is it at index 3. It defaults to the `image_data_format` value found -#' in your Keras config file at `~/.keras/keras.json`. If you never set it, -#' then it will be "channels_last". -#' -#' @section Input shape: 4D tensor with shape: `(samples, channels, rows, cols)` -#' if data_format='channels_first' or 4D tensor with shape: `(samples, rows, -#' cols, channels)` if data_format='channels_last'. 
-#' -#' @section Output shape: Same as input -#' -#' @section References: - [Efficient Object Localization Using Convolutional -#' Networks](https://arxiv.org/abs/1411.4280) -#' -#' @family dropout layers -#' -#' @export -layer_spatial_dropout_2d <- function(object, rate, data_format = NULL, - batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) { - - create_layer(keras$layers$SpatialDropout2D, object, list( - rate = rate, - data_format = data_format, - batch_size = as_nullable_integer(batch_size), - name = name, - trainable = trainable, - weights = weights - )) - -} - - - -#' Spatial 3D version of Dropout. -#' -#' This version performs the same function as Dropout, however it drops entire -#' 3D feature maps instead of individual elements. If adjacent voxels within -#' feature maps are strongly correlated (as is normally the case in early -#' convolution layers) then regular dropout will not regularize the activations -#' and will otherwise just result in an effective learning rate decrease. In -#' this case, `layer_spatial_dropout_3d` will help promote independence between -#' feature maps and should be used instead. -#' -#' @inheritParams layer_spatial_dropout_1d -#' -#' @param data_format 'channels_first' or 'channels_last'. In 'channels_first' -#' mode, the channels dimension (the depth) is at index 1, in 'channels_last' -#' mode is it at index 4. It defaults to the `image_data_format` value found -#' in your Keras config file at `~/.keras/keras.json`. If you never set it, -#' then it will be "channels_last". -#' -#' @section Input shape: 5D tensor with shape: `(samples, channels, dim1, dim2, -#' dim3)` if data_format='channels_first' or 5D tensor with shape: `(samples, -#' dim1, dim2, dim3, channels)` if data_format='channels_last'. -#' -#' @section Output shape: Same as input -#' -#' @section References: - [Efficient Object Localization Using Convolutional -#' Networks](https://arxiv.org/abs/1411.4280) -#' -#' @family dropout layers -#' -#' @export -layer_spatial_dropout_3d <- function(object, rate, data_format = NULL, - batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) { - - create_layer(keras$layers$SpatialDropout3D, object, list( - rate = rate, - data_format = data_format, - batch_size = as_nullable_integer(batch_size), - name = name, - trainable = trainable, - weights = weights - )) - -} diff --git a/R/layers-embedding.R b/R/layers-embedding.R deleted file mode 100644 index 3bcccbab75..0000000000 --- a/R/layers-embedding.R +++ /dev/null @@ -1,56 +0,0 @@ - - -#' Turns positive integers (indexes) into dense vectors of fixed size. -#' -#' For example, `list(4L, 20L) -> list(c(0.25, 0.1), c(0.6, -0.2))` This layer -#' can only be used as the first layer in a model. -#' -#' @inheritParams layer_dense -#' -#' @param input_dim int > 0. Size of the vocabulary, i.e. maximum integer -#' index + 1. -#' @param output_dim int >= 0. Dimension of the dense embedding. -#' @param embeddings_initializer Initializer for the `embeddings` matrix. -#' @param embeddings_regularizer Regularizer function applied to the -#' `embeddings` matrix. -#' @param activity_regularizer activity_regularizer -#' @param embeddings_constraint Constraint function applied to the `embeddings` -#' matrix. -#' @param mask_zero Whether or not the input value 0 is a special "padding" -#' value that should be masked out. This is useful when using recurrent -#' layers, which may take variable length inputs. 
If this is `TRUE` then all -#' subsequent layers in the model need to support masking or an exception will -#' be raised. If mask_zero is set to TRUE, as a consequence, index 0 cannot be -#' used in the vocabulary (input_dim should equal size of vocabulary + 1). -#' @param input_length Length of input sequences, when it is constant. This -#' argument is required if you are going to connect `Flatten` then `Dense` -#' layers upstream (without it, the shape of the dense outputs cannot be -#' computed). -#' -#' @section Input shape: 2D tensor with shape: `(batch_size, sequence_length)`. -#' -#' @section Output shape: 3D tensor with shape: `(batch_size, sequence_length, -#' output_dim)`. -#' -#' @section References: -#' - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](https://arxiv.org/abs/1512.05287) -#' -#' @export -layer_embedding <- function(object, input_dim, output_dim, embeddings_initializer = "uniform", embeddings_regularizer = NULL, - activity_regularizer = NULL, embeddings_constraint = NULL, mask_zero = FALSE, input_length = NULL, - batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) { - create_layer(keras$layers$Embedding, object, list( - input_dim = as.integer(input_dim), - output_dim = as.integer(output_dim), - embeddings_initializer = embeddings_initializer, - embeddings_regularizer = embeddings_regularizer, - activity_regularizer = activity_regularizer, - embeddings_constraint = embeddings_constraint, - mask_zero = mask_zero, - input_length = if (!is.null(input_length)) as.integer(input_length) else NULL, - batch_size = as_nullable_integer(batch_size), - name = name, - trainable = trainable, - weights = weights - )) -} diff --git a/R/layers-features.R b/R/layers-features.R deleted file mode 100644 index 8ddadd9f68..0000000000 --- a/R/layers-features.R +++ /dev/null @@ -1,37 +0,0 @@ -#' Constructs a DenseFeatures. -#' -#' A layer that produces a dense Tensor based on given feature_columns. -#' -#' @inheritParams layer_dense -#' -#' @param feature_columns An iterable containing the FeatureColumns to use as -#' inputs to your model. All items should be instances of classes derived from -#' `DenseColumn` such as `numeric_column`, `embedding_column`, `bucketized_column`, -#' `indicator_column`. If you have categorical features, you can wrap them with an -#' `embedding_column` or `indicator_column`. See `tfestimators::feature_columns()`. -#' -#' @family core layers -#' -#' @export -layer_dense_features <- function(object, feature_columns, name = NULL, - trainable = NULL, input_shape = NULL, - batch_input_shape = NULL, batch_size = NULL, dtype = NULL, - weights = NULL) { - - if (!is_tensorflow_implementation() || !tensorflow::tf_version() >= "1.14") - stop("layer_dense_features requires TensorFlow implementation and version >= 1.14") - - # feature_columns must be unamed otherwise they are converted to a dict - names(feature_columns) <- NULL - - create_layer(keras$layers$DenseFeatures, object, list( - feature_columns = feature_columns, - name = name, - trainable = trainable, - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - dtype = dtype, - weights = weights - )) -} diff --git a/R/layers-locally-connected.R b/R/layers-locally-connected.R deleted file mode 100644 index 82a29b2a14..0000000000 --- a/R/layers-locally-connected.R +++ /dev/null @@ -1,159 +0,0 @@ - -#' Locally-connected layer for 1D inputs. 
-#' -#' `layer_locally_connected_1d()` works similarly to [layer_conv_1d()] , except -#' that weights are unshared, that is, a different set of filters is applied at -#' each different patch of the input. -#' -#' @inheritParams layer_conv_2d -#' -#' @param filters Integer, the dimensionality of the output space (i.e. the -#' number output of filters in the convolution). -#' @param kernel_size An integer or list of a single integer, specifying the -#' length of the 1D convolution window. -#' @param strides An integer or list of a single integer, specifying the stride -#' length of the convolution. Specifying any stride value != 1 is incompatible -#' with specifying any `dilation_rate` value != 1. -#' @param padding Currently only supports `"valid"` (case-insensitive). `"same"` -#' may be supported in the future. -#' @param implementation either 1, 2, or 3. 1 loops over input spatial locations -#' to perform the forward pass. It is memory-efficient but performs a lot of -#' (small) ops. 2 stores layer weights in a dense but sparsely-populated 2D -#' matrix and implements the forward pass as a single matrix-multiply. It uses -#' a lot of RAM but performs few (large) ops. 3 stores layer weights in a -#' sparse tensor and implements the forward pass as a single sparse -#' matrix-multiply. How to choose: 1: large, dense models, 2: small models, 3: -#' large, sparse models, where "large" stands for large input/output -#' activations (i.e. many `filters, input_filters, large input_size, output_size`), -#' and "sparse" stands for few connections between inputs and outputs, i.e. -#' small ratio `filters * input_filters * kernel_size / (input_size * strides)`, -#' where inputs to and outputs of the layer are assumed to have shapes -#' `(input_size, input_filters)`, `(output_size, filters)` respectively. -#' It is recommended to benchmark each in the setting of interest to pick the -#' most efficient one (in terms of speed and memory usage). Correct choice of -#' implementation can lead to dramatic speed improvements (e.g. 50X), -#' potentially at the expense of RAM. Also, only `padding="valid"` is -#' supported by `implementation=1`. -#' -#' @section Input shape: 3D tensor with shape: `(batch_size, steps, input_dim)` -#' -#' @section Output shape: 3D tensor with shape: `(batch_size, new_steps, -#' filters)` `steps` value might have changed due to padding or strides. 
-#' -#' @family locally connected layers -#' -#' @export -layer_locally_connected_1d <- function(object, filters, kernel_size, strides = 1L, padding = "valid", data_format = NULL, - activation = NULL, use_bias = TRUE, kernel_initializer = "glorot_uniform", - bias_initializer = "zeros", kernel_regularizer = NULL, bias_regularizer = NULL, - activity_regularizer = NULL, kernel_constraint = NULL, bias_constraint = NULL, - implementation = 1L, - batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) { - create_layer(keras$layers$LocallyConnected1D, object, list( - filters = as.integer(filters), - kernel_size = as_integer_tuple(kernel_size), - strides = as_integer_tuple(strides), - padding = padding, - data_format = data_format, - activation = activation, - use_bias = use_bias, - kernel_initializer = kernel_initializer, - bias_initializer = bias_initializer, - kernel_regularizer = kernel_regularizer, - bias_regularizer = bias_regularizer, - activity_regularizer = activity_regularizer, - kernel_constraint = kernel_constraint, - bias_constraint = bias_constraint, - implementation = as.integer(implementation), - batch_size = as_nullable_integer(batch_size), - name = name, - trainable = trainable, - weights = weights - )) -} - - - -#' Locally-connected layer for 2D inputs. -#' -#' `layer_locally_connected_2d` works similarly to [layer_conv_2d()], except -#' that weights are unshared, that is, a different set of filters is applied at -#' each different patch of the input. -#' -#' @inheritParams layer_locally_connected_1d -#' -#' @param filters Integer, the dimensionality of the output space (i.e. the -#' number output of filters in the convolution). -#' @param kernel_size An integer or list of 2 integers, specifying the width and -#' height of the 2D convolution window. Can be a single integer to specify the -#' same value for all spatial dimensions. -#' @param strides An integer or list of 2 integers, specifying the strides of -#' the convolution along the width and height. Can be a single integer to -#' specify the same value for all spatial dimensions. Specifying any stride -#' value != 1 is incompatible with specifying any `dilation_rate` value != 1. -#' @param data_format A string, one of `channels_last` (default) or -#' `channels_first`. The ordering of the dimensions in the inputs. -#' `channels_last` corresponds to inputs with shape `(batch, width, height, -#' channels)` while `channels_first` corresponds to inputs with shape `(batch, -#' channels, width, height)`. It defaults to the `image_data_format` value -#' found in your Keras config file at `~/.keras/keras.json`. If you never set -#' it, then it will be "channels_last". -#' @param implementation either 1, 2, or 3. 1 loops over input spatial locations -#' to perform the forward pass. It is memory-efficient but performs a lot of -#' (small) ops. 2 stores layer weights in a dense but sparsely-populated 2D -#' matrix and implements the forward pass as a single matrix-multiply. It uses -#' a lot of RAM but performs few (large) ops. 3 stores layer weights in a -#' sparse tensor and implements the forward pass as a single sparse -#' matrix-multiply. How to choose: 1: large, dense models, 2: small models, 3: -#' large, sparse models, where "large" stands for large input/output -#' activations (i.e. many `filters, input_filters, large input_size, output_size`), -#' and "sparse" stands for few connections between inputs and outputs, i.e. 
-#' small ratio `filters * input_filters * kernel_size / (input_size * strides)`, -#' where inputs to and outputs of the layer are assumed to have shapes -#' `(input_size, input_filters)`, `(output_size, filters)` respectively. -#' It is recommended to benchmark each in the setting of interest to pick the -#' most efficient one (in terms of speed and memory usage). Correct choice of -#' implementation can lead to dramatic speed improvements (e.g. 50X), -#' potentially at the expense of RAM. Also, only `padding="valid"` is -#' supported by `implementation=1`. -#' -#' @section Input shape: 4D tensor with shape: `(samples, channels, rows, cols)` -#' if data_format='channels_first' or 4D tensor with shape: `(samples, rows, -#' cols, channels)` if data_format='channels_last'. -#' -#' @section Output shape: 4D tensor with shape: `(samples, filters, new_rows, -#' new_cols)` if data_format='channels_first' or 4D tensor with shape: -#' `(samples, new_rows, new_cols, filters)` if data_format='channels_last'. -#' `rows` and `cols` values might have changed due to padding. -#' -#' @family locally connected layers -#' -#' @export -layer_locally_connected_2d <- function(object, filters, kernel_size, strides = c(1L, 1L), padding = "valid", data_format = NULL, - activation = NULL, use_bias = TRUE, kernel_initializer = "glorot_uniform", - bias_initializer = "zeros", kernel_regularizer = NULL, bias_regularizer = NULL, - activity_regularizer = NULL, kernel_constraint = NULL, bias_constraint = NULL, - implementation = 1L, - batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) { - create_layer(keras$layers$LocallyConnected2D, object, list( - filters = as.integer(filters), - kernel_size = as_integer_tuple(kernel_size), - strides = as_integer_tuple(strides), - padding = padding, - data_format = data_format, - activation = activation, - use_bias = use_bias, - kernel_initializer = kernel_initializer, - bias_initializer = bias_initializer, - kernel_regularizer = kernel_regularizer, - bias_regularizer = bias_regularizer, - activity_regularizer = activity_regularizer, - kernel_constraint = kernel_constraint, - bias_constraint = bias_constraint, - implementation = as.integer(implementation), - batch_size = as_nullable_integer(batch_size), - name = name, - trainable = trainable, - weights = weights - )) -} diff --git a/R/layers-merge.R b/R/layers-merge.R deleted file mode 100644 index 7d63ebe32a..0000000000 --- a/R/layers-merge.R +++ /dev/null @@ -1,225 +0,0 @@ - - -#' Layer that adds a list of inputs. -#' -#' It takes as input a list of tensors, all of the same shape, and returns a -#' single tensor (also of the same shape). -#' -#' @param inputs A list of input tensors (at least 2). Can be missing. -#' @param ... Standard layer arguments (must be named). -#' -#' @return A tensor, the sum of the inputs. If `inputs` is missing, a keras -#' layer instance is returned. -#' -#' @family merging_layers -#' -#' @seealso -#' + -#' + -#' + -#' -#' @export -layer_add <- function(inputs, ...) { - callable <- if(missing(inputs)) keras$layers$Add else keras$layers$add - args <- capture_args(match.call(), list(batch_size = as_nullable_integer)) - do.call(callable, args) -} - -# TODO: there should be a common topic where we can use -# @inheritDotParams standard-layer-args - - -#' Layer that subtracts two inputs. -#' -#' It takes as input a list of tensors of size 2, both of the same shape, and -#' returns a single tensor, (`inputs[[1]] - inputs[[2]]`), also of the same -#' shape. 
-#' -#' @param inputs A list of input tensors (exactly 2). Can be missing. -#' @param ... Standard layer arguments (must be named). -#' -#' @return A tensor, the difference of the inputs. If `inputs` is missing, a -#' keras layer instance is returned. -#' -#' @family merge layers -#' -#' -#' @seealso -#' + -#' + -#' + -#' -#' @export -layer_subtract <- function(inputs, ...) { - callable <- if (missing(inputs)) keras$layers$Subtract else keras$layers$subtract - args <- capture_args(match.call(), list(batch_size = as_nullable_integer)) - do.call(callable, args) -} - -#' Layer that multiplies (element-wise) a list of inputs. -#' -#' It takes as input a list of tensors, all of the same shape, and returns a -#' single tensor (also of the same shape). -#' -#' @param inputs A list of input tensors (at least 2). Can be missing. -#' @param ... Standard layer arguments (must be named). -#' -#' @return A tensor, the element-wise product of the inputs. If `inputs` is -#' missing, a keras layer instance is returned. -#' -#' @family merge layers -#' -#' -#' @seealso -#' + -#' + -#' + -#' -#' @export -layer_multiply <- function(inputs, ...) { - callable <- if (missing(inputs)) keras$layers$Multiply else keras$layers$multiply - args <- capture_args(match.call(), list(batch_size = as_nullable_integer)) - do.call(callable, args) - -} - - -#' Layer that averages a list of inputs. -#' -#' It takes as input a list of tensors, all of the same shape, and returns a -#' single tensor (also of the same shape). -#' -#' @param inputs A list of input tensors (at least 2). Can be missing. -#' @param ... Standard layer arguments (must be named). -#' -#' @return A tensor, the average of the inputs. If `inputs` is missing, a keras -#' layer instance is returned. -#' -#' @family merge layers -#' -#' -#' @seealso -#' + -#' + -#' + -#' -#' @export -layer_average <- function(inputs, ...) { - callable <- if (missing(inputs)) keras$layers$Average else keras$layers$average - args <- capture_args(match.call(), list(batch_size = as_nullable_integer)) - do.call(callable, args) - -} - -#' Layer that computes the maximum (element-wise) a list of inputs. -#' -#' It takes as input a list of tensors, all of the same shape, and returns a -#' single tensor (also of the same shape). -#' -#' @param inputs A list of input tensors (at least 2). Can be missing. -#' @param ... Standard layer arguments (must be named). -#' -#' @return A tensor, the element-wise maximum of the inputs. If `inputs` is -#' missing, a keras layer instance is returned. -#' -#' @family merge layers -#' -#' -#' @seealso -#' + -#' + -#' + -#' -#' @export -layer_maximum <- function(inputs, ...) { - callable <- if (missing(inputs)) keras$layers$Maximum else keras$layers$maximum - args <- capture_args(match.call(), list(batch_size = as_nullable_integer)) - do.call(callable, args) - -} - - -#' Layer that computes the minimum (element-wise) a list of inputs. -#' -#' It takes as input a list of tensors, all of the same shape, and returns a -#' single tensor (also of the same shape). -#' -#' @param inputs A list of input tensors (at least 2). Can be missing. -#' @param ... Standard layer arguments (must be named). -#' -#' @return A tensor, the element-wise maximum of the inputs. If `inputs` is -#' missing, a keras layer instance is returned. -#' -#' @family merge layers -#' -#' @seealso -#' + -#' + -#' + -#' -#' @export -layer_minimum <- function(inputs, ...) 
{
-  callable <- if (missing(inputs)) keras$layers$Minimum else keras$layers$minimum
-  args <- capture_args(match.call(), list(batch_size = as_nullable_integer))
-  do.call(callable, args)
-}
-
-
-#' Layer that concatenates a list of inputs.
-#'
-#' It takes as input a list of tensors, all of the same shape expect for the
-#' concatenation axis, and returns a single tensor, the concatenation of all
-#' inputs.
-#'
-#' @param inputs A list of input tensors (at least 2). Can be missing.
-#' @param axis Concatenation axis.
-#' @param ... Standard layer arguments (must be named).
-#'
-#' @return A tensor, the concatenation of the inputs alongside axis `axis`. If
-#'   `inputs` is missing, a keras layer instance is returned.
-#'
-#' @family merge layers
-#'
-#' @seealso
-#' +
-#' +
-#' +
-#'
-#' @export
-layer_concatenate <- function(inputs, axis = -1, ...) {
-  callable <- if (missing(inputs)) keras$layers$Concatenate else keras$layers$concatenate
-  # TODO: this axis should probably be 1-based
-  args <- capture_args(match.call(), list(batch_size = as_nullable_integer,
-                                          axis = as.integer))
-  do.call(callable, args)
-}
-
-#' Layer that computes a dot product between samples in two tensors.
-#'
-#' @param inputs A list of input tensors (at least 2). Can be missing.
-#' @param axes Integer or list of integers, axis or axes along which to take the
-#'   dot product.
-#' @param normalize Whether to L2-normalize samples along the dot product axis
-#'   before taking the dot product. If set to TRUE, then the output of the dot
-#'   product is the cosine proximity between the two samples.
-#' @param ... Standard layer arguments (must be named).
-#'
-#' @return If `inputs` is supplied: A tensor, the dot product of the samples
-#'   from the inputs. If `inputs` is missing, a keras layer instance is
-#'   returned.
-#'
-#'
-#' @family merge layers
-#'
-#' @seealso
-#' +
-#' +
-#' +
-#'
-#' @export
-layer_dot <- function(inputs, axes, normalize = FALSE, ...) {
-  callable <- if (missing(inputs)) keras$layers$Dot else keras$layers$dot
-  args <- capture_args(match.call(), list(batch_size = as_nullable_integer,
-                                          axes = as.integer))
-  do.call(callable, args)
-}
diff --git a/R/layers-merging.R b/R/layers-merging.R
new file mode 100644
index 0000000000..14b908690a
--- /dev/null
+++ b/R/layers-merging.R
@@ -0,0 +1,523 @@
+
+
+
+#' Performs elementwise addition operation.
+#'
+#' @description
+#' It takes as input a list of tensors, all of the same shape,
+#' and returns a single tensor (also of the same shape).
+#'
+#' # Examples
+#' ```{r}
+#' input_shape <- c(1, 2, 3)
+#' x1 <- op_ones(input_shape)
+#' x2 <- op_ones(input_shape)
+#' layer_add(x1, x2)
+#' ```
+#'
+#' Usage in a Keras model:
+#'
+#' ```{r}
+#' input1 <- layer_input(shape = c(16))
+#' x1 <- input1 |> layer_dense(8, activation = 'relu')
+#'
+#' input2 <- layer_input(shape = c(32))
+#' x2 <- input2 |> layer_dense(8, activation = 'relu')
+#'
+#' # equivalent to `added <- layer_add(list(x1, x2))`
+#' added <- layer_add(x1, x2)
+#' output <- added |> layer_dense(4)
+#'
+#' model <- keras_model(inputs = c(input1, input2), outputs = output)
+#' ```
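+#'
+#' Usage note (a sketch of the calling convention shown in the function
+#' body below): when called with no tensors at all, the wrapper returns the
+#' layer instance itself, which can then be called on a list of inputs.
+#'
+#' ```{r}
+#' add <- layer_add()
+#' add(list(op_ones(c(1, 2, 3)), op_ones(c(1, 2, 3))))
+#' ```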
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @param inputs
+#' layers to combine
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family add merging layers
+#' @family merging layers
+#' @family layers
+#' @seealso
+#' +
+# +
+#' @tether keras.layers.Add
+layer_add <-
+function (inputs, ...)
+{
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = c("...", "inputs"))
+    dots <- split_dots_named_unnamed(list(...))
+    if (missing(inputs))
+        inputs <- NULL
+    else if (!is.null(inputs) && !is.list(inputs))
+        inputs <- list(inputs)
+    inputs <- c(inputs, dots$unnamed)
+    args <- c(args, dots$named)
+    layer <- create_layer(keras$layers$Add, NULL, args)
+    if (length(inputs))
+        layer(inputs)
+    else layer
+}
+
+
+#' Averages a list of inputs element-wise.
+#'
+#' @description
+#' It takes as input a list of tensors, all of the same shape,
+#' and returns a single tensor (also of the same shape).
+#'
+#' # Examples
+#' ```{r}
+#' input_shape <- c(1, 2, 3)
+#' x1 <- op_ones(input_shape)
+#' x2 <- op_zeros(input_shape)
+#' layer_average(x1, x2)
+#' ```
+#'
+#' Usage in a Keras model:
+#'
+#' ```{r}
+#' input1 <- layer_input(shape = c(16))
+#' x1 <- input1 |> layer_dense(8, activation = 'relu')
+#'
+#' input2 <- layer_input(shape = c(32))
+#' x2 <- input2 |> layer_dense(8, activation = 'relu')
+#'
+#' added <- layer_average(x1, x2)
+#' output <- added |> layer_dense(4)
+#'
+#' model <- keras_model(inputs = c(input1, input2), outputs = output)
+#' ```
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @param inputs
+#' layers to combine
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family average merging layers
+#' @family merging layers
+#' @family layers
+#' @seealso
+#' +
+# +
+#' @tether keras.layers.Average
+layer_average <-
+function (inputs, ...)
+{
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = c("...", "inputs"))
+    dots <- split_dots_named_unnamed(list(...))
+    if (missing(inputs))
+        inputs <- NULL
+    else if (!is.null(inputs) && !is.list(inputs))
+        inputs <- list(inputs)
+    inputs <- c(inputs, dots$unnamed)
+    args <- c(args, dots$named)
+    layer <- create_layer(keras$layers$Average, NULL, args)
+    if (length(inputs))
+        layer(inputs)
+    else layer
+}
+
+
+#' Concatenates a list of inputs.
+#'
+#' @description
+#' It takes as input a list of tensors, all of the same shape except
+#' for the concatenation axis, and returns a single tensor that is the
+#' concatenation of all inputs.
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_arange(20) |> op_reshape(c(2, 2, 5))
+#' y <- op_arange(20, 40) |> op_reshape(c(2, 2, 5))
+#' layer_concatenate(x, y, axis = 2)
+#' ```
+#' Usage in a Keras model:
+#'
+#' ```{r}
+#' x1 <- op_arange(10) |> op_reshape(c(5, 2)) |> layer_dense(8)
+#' x2 <- op_arange(10, 20) |> op_reshape(c(5, 2)) |> layer_dense(8)
+#' y <- layer_concatenate(x1, x2)
+#' ```
+#'
+#' @returns
+#' A tensor, the concatenation of the inputs alongside axis `axis`.
+#'
+#' @param axis
+#' Axis along which to concatenate.
+#'
+#' @param ...
+#' Standard layer keyword arguments.
+#'
+#' @param inputs
+#' layers to combine
+#'
+#' @export
+#' @family concatenate merging layers
+#' @family merging layers
+#' @family layers
+#' @seealso
+#' +
+# +
+#' @tether keras.layers.Concatenate
+layer_concatenate <-
+function (inputs, ..., axis = -1L)
+{
+    args <- capture_args(list(axis = as_axis, input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = c("...", "inputs"))
+    dots <- split_dots_named_unnamed(list(...))
+    if (missing(inputs))
+        inputs <- NULL
+    else if (!is.null(inputs) && !is.list(inputs))
+        inputs <- list(inputs)
+    inputs <- c(inputs, dots$unnamed)
+    args <- c(args, dots$named)
+    layer <- create_layer(keras$layers$Concatenate, NULL, args)
+    if (length(inputs))
+        layer(inputs)
+    else layer
+}
+
+
+#' Computes element-wise dot product of two tensors.
+#'
+#' @description
+#' It takes a list of two inputs, together with the axis of each input
+#' along which the dot product is to be performed.
+#'
+#' Let's say `x` and `y` are the two input tensors with shapes
+#' `(2, 3, 5)` and `(2, 10, 3)`. The batch dimension should be
+#' of same size for both the inputs, and `axes` should correspond
+#' to the dimensions that have the same size in the corresponding
+#' inputs. e.g. with `axes = c(2, 3)`, the dot product of `x` and `y`
+#' will result in a tensor with shape `(2, 5, 10)`.
+#'
+#' # Examples
+#'
+#' ```{r}
+#' x <- op_reshape(0:9, c(1, 5, 2))
+#' y <- op_reshape(10:19, c(1, 2, 5))
+#' layer_dot(x, y, axes = c(2, 3))
+#' ```
+#'
+#' Usage in a Keras model:
+#'
+#' ```{r}
+#' x1 <- op_reshape(0:9, c(5, 2)) |> layer_dense(8)
+#' x2 <- op_reshape(10:19, c(5, 2)) |> layer_dense(8)
+#' shape(x1)
+#' shape(x2)
+#' y <- layer_dot(x1, x2, axes = 2)
+#' shape(y)
+#' ```
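+#'
+#' With `normalize = TRUE`, the same layer instead returns the cosine
+#' proximity between the two samples (a sketch reusing `x1` and `x2` from
+#' the chunk above):
+#'
+#' ```{r}
+#' layer_dot(x1, x2, axes = 2, normalize = TRUE)
+#' ```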
+#'
+#' @returns
+#' A tensor, the dot product of the samples from the inputs.
+#'
+#' @param axes
+#' Integer or list of integers, axis or axes along which to
+#' take the dot product. If a list, should be two integers
+#' corresponding to the desired axis from the first input and the
+#' desired axis from the second input, respectively. Note that the
+#' size of the two selected axes must match.
+#'
+#' @param normalize
+#' Whether to L2-normalize samples along the dot product axis
+#' before taking the dot product. If set to `TRUE`, then
+#' the output of the dot product is the cosine proximity
+#' between the two samples.
+#'
+#' @param ...
+#' Standard layer keyword arguments.
+#'
+#' @param inputs
+#' layers to combine
+#'
+#' @export
+#' @family dot merging layers
+#' @family merging layers
+#' @family layers
+#' @seealso
+#' +
+# +
+#' @tether keras.layers.Dot
+layer_dot <-
+function (inputs, ..., axes, normalize = FALSE)
+{
+    args <- capture_args(list(axes = as_axis, input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = c("...", "inputs"))
+    dots <- split_dots_named_unnamed(list(...))
+    if (missing(inputs))
+        inputs <- NULL
+    else if (!is.null(inputs) && !is.list(inputs))
+        inputs <- list(inputs)
+    inputs <- c(inputs, dots$unnamed)
+    args <- c(args, dots$named)
+    layer <- create_layer(keras$layers$Dot, NULL, args)
+    if (length(inputs))
+        layer(inputs)
+    else layer
+}
+
+
+#' Computes element-wise maximum on a list of inputs.
+#'
+#' @description
+#' It takes as input a list of tensors, all of the same shape,
+#' and returns a single tensor (also of the same shape).
+#'
+#' # Examples
+#' ```{r}
+#' input_shape <- c(2, 3, 4)
+#' x1 <- random_uniform(input_shape)
+#' x2 <- random_uniform(input_shape)
+#' y <- layer_maximum(x1, x2)
+#' ```
+#'
+#' Usage in a Keras model:
+#'
+#' ```{r}
+#' input1 <- layer_input(shape = c(16))
+#' x1 <- input1 |> layer_dense(8, activation = 'relu')
+#' input2 <- layer_input(shape = c(32))
+#' x2 <- input2 |> layer_dense(8, activation = 'relu')
+#' # equivalent to `y <- layer_maximum(list(x1, x2))`
+#' y <- layer_maximum(x1, x2)
+#' out <- y |> layer_dense(4)
+#' model <- keras_model(inputs = c(input1, input2), outputs = out)
+#' ```
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @param inputs
+#' layers to combine
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family maximum merging layers
+#' @family merging layers
+#' @family layers
+#' @seealso
+#' +
+# +
+#' @tether keras.layers.Maximum
+layer_maximum <-
+function (inputs, ...)
+{
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = c("...", "inputs"))
+    dots <- split_dots_named_unnamed(list(...))
+    if (missing(inputs))
+        inputs <- NULL
+    else if (!is.null(inputs) && !is.list(inputs))
+        inputs <- list(inputs)
+    inputs <- c(inputs, dots$unnamed)
+    args <- c(args, dots$named)
+    layer <- create_layer(keras$layers$Maximum, NULL, args)
+    if (length(inputs))
+        layer(inputs)
+    else layer
+}
+
+
+#' Computes elementwise minimum on a list of inputs.
+#'
+#' @description
+#' It takes as input a list of tensors, all of the same shape,
+#' and returns a single tensor (also of the same shape).
+#'
+#' # Examples
+#' ```{r}
+#' input_shape <- c(2, 3, 4)
+#' x1 <- random_uniform(input_shape)
+#' x2 <- random_uniform(input_shape)
+#' y <- layer_minimum(x1, x2)
+#' ```
+#'
+#' Usage in a Keras model:
+#'
+#' ```{r}
+#' input1 <- layer_input(shape = c(16))
+#' x1 <- input1 |> layer_dense(8, activation = 'relu')
+#' input2 <- layer_input(shape = c(32))
+#' x2 <- input2 |> layer_dense(8, activation = 'relu')
+#' # equivalent to `y <- layer_minimum(list(x1, x2))`
+#' y <- layer_minimum(x1, x2)
+#' out <- y |> layer_dense(4)
+#' model <- keras_model(inputs = c(input1, input2), outputs = out)
+#' ```
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @param inputs
+#' layers to combine
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family minimum merging layers
+#' @family merging layers
+#' @family layers
+#' @seealso
+#' +
+# +
+#' @tether keras.layers.Minimum
+layer_minimum <-
+function (inputs, ...)
+{
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = c("...", "inputs"))
+    dots <- split_dots_named_unnamed(list(...))
+    if (missing(inputs))
+        inputs <- NULL
+    else if (!is.null(inputs) && !is.list(inputs))
+        inputs <- list(inputs)
+    inputs <- c(inputs, dots$unnamed)
+    args <- c(args, dots$named)
+    layer <- create_layer(keras$layers$Minimum, NULL, args)
+    if (length(inputs))
+        layer(inputs)
+    else layer
+}
+
+
+#' Performs elementwise multiplication.
+#'
+#' @description
+#' It takes as input a list of tensors, all of the same shape,
+#' and returns a single tensor (also of the same shape).
+#'
+#' # Examples
+#' ```{r}
+#' input_shape <- c(2, 3, 4)
+#' x1 <- random_uniform(input_shape)
+#' x2 <- random_uniform(input_shape)
+#' y <- layer_multiply(x1, x2)
+#' ```
+#'
+#' Usage in a Keras model:
+#'
+#' ```{r}
+#' input1 <- layer_input(shape = c(16))
+#' x1 <- input1 |> layer_dense(8, activation = 'relu')
+#' input2 <- layer_input(shape = c(32))
+#' x2 <- input2 |> layer_dense(8, activation = 'relu')
+#' # equivalent to `y <- layer_multiply(list(x1, x2))`
+#' y <- layer_multiply(x1, x2)
+#' out <- y |> layer_dense(4)
+#' model <- keras_model(inputs = c(input1, input2), outputs = out)
+#' ```
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @param inputs
+#' layers to combine
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family multiply merging layers
+#' @family merging layers
+#' @family layers
+#' @seealso
+#' +
+# +
+#' @tether keras.layers.Multiply
+layer_multiply <-
+function (inputs, ...)
+{
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = c("...", "inputs"))
+    dots <- split_dots_named_unnamed(list(...))
+    if (missing(inputs))
+        inputs <- NULL
+    else if (!is.null(inputs) && !is.list(inputs))
+        inputs <- list(inputs)
+    inputs <- c(inputs, dots$unnamed)
+    args <- c(args, dots$named)
+    layer <- create_layer(keras$layers$Multiply, NULL, args)
+    if (length(inputs))
+        layer(inputs)
+    else layer
+}
+
+
+#' Performs elementwise subtraction.
+#'
+#' @description
+#' It takes as input a list of tensors of size 2, both of the
+#' same shape, and returns a single tensor (`inputs[[1]] - inputs[[2]]`)
+#' of the same shape.
+#'
+#' # Examples
+#' ```{r}
+#' input_shape <- c(2, 3, 4)
+#' x1 <- random_uniform(input_shape)
+#' x2 <- random_uniform(input_shape)
+#' y <- layer_subtract(list(x1, x2))
+#' ```
+#'
+#' Usage in a Keras model:
+#'
+#' ```{r}
+#' input1 <- layer_input(shape = 16)
+#' x1 <- layer_dense(input1, units = 8, activation = 'relu')
+#' input2 <- layer_input(shape = 32)
+#' x2 <- layer_dense(input2, units = 8, activation = 'relu')
+#' subtracted <- layer_subtract(list(x1, x2))
+#' out <- layer_dense(subtracted, units = 4)
+#' model <- keras_model(inputs = list(input1, input2), outputs = out)
+#' ```
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @param inputs
+#' layers to combine
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family subtract merging layers
+#' @family merging layers
+#' @family layers
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.layers.Subtract
+layer_subtract <-
+function (inputs, ...)
+{
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = c("...", "inputs"))
+    dots <- split_dots_named_unnamed(list(...))
+    if (missing(inputs))
+        inputs <- NULL
+    else if (!is.null(inputs) && !is.list(inputs))
+        inputs <- list(inputs)
+    inputs <- c(inputs, dots$unnamed)
+    args <- c(args, dots$named)
+    layer <- create_layer(keras$layers$Subtract, NULL, args)
+    if (length(inputs))
+        layer(inputs)
+    else layer
+}
diff --git a/R/layers-noise.R b/R/layers-noise.R
deleted file mode 100644
index 99aabdaf6a..0000000000
--- a/R/layers-noise.R
+++ /dev/null
@@ -1,119 +0,0 @@
-
-
-#' Apply additive zero-centered Gaussian noise.
-#'
-#' This is useful to mitigate overfitting (you could see it as a form of random
-#' data augmentation). Gaussian Noise (GS) is a natural choice as corruption
-#' process for real valued inputs.
-#' As it is a regularization layer, it is only
-#' active at training time.
-#'
-#' @inheritParams layer_dense
-#'
-#' @param stddev float, standard deviation of the noise distribution.
-#'
-#' @section Input shape: Arbitrary. Use the keyword argument `input_shape` (list
-#'   of integers, does not include the samples axis) when using this layer as
-#'   the first layer in a model.
-#'
-#' @section Output shape: Same shape as input.
-#'
-#' @family noise layers
-#'
-#' @export
-layer_gaussian_noise <- function(object, stddev, input_shape = NULL,
-                                 batch_input_shape = NULL, batch_size = NULL, dtype = NULL,
-                                 name = NULL, trainable = NULL, weights = NULL) {
-  create_layer(keras$layers$GaussianNoise, object, list(
-    stddev = stddev,
-    input_shape = normalize_shape(input_shape),
-    batch_input_shape = normalize_shape(batch_input_shape),
-    batch_size = as_nullable_integer(batch_size),
-    dtype = dtype,
-    name = name,
-    trainable = trainable,
-    weights = weights
-  ))
-}
-
-#' Apply multiplicative 1-centered Gaussian noise.
-#'
-#' As it is a regularization layer, it is only active at training time.
-#'
-#' @inheritParams layer_dense
-#'
-#' @param rate float, drop probability (as with `Dropout`). The multiplicative
-#'   noise will have standard deviation `sqrt(rate / (1 - rate))`.
-#'
-#' @section Input shape: Arbitrary. Use the keyword argument `input_shape` (list
-#'   of integers, does not include the samples axis) when using this layer as
-#'   the first layer in a model.
-#'
-#' @section Output shape: Same shape as input.
-#'
-#' @section References:
-#'   - [Dropout: A Simple Way to Prevent Neural Networks from Overfitting Srivastava, Hinton, et al. 2014](https://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf)
-#'
-#' @family noise layers
-#'
-#' @export
-layer_gaussian_dropout <- function(object, rate, input_shape = NULL,
-                                   batch_input_shape = NULL, batch_size = NULL, dtype = NULL,
-                                   name = NULL, trainable = NULL, weights = NULL) {
-  create_layer(keras$layers$GaussianDropout, object, list(
-    rate = rate,
-    input_shape = normalize_shape(input_shape),
-    batch_input_shape = normalize_shape(batch_input_shape),
-    batch_size = as_nullable_integer(batch_size),
-    dtype = dtype,
-    name = name,
-    trainable = trainable,
-    weights = weights
-  ))
-}
-
-
-#' Applies Alpha Dropout to the input.
-#'
-#' Alpha Dropout is a dropout that keeps mean and variance of inputs to their
-#' original values, in order to ensure the self-normalizing property even after
-#' this dropout.
-#'
-#' Alpha Dropout fits well to Scaled Exponential Linear Units by randomly
-#' setting activations to the negative saturation value.
-#'
-#' @inheritParams layer_dense
-#'
-#' @param rate float, drop probability (as with `layer_dropout()`). The
-#'   multiplicative noise will have standard deviation `sqrt(rate / (1 -
-#'   rate))`.
-#' @param noise_shape Noise shape
-#' @param seed An integer to use as random seed.
-#' @param ... standard layer arguments.
-#'
-#' @section Input shape: Arbitrary. Use the keyword argument `input_shape` (list
-#'   of integers, does not include the samples axis) when using this layer as
-#'   the first layer in a model.
-#'
-#' @section Output shape: Same shape as input.
-#'
-#' @section References:
-#'   - [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515)
-#'
-#' @seealso
-#'
-#'
-#' @family noise layers
-#'
-#' @export
-layer_alpha_dropout <-
-function(object, rate, noise_shape = NULL, seed = NULL, ...) {
-  args <- capture_args(match.call(),
-    modifiers = list(
-      seed = as_nullable_integer,
-      input_shape = normalize_shape,
-      batch_input_shape = normalize_shape,
-      batch_size = as_nullable_integer
-    ),
-    ignore = "object"
-  )
-  create_layer(keras$layers$AlphaDropout, object, args)
-}
diff --git a/R/layers-normalization.R b/R/layers-normalization.R
index e54b891b24..28c4b85235 100644
--- a/R/layers-normalization.R
+++ b/R/layers-normalization.R
@@ -1,219 +1,509 @@
-#' Batch normalization layer (Ioffe and Szegedy, 2014).
-#'
-#' Normalize the activations of the previous layer at each batch, i.e. applies a
-#' transformation that maintains the mean activation close to 0 and the
-#' activation standard deviation close to 1.
-#'
-#' @inheritParams layer_dense
-#'
-#' @param axis Integer, the axis that should be normalized (typically the
-#'   features axis). For instance, after a `Conv2D` layer with
-#'   `data_format="channels_first"`, set `axis=1` in `BatchNormalization`.
-#' @param momentum Momentum for the moving mean and the moving variance.
-#' @param epsilon Small float added to variance to avoid dividing by zero.
-#' @param center If TRUE, add offset of `beta` to normalized tensor. If FALSE,
-#'   `beta` is ignored.
-#' @param scale If TRUE, multiply by `gamma`. If FALSE, `gamma` is not used.
-#'   When the next layer is linear (also e.g. `nn.relu`), this can be disabled
-#'   since the scaling will be done by the next layer.
-#' @param beta_initializer Initializer for the beta weight.
-#' @param gamma_initializer Initializer for the gamma weight.
-#' @param moving_mean_initializer Initializer for the moving mean.
-#' @param moving_variance_initializer Initializer for the moving variance.
-#' @param beta_regularizer Optional regularizer for the beta weight.
-#' @param gamma_regularizer Optional regularizer for the gamma weight.
-#' @param beta_constraint Optional constraint for the beta weight.
-#' @param gamma_constraint Optional constraint for the gamma weight.
-#' @param renorm Whether to use Batch Renormalization
-#'   (https://arxiv.org/abs/1702.03275). This adds extra variables during
-#'   training. The inference is the same for either value of this parameter.
-#' @param renorm_clipping A named list or dictionary that may map keys `rmax`,
-#'   `rmin`, `dmax` to scalar Tensors used to clip the renorm correction. The
-#'   correction `(r, d)` is used as `corrected_value = normalized_value * r + d`,
-#'   with `r` clipped to `[rmin, rmax]`, and `d` to `[-dmax, dmax]`. Missing `rmax`,
-#'   `rmin`, `dmax` are set to `Inf`, `0`, `Inf`, `respectively`.
-#' @param renorm_momentum Momentum used to update the moving means and standard
-#'   deviations with renorm. Unlike momentum, this affects training and should
-#'   be neither too small (which would add noise) nor too large (which would
-#'   give stale estimates). Note that momentum is still applied to get the means
-#'   and variances for inference.
-#' @param fused `TRUE`, use a faster, fused implementation, or raise a ValueError
-#'   if the fused implementation cannot be used. If `NULL`, use the faster
-#'   implementation if possible. If `FALSE`, do not use the fused implementation.
-#' @param virtual_batch_size An integer. By default, virtual_batch_size is `NULL`,
-#'   which means batch normalization is performed across the whole batch.
-#'   When virtual_batch_size is not `NULL`, instead perform "Ghost Batch
-#'   Normalization", which creates virtual sub-batches which are each normalized
-#'   separately (with shared gamma, beta, and moving statistics). Must divide
-#'   the actual `batch size` during execution.
-#' @param adjustment A function taking the Tensor containing the (dynamic) shape
-#'   of the input tensor and returning a pair `(scale, bias)` to apply to the
-#'   normalized values `(before gamma and beta)`, only during training.
-#'   For example, if `axis==-1`,
-#'   \code{adjustment <- function(shape) {
-#'     tuple(tf$random$uniform(shape[-1:NULL, style = "python"], 0.93, 1.07),
-#'           tf$random$uniform(shape[-1:NULL, style = "python"], -0.1, 0.1))
-#'   }}
-#'   will scale the normalized value
-#'   by up to 7% up or down, then shift the result by up to 0.1 (with
-#'   independent scaling and bias for each feature but shared across all examples),
-#'   and finally apply gamma and/or beta. If `NULL`, no adjustment is applied.
-#'   Cannot be specified if virtual_batch_size is specified.
-#' @section Input shape: Arbitrary. Use the keyword argument `input_shape` (list
-#'   of integers, does not include the samples axis) when using this layer as
-#'   the first layer in a model.
-#'
-#' @section Output shape: Same shape as input.
-#'
-#' @section References:
-#' - [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](https://arxiv.org/abs/1502.03167)
+
+
+#' Layer that normalizes its inputs.
+#'
+#' @description
+#' Batch normalization applies a transformation that maintains the mean output
+#' close to 0 and the output standard deviation close to 1.
+#'
+#' Importantly, batch normalization works differently during training and
+#' during inference.
+#'
+#' **During training** (i.e. when using `fit()` or when calling the layer/model
+#' with the argument `training = TRUE`), the layer normalizes its output using
+#' the mean and standard deviation of the current batch of inputs. That is to
+#' say, for each channel being normalized, the layer returns
+#' `gamma * (batch - mean(batch)) / sqrt(var(batch) + epsilon) + beta`, where:
+#'
+#' - `epsilon` is a small constant (configurable as part of the constructor
+#'   arguments)
+#' - `gamma` is a learned scaling factor (initialized as 1), which
+#'   can be disabled by passing `scale = FALSE` to the constructor.
+#' - `beta` is a learned offset factor (initialized as 0), which
+#'   can be disabled by passing `center = FALSE` to the constructor.
+#'
+#' **During inference** (i.e. when using `evaluate()` or `predict()` or when
+#' calling the layer/model with the argument `training = FALSE` (which is the
+#' default), the layer normalizes its output using a moving average of the
+#' mean and standard deviation of the batches it has seen during training. That
+#' is to say, it returns
+#' `gamma * (batch - self$moving_mean) / sqrt(self$moving_var + epsilon) + beta`.
+#'
+#' `self$moving_mean` and `self$moving_var` are non-trainable variables that
+#' are updated each time the layer is called in training mode, as such:
+#'
+#' - `moving_mean = moving_mean * momentum + mean(batch) * (1 - momentum)`
+#' - `moving_var = moving_var * momentum + var(batch) * (1 - momentum)`
Must divide -#' the actual `batch size` during execution. -#' @param adjustment A function taking the Tensor containing the (dynamic) shape -#' of the input tensor and returning a pair `(scale, bias)` to apply to the -#' normalized values `(before gamma and beta)`, only during training. -#' For example, if `axis==-1`, -#' \code{adjustment <- function(shape) { -#' tuple(tf$random$uniform(shape[-1:NULL, style = "python"], 0.93, 1.07), -#' tf$random$uniform(shape[-1:NULL, style = "python"], -0.1, 0.1)) -#' }} -#' will scale the normalized value -#' by up to 7% up or down, then shift the result by up to 0.1 (with -#' independent scaling and bias for each feature but shared across all examples), -#' and finally apply gamma and/or beta. If `NULL`, no adjustment is applied. -#' Cannot be specified if virtual_batch_size is specified. -#' @section Input shape: Arbitrary. Use the keyword argument `input_shape` (list -#' of integers, does not include the samples axis) when using this layer as -#' the first layer in a model. -#' -#' @section Output shape: Same shape as input. -#' -#' @section References: -#' - [Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift](https://arxiv.org/abs/1502.03167) + + +#' Layer that normalizes its inputs. +#' +#' @description +#' Batch normalization applies a transformation that maintains the mean output +#' close to 0 and the output standard deviation close to 1. +#' +#' Importantly, batch normalization works differently during training and +#' during inference. +#' +#' **During training** (i.e. when using `fit()` or when calling the layer/model +#' with the argument `training = TRUE`), the layer normalizes its output using +#' the mean and standard deviation of the current batch of inputs. That is to +#' say, for each channel being normalized, the layer returns +#' `gamma * (batch - mean(batch)) / sqrt(var(batch) + epsilon) + beta`, where: +#' +#' - `epsilon` is small constant (configurable as part of the constructor +#' arguments) +#' - `gamma` is a learned scaling factor (initialized as 1), which +#' can be disabled by passing `scale = FALSE` to the constructor. +#' - `beta` is a learned offset factor (initialized as 0), which +#' can be disabled by passing `center = FALSE` to the constructor. +#' +#' **During inference** (i.e. when using `evaluate()` or `predict()` or when +#' calling the layer/model with the argument `training = FALSE` (which is the +#' default), the layer normalizes its output using a moving average of the +#' mean and standard deviation of the batches it has seen during training. That +#' is to say, it returns +#' `gamma * (batch - self$moving_mean) / sqrt(self$moving_var+epsilon) + beta`. +#' +#' `self$moving_mean` and `self$moving_var` are non-trainable variables that +#' are updated each time the layer in called in training mode, as such: +#' +#' - `moving_mean = moving_mean * momentum + mean(batch) * (1 - momentum)` +#' - `moving_var = moving_var * momentum + var(batch) * (1 - momentum)` +#' +#' As such, the layer will only normalize its inputs during inference +#' *after having been trained on data that has similar statistics as the +#' inference data*. +#' +#' **About setting `layer$trainable <- FALSE` on a `BatchNormalization` layer:** +#' +#' The meaning of setting `layer$trainable <- FALSE` is to freeze the layer, +#' i.e. 
its internal state will not change during training:
+#' its trainable weights will not be updated
+#' during `fit()` or `train_on_batch()`, and its state updates will not be run.
+#'
+#' Usually, this does not necessarily mean that the layer is run in inference
+#' mode (which is normally controlled by the `training` argument that can
+#' be passed when calling a layer). "Frozen state" and "inference mode"
+#' are two separate concepts.
+#'
+#' However, in the case of the `BatchNormalization` layer, **setting
+#' `trainable <- FALSE` on the layer means that the layer will be
+#' subsequently run in inference mode** (meaning that it will use
+#' the moving mean and the moving variance to normalize the current batch,
+#' rather than using the mean and variance of the current batch).
+#'
+#' Note that:
+#'
+#' - Setting `trainable` on a model containing other layers will recursively
+#' set the `trainable` value of all inner layers.
+#' - If the value of the `trainable` attribute is changed after calling
+#' `compile()` on a model, the new value doesn't take effect for this model
+#' until `compile()` is called again.
+#'
+#' # Call Arguments
+#' - `inputs`: Input tensor (of any rank).
+#' - `training`: R boolean indicating whether the layer should behave in
+#' training mode or in inference mode.
+#' - `training = TRUE`: The layer will normalize its inputs using
+#' the mean and variance of the current batch of inputs.
+#' - `training = FALSE`: The layer will normalize its inputs using
+#' the mean and variance of its moving statistics, learned during
+#' training.
+#' - `mask`: Binary tensor of shape broadcastable to `inputs` tensor, with
+#' `TRUE` values indicating the positions for which mean and variance
+#' should be computed. Masked elements of the current inputs are not
+#' taken into account for mean and variance computation during
+#' training. Any prior unmasked element values will be taken into
+#' account until their momentum expires.
+#'
+#' # Reference
+#' - [Ioffe and Szegedy, 2015](https://arxiv.org/abs/1502.03167).
+#'
+#' @param axis
+#' Integer, the axis that should be normalized
+#' (typically the features axis). For instance, after a `Conv2D` layer
+#' with `data_format = "channels_first"`, use `axis = 2`.
+#'
+#' @param momentum
+#' Momentum for the moving average.
+#'
+#' @param epsilon
+#' Small float added to variance to avoid dividing by zero.
+#'
+#' @param center
+#' If `TRUE`, add offset of `beta` to normalized tensor.
+#' If `FALSE`, `beta` is ignored.
+#'
+#' @param scale
+#' If `TRUE`, multiply by `gamma`. If `FALSE`, `gamma` is not used.
+#' When the next layer is linear this can be disabled
+#' since the scaling will be done by the next layer.
+#'
+#' @param beta_initializer
+#' Initializer for the beta weight.
+#'
+#' @param gamma_initializer
+#' Initializer for the gamma weight.
+#'
+#' @param moving_mean_initializer
+#' Initializer for the moving mean.
+#'
+#' @param moving_variance_initializer
+#' Initializer for the moving variance.
 #'
+#' @param beta_regularizer
+#' Optional regularizer for the beta weight.
+#'
+#' @param gamma_regularizer
+#' Optional regularizer for the gamma weight.
+#'
+#' @param beta_constraint
+#' Optional constraint for the beta weight.
+#'
+#' @param gamma_constraint
+#' Optional constraint for the gamma weight.
+#'
+#' @param synchronized
+#' Only applicable with the TensorFlow backend.
+#' If `TRUE`, synchronizes the global batch statistics (mean and +#' variance) for the layer across all devices at each training step +#' in a distributed training strategy. +#' If `FALSE`, each replica uses its own local batch statistics. +#' +#' @param ... +#' Base layer keyword arguments (e.g. `name` and `dtype`). +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @inherit layer_dense return #' @export -layer_batch_normalization <- function(object, axis = -1L, momentum = 0.99, epsilon = 0.001, center = TRUE, scale = TRUE, - beta_initializer = "zeros", gamma_initializer = "ones", - moving_mean_initializer = "zeros", moving_variance_initializer = "ones", - beta_regularizer = NULL, gamma_regularizer = NULL, beta_constraint = NULL, - gamma_constraint = NULL, renorm = FALSE, renorm_clipping = NULL, - renorm_momentum = 0.99, fused = NULL, virtual_batch_size = NULL, - adjustment = NULL, input_shape = NULL, batch_input_shape = NULL, - batch_size = NULL, dtype = NULL, name = NULL, trainable = NULL, weights = NULL) { +#' @family normalization layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.BatchNormalization +layer_batch_normalization <- +function (object, axis = -1L, momentum = 0.99, epsilon = 0.001, + center = TRUE, scale = TRUE, beta_initializer = "zeros", + gamma_initializer = "ones", moving_mean_initializer = "zeros", + moving_variance_initializer = "ones", beta_regularizer = NULL, + gamma_regularizer = NULL, beta_constraint = NULL, gamma_constraint = NULL, + synchronized = FALSE, ...) +{ + args <- capture_args(list(axis = as_axis, input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape), + ignore = "object") + create_layer(keras$layers$BatchNormalization, object, args) +} - stopifnot(is.null(adjustment) || is.function(adjustment)) - create_layer(keras$layers$BatchNormalization, object, list( - axis = as.integer(axis), - momentum = momentum, - epsilon = epsilon, - center = center, - scale = scale, - beta_initializer = beta_initializer, - gamma_initializer = gamma_initializer, - moving_mean_initializer = moving_mean_initializer, - moving_variance_initializer = moving_variance_initializer, - beta_regularizer = beta_regularizer, - gamma_regularizer = gamma_regularizer, - beta_constraint = beta_constraint, - gamma_constraint = gamma_constraint, - renorm = renorm, - renorm_clipping = renorm_clipping, - renorm_momentum = renorm_momentum, - fused = fused, - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - dtype = dtype, - name = name, - trainable = trainable, - virtual_batch_size = as_nullable_integer(virtual_batch_size), - adjustment = adjustment, - weights = weights - )) +#' Group normalization layer. +#' +#' @description +#' Group Normalization divides the channels into groups and computes +#' within each group the mean and variance for normalization. +#' Empirically, its accuracy is more stable than batch norm in a wide +#' range of small batch sizes, if learning rate is adjusted linearly +#' with batch sizes. +#' +#' Relation to Layer Normalization: +#' If the number of groups is set to 1, then this operation becomes nearly +#' identical to Layer Normalization (see Layer Normalization docs for details). 
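+#'
+#' For example, a minimal sketch (the input shape and `groups` value here are
+#' chosen purely for illustration): an input with 32 channels is normalized
+#' in 8 groups of 4 channels each, and the output keeps the input's shape.
+#'
+#' ```{r}
+#' x <- random_uniform(c(2, 8, 8, 32))
+#' y <- x |> layer_group_normalization(groups = 8)
+#' shape(y)  # same shape as the input: (2, 8, 8, 32)
+#' ```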
+#'
+#' Relation to Instance Normalization:
+#' If the number of groups is set to the input dimension (number of groups is
+#' equal to the number of channels), then this operation becomes identical to
+#' Instance Normalization. You can achieve this via `groups=-1`.
+#'
+#' # Input Shape
+#' Arbitrary. Use the keyword argument
+#' `input_shape` (list of integers, does not include the samples
+#' axis) when using this layer as the first layer in a model.
+#'
+#' # Output Shape
+#' Same shape as input.
+#'
+#' # Reference
+#' - [Yuxin Wu & Kaiming He, 2018](https://arxiv.org/abs/1803.08494)
+#'
+#' @param groups
+#' Integer, the number of groups for Group Normalization. Can be in
+#' the range `[1, N]` where N is the input dimension. The input
+#' dimension must be divisible by the number of groups.
+#' Defaults to 32.
+#'
+#' @param axis
+#' Integer or List/Tuple. The axis or axes to normalize across.
+#' Typically, this is the features axis/axes. The left-out axes are
+#' typically the batch axis/axes. -1 is the last dimension in the
+#' input. Defaults to `-1`.
+#'
+#' @param epsilon
+#' Small float added to variance to avoid dividing by zero.
+#' Defaults to 1e-3.
+#'
+#' @param center
+#' If `TRUE`, add offset of `beta` to normalized tensor.
+#' If `FALSE`, `beta` is ignored. Defaults to `TRUE`.
+#'
+#' @param scale
+#' If `TRUE`, multiply by `gamma`. If `FALSE`, `gamma` is not used.
+#' When the next layer is linear (also e.g. `relu`), this can be
+#' disabled since the scaling will be done by the next layer.
+#' Defaults to `TRUE`.
+#'
+#' @param beta_initializer
+#' Initializer for the beta weight. Defaults to zeros.
+#'
+#' @param gamma_initializer
+#' Initializer for the gamma weight. Defaults to ones.
+#'
+#' @param beta_regularizer
+#' Optional regularizer for the beta weight. `NULL` by
+#' default.
+#'
+#' @param gamma_regularizer
+#' Optional regularizer for the gamma weight. `NULL` by
+#' default.
+#'
+#' @param beta_constraint
+#' Optional constraint for the beta weight.
+#' `NULL` by default.
+#'
+#' @param gamma_constraint
+#' Optional constraint for the gamma weight. `NULL` by
+#' default.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family normalization layers
+#' @family layers
+#' @seealso
+#' +

# +

+#' @tether keras.layers.GroupNormalization
+layer_group_normalization <-
+function (object, groups = 32L, axis = -1L, epsilon = 0.001,
+    center = TRUE, scale = TRUE, beta_initializer = "zeros",
+    gamma_initializer = "ones", beta_regularizer = NULL, gamma_regularizer = NULL,
+    beta_constraint = NULL, gamma_constraint = NULL, ...)
+{
+    args <- capture_args(list(groups = as_integer, axis = as_axis,
+        input_shape = normalize_shape, batch_size = as_integer,
+        batch_input_shape = normalize_shape), ignore = "object")
+    create_layer(keras$layers$GroupNormalization, object, args)
 }

 #' Layer normalization layer (Ba et al., 2016).
 #'
+#' @description
 #' Normalize the activations of the previous layer for each given example in a
-#' batch independently, rather than across a batch like Batch Normalization. i.e.
-#' applies a transformation that maintains the mean activation within each example
-#' close to 0 and the activation standard deviation close to 1.
-#' -#' Given a tensor inputs, moments are calculated and normalization is performed -#' across the axes specified in axis. -#' -#' @inheritParams layer_dense -#' @param axis Integer or List/Tuple. The axis or axes to normalize across. -#' Typically this is the features axis/axes. The left-out axes are typically -#' the batch axis/axes. This argument defaults to -1, the last dimension in -#' the input. -#' @param epsilon Small float added to variance to avoid dividing by zero. -#' Defaults to 1e-3 -#' @param center If True, add offset of beta to normalized tensor. If False, -#' beta is ignored. Defaults to True. -#' @param scale If True, multiply by gamma. If False, gamma is not used. -#' Defaults to True. When the next layer is linear (also e.g. nn.relu), this -#' can be disabled since the scaling will be done by the next layer. -#' @param beta_initializer Initializer for the beta weight. Defaults to zeros. -#' @param gamma_initializer Initializer for the gamma weight. Defaults to ones. -#' @param beta_regularizer Optional regularizer for the beta weight. -#' None by default. -#' @param gamma_regularizer Optional regularizer for the gamma weight. -#' None by default. -#' @param beta_constraint Optional constraint for the beta weight. None by default. -#' @param gamma_constraint Optional constraint for the gamma weight. -#' None by default. -#' @param trainable Boolean, if True the variables will be marked as trainable. -#' Defaults to True. +#' batch independently, rather than across a batch like Batch Normalization. +#' i.e. applies a transformation that maintains the mean activation within each +#' example close to 0 and the activation standard deviation close to 1. +#' +#' If `scale` or `center` are enabled, the layer will scale the normalized +#' outputs by broadcasting them with a trainable variable `gamma`, and center +#' the outputs by broadcasting with a trainable variable `beta`. `gamma` will +#' default to a ones tensor and `beta` will default to a zeros tensor, so that +#' centering and scaling are no-ops before training has begun. +#' +#' So, with scaling and centering enabled the normalization equations +#' are as follows: +#' +#' Let the intermediate activations for a mini-batch to be the `inputs`. +#' +#' For each sample `x` in a batch of `inputs`, we compute the mean and +#' variance of the sample, normalize each value in the sample +#' (including a small factor `epsilon` for numerical stability), +#' and finally, +#' transform the normalized output by `gamma` and `beta`, +#' which are learned parameters: +#' +#' ```{r, eval = FALSE} +#' outputs <- inputs |> apply(1, function(x) { +#' x_normalized <- (x - mean(x)) / +#' sqrt(var(x) + epsilon) +#' x_normalized * gamma + beta +#' }) +#' ``` +#' +#' `gamma` and `beta` will span the axes of `inputs` specified in `axis`, and +#' this part of the inputs' shape must be fully defined. +#' +#' For example: +#' +#' ```{r} +#' layer <- layer_layer_normalization(axis = c(2, 3, 4)) +#' +#' layer(op_ones(c(5, 20, 30, 40))) |> invisible() # build() +#' shape(layer$beta) +#' shape(layer$gamma) +#' ``` +#' +#' Note that other implementations of layer normalization may choose to define +#' `gamma` and `beta` over a separate set of axes from the axes being +#' normalized across. For example, Group Normalization +#' ([Wu et al. 
2018](https://arxiv.org/abs/1803.08494)) with group size of 1 +#' corresponds to a `layer_layer_normalization()` that normalizes across height, width, +#' and channel and has `gamma` and `beta` span only the channel dimension. +#' So, this `layer_layer_normalization()` implementation will not match a +#' `layer_group_normalization()` layer with group size set to 1. +#' +#' # Reference +#' - [Lei Ba et al., 2016](https://arxiv.org/abs/1607.06450). +#' +#' @param axis +#' Integer or list. The axis or axes to normalize across. +#' Typically, this is the features axis/axes. The left-out axes are +#' typically the batch axis/axes. `-1` is the last dimension in the +#' input. Defaults to `-1`. +#' +#' @param epsilon +#' Small float added to variance to avoid dividing by zero. +#' Defaults to 1e-3. +#' +#' @param center +#' If `TRUE`, add offset of `beta` to normalized tensor. If `FALSE`, +#' `beta` is ignored. Defaults to `TRUE`. +#' +#' @param scale +#' If `TRUE`, multiply by `gamma`. If `FALSE`, `gamma` is not used. +#' When the next layer is linear (also e.g. `layer_activation_relu()`), this can be +#' disabled since the scaling will be done by the next layer. +#' Defaults to `TRUE`. +#' +#' @param rms_scaling +#' If `TRUE`, `center` and `scale` are ignored, and the +#' inputs are scaled by `gamma` and the inverse square root +#' of the square of all inputs. This is an approximate and faster +#' approach that avoids ever computing the mean of the input. +#' +#' @param beta_initializer +#' Initializer for the beta weight. Defaults to zeros. +#' +#' @param gamma_initializer +#' Initializer for the gamma weight. Defaults to ones. +#' +#' @param beta_regularizer +#' Optional regularizer for the beta weight. +#' `NULL` by default. #' +#' @param gamma_regularizer +#' Optional regularizer for the gamma weight. +#' `NULL` by default. +#' +#' @param beta_constraint +#' Optional constraint for the beta weight. +#' `NULL` by default. +#' +#' @param gamma_constraint +#' Optional constraint for the gamma weight. +#' `NULL` by default. +#' +#' @param ... +#' Base layer keyword arguments (e.g. `name` and `dtype`). +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @inherit layer_dense return #' @export -layer_layer_normalization <- function( - object, - axis=-1, - epsilon=0.001, - center=TRUE, - scale=TRUE, - beta_initializer="zeros", - gamma_initializer="ones", - beta_regularizer=NULL, - gamma_regularizer=NULL, - beta_constraint=NULL, - gamma_constraint=NULL, - trainable=TRUE, - name=NULL -) { +#' @family normalization layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.LayerNormalization +layer_layer_normalization <- +function (object, axis = -1L, epsilon = 0.001, center = TRUE, + scale = TRUE, rms_scaling = FALSE, beta_initializer = "zeros", + gamma_initializer = "ones", beta_regularizer = NULL, gamma_regularizer = NULL, + beta_constraint = NULL, gamma_constraint = NULL, ...) 
+{ + args <- capture_args(list(axis = as_axis, input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape), + ignore = "object") + create_layer(keras$layers$LayerNormalization, object, args) +} + - create_layer(keras$layers$LayerNormalization, object, list( - axis=as.integer(axis), - epsilon=epsilon, - center=center, - scale=scale, - beta_initializer=beta_initializer, - gamma_initializer=gamma_initializer, - beta_regularizer=beta_regularizer, - gamma_regularizer=gamma_regularizer, - beta_constraint=beta_constraint, - gamma_constraint=gamma_constraint, - trainable=trainable, - name=name - )) +#' Performs spectral normalization on the weights of a target layer. +#' +#' @description +#' This wrapper controls the Lipschitz constant of the weights of a layer by +#' constraining their spectral norm, which can stabilize the training of GANs. +#' +#' # Examples +#' Wrap `layer_conv_2d`: +#' ```{r} +#' x <- random_uniform(c(1, 10, 10, 1)) +#' conv2d <- layer_spectral_normalization( +#' layer = layer_conv_2d(filters = 2, kernel_size = 2) +#' ) +#' y <- conv2d(x) +#' shape(y) +#' ``` +#' +#' Wrap `layer_dense`: +#' ```{r} +#' x <- random_uniform(c(1, 10, 10, 1)) +#' dense <- layer_spectral_normalization(layer = layer_dense(units = 10)) +#' y <- dense(x) +#' shape(y) +#' ``` +#' +#' # Reference +#' - [Spectral Normalization for GAN](https://arxiv.org/abs/1802.05957). +#' +#' @param layer +#' A `Layer` instance that +#' has either a `kernel` (e.g. `layer_conv_2d`, `layer_dense`...) +#' or an `embeddings` attribute (`layer_embedding` layer). +#' +#' @param power_iterations +#' int, the number of iterations during normalization. +#' +#' @param ... +#' Base wrapper keyword arguments. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @inherit layer_dense return +#' @export +#' @family normalization layers +#' @family layers +# @seealso +# + +#' +#' @tether keras.layers.SpectralNormalization +layer_spectral_normalization <- +function (object, layer, power_iterations = 1L, ...) +{ + args <- capture_args(list(power_iterations = as_integer, + input_shape = normalize_shape, batch_size = as_integer, + batch_input_shape = normalize_shape), ignore = "object") + create_layer(keras$layers$SpectralNormalization, object, + args) } -#' Unit normalization layer +#' Unit normalization layer. #' -#' @details +#' @description #' Normalize a batch of inputs so that each input in the batch has a L2 norm #' equal to 1 (across the axes specified in `axis`). #' -#' @inheritParams layer_dense -#' @param axis Integer or list. The axis or axes to normalize across. Typically -#' this is the features axis or axes. The left-out axes are typically the -#' batch axis or axes. Defaults to `-1`, the last dimension in -#' the input. -#' @param ... standard layer arguments. -#' -#' ````r -#' data <- as_tensor(1:6, shape = c(2, 3), dtype = "float32") -#' normalized_data <- data %>% layer_unit_normalization() -#' for(row in 1:2) -#' normalized_data[row, ] %>% -#' { sum(.^2) } %>% -#' print() -#' # tf.Tensor(0.9999999, shape=(), dtype=float32) -#' # tf.Tensor(1.0, shape=(), dtype=float32) -#' ```` +#' # Examples +#' ```{r} +#' data <- op_reshape(1:6, newshape = c(2, 3)) +#' normalized_data <- layer_unit_normalization(data) +#' op_sum(normalized_data[1,]^2) +#' ``` #' -#' @seealso -#' + +#' @param axis +#' Integer or list. The axis or axes to normalize across. +#' Typically, this is the features axis or axes. 
The left-out axes are
+#' typically the batch axis or axes. `-1` is the last dimension
+#' in the input. Defaults to `-1`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family normalization layers
+#' @family layers
+#' @seealso
+#' +

# +

#'
+#' @tether keras.layers.UnitNormalization
layer_unit_normalization <-
-function(object, axis = -1L, ...)
+function (object, axis = -1L, ...)
{
-  args <- capture_args(match.call(), list(axis = as_axis),
-                       ignore = "object")
-  create_layer(keras$layers$UnitNormalization, object, args)
+    args <- capture_args(list(axis = as_axis, input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$UnitNormalization, object, args)
}
-
diff --git a/R/layers-pooling.R b/R/layers-pooling.R
index feade96a28..a4ed58a726 100644
--- a/R/layers-pooling.R
+++ b/R/layers-pooling.R
@@ -1,425 +1,1058 @@
-#' Max pooling operation for temporal data.
-#'
-#' @inheritParams layer_dense
-#'
-#' @param pool_size Integer, size of the max pooling windows.
-#' @param strides Integer, or NULL. Factor by which to downscale. E.g. 2 will
-#' halve the input. If NULL, it will default to `pool_size`.
-#' @param padding One of `"valid"` or `"same"` (case-insensitive).
-#' @param data_format A string, one of "channels_last" (default) or
-#' "channels_first". The ordering of the dimensions in the inputs.
-#' channels_last corresponds to inputs with shape `(batch, steps, features)`
-#' while channels_first corresponds to inputs with shape `(batch, features, steps)`.
-#'
-#' @section Input Shape:
-#' If data_format='channels_last': 3D tensor with shape `(batch_size, steps, features)`.
-#' If data_format='channels_first': 3D tensor with shape `(batch_size, features, steps)`.
-#'
-#' @section Output shape:
-#' If data_format='channels_last': 3D tensor with shape `(batch_size, downsampled_steps, features)`.
-#' If data_format='channels_first': 3D tensor with shape `(batch_size, features, downsampled_steps)`.
-#'
-#' @family pooling layers
+
+#' Average pooling for temporal data.
 #'
+#' @description
+#' Downsamples the input representation by taking the average value over the
+#' window defined by `pool_size`. The window is shifted by `strides`. The
+#' resulting output when using the "valid" padding option has a shape of:
+#' `output_shape = (input_shape - pool_size + 1) / strides`
+#'
+#' The resulting output shape when using the "same" padding option is:
+#' `output_shape = input_shape / strides`
+#'
+#' # Input Shape
+#' - If `data_format="channels_last"`:
+#' 3D tensor with shape `(batch_size, steps, features)`.
+#' - If `data_format="channels_first"`:
+#' 3D tensor with shape `(batch_size, features, steps)`.
+#'
+#' # Output Shape
+#' - If `data_format="channels_last"`:
+#' 3D tensor with shape `(batch_size, downsampled_steps, features)`.
+#' - If `data_format="channels_first"`:
+#' 3D tensor with shape `(batch_size, features, downsampled_steps)`.
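+#'
+#' As a worked check of the formulas above, using the numbers from the
+#' examples below: 5 input steps with `pool_size = 2` and `strides = 1` give
+#' `(5 - 2 + 1) / 1 = 4` output steps with `"valid"` padding, and
+#' `5 / 1 = 5` output steps with `"same"` padding.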
+#' +#' # Examples +#' `strides=1` and `padding="valid"`: +#' +#' ```{r} +#' x <- op_array(c(1., 2., 3., 4., 5.)) |> op_reshape(c(1, 5, 1)) +#' output <- x |> +#' layer_average_pooling_1d(pool_size = 2, +#' strides = 1, +#' padding = "valid") +#' output +#' ``` +#' +#' `strides=2` and `padding="valid"`: +#' +#' ```{r} +#' x <- op_array(c(1., 2., 3., 4., 5.)) |> op_reshape(c(1, 5, 1)) +#' output <- x |> +#' layer_average_pooling_1d(pool_size = 2, +#' strides = 2, +#' padding = "valid") +#' output +#' ``` +#' +#' `strides=1` and `padding="same"`: +#' +#' ```{r} +#' x <- op_array(c(1., 2., 3., 4., 5.)) |> op_reshape(c(1, 5, 1)) +#' output <- x |> +#' layer_average_pooling_1d(pool_size = 2, +#' strides = 1, +#' padding = "same") +#' output +#' ``` +#' +#' @param pool_size +#' int, size of the max pooling window. +#' +#' @param strides +#' int or `NULL`. Specifies how much the pooling window moves +#' for each pooling step. If `NULL`, it will default to `pool_size`. +#' +#' @param padding +#' string, either `"valid"` or `"same"` (case-insensitive). +#' `"valid"` means no padding. `"same"` results in padding evenly to +#' the left/right or up/down of the input such that output has the same +#' height/width dimension as the input. +#' +#' @param data_format +#' string, either `"channels_last"` or `"channels_first"`. +#' The ordering of the dimensions in the inputs. `"channels_last"` +#' corresponds to inputs with shape `(batch, steps, features)` +#' while `"channels_first"` corresponds to inputs with shape +#' `(batch, features, steps)`. It defaults to the `image_data_format` +#' value found in your Keras config file at `~/.keras/keras.json`. +#' If you never set it, then it will be `"channels_last"`. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @param name +#' String, name for the object +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit layer_dense return #' @export -layer_max_pooling_1d <- function(object, pool_size = 2L, strides = NULL, padding = "valid", - data_format='channels_last', - batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) { - - create_layer(keras$layers$MaxPooling1D, object, list( - pool_size = as.integer(pool_size), - strides = as_nullable_integer(strides), - padding = padding, - data_format = match.arg(data_format, c("channels_last", "channels_first")), - batch_size = as_nullable_integer(batch_size), - name = name, - trainable = trainable, - weights = weights - )) +#' @family pooling layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.AveragePooling1D +layer_average_pooling_1d <- +function (object, pool_size, strides = NULL, padding = "valid", + data_format = NULL, name = NULL, ...) +{ + args <- capture_args(list(pool_size = as_integer, strides = as_integer, + input_shape = normalize_shape, batch_size = as_integer, + batch_input_shape = normalize_shape), ignore = "object") + create_layer(keras$layers$AveragePooling1D, object, args) } - -#' Max pooling operation for spatial data. -#' -#' @inheritParams layer_conv_2d -#' @inheritParams layer_max_pooling_1d -#' -#' @param pool_size integer or list of 2 integers, factors by which to downscale -#' (vertical, horizontal). (2, 2) will halve the input in both spatial -#' dimension. If only one integer is specified, the same window length will be -#' used for both dimensions. -#' @param strides Integer, list of 2 integers, or NULL. Strides values. If NULL, -#' it will default to `pool_size`. 
-#' @param padding One of `"valid"` or `"same"` (case-insensitive). -#' -#' @section Input shape: -#' - If `data_format='channels_last'`: 4D tensor with shape: `(batch_size, rows, cols, channels)` -#' - If `data_format='channels_first'`: 4D tensor with shape: `(batch_size, channels, rows, cols)` -#' -#' @section Output shape: -#' - If `data_format='channels_last'`: 4D tensor with shape: `(batch_size, pooled_rows, pooled_cols, channels)` -#' - If `data_format='channels_first'`: 4D tensor with shape: `(batch_size, channels, pooled_rows, pooled_cols)` -#' -#' @family pooling layers -#' +#' Average pooling operation for 2D spatial data. +#' +#' @description +#' Downsamples the input along its spatial dimensions (height and width) +#' by taking the average value over an input window +#' (of size defined by `pool_size`) for each channel of the input. +#' The window is shifted by `strides` along each dimension. +#' +#' The resulting output when using the `"valid"` padding option has a spatial +#' shape (number of rows or columns) of: +#' `output_shape = math.floor((input_shape - pool_size) / strides) + 1` +#' (when `input_shape >= pool_size`) +#' +#' The resulting output shape when using the `"same"` padding option is: +#' `output_shape = math.floor((input_shape - 1) / strides) + 1` +#' +#' # Input Shape +#' - If `data_format="channels_last"`: +#' 4D tensor with shape `(batch_size, height, width, channels)`. +#' - If `data_format="channels_first"`: +#' 4D tensor with shape `(batch_size, channels, height, width)`. +#' +#' # Output Shape +#' - If `data_format="channels_last"`: +#' 4D tensor with shape +#' `(batch_size, pooled_height, pooled_width, channels)`. +#' - If `data_format="channels_first"`: +#' 4D tensor with shape +#' `(batch_size, channels, pooled_height, pooled_width)`. +#' +#' # Examples +#' `strides=(1, 1)` and `padding="valid"`: +#' +#' ```{r} +#' x <- op_array(1:9, "float32") |> op_reshape(c(1, 3, 3, 1)) +#' output <- x |> +#' layer_average_pooling_2d(pool_size = c(2, 2), +#' strides = c(1, 1), +#' padding = "valid") +#' output +#' ``` +#' +#' `strides=(2, 2)` and `padding="valid"`: +#' +#' ```{r} +#' x <- op_array(1:12, "float32") |> op_reshape(c(1, 3, 4, 1)) +#' output <- x |> +#' layer_average_pooling_2d(pool_size = c(2, 2), +#' strides = c(2, 2), +#' padding = "valid") +#' output +#' ``` +#' +#' `stride=(1, 1)` and `padding="same"`: +#' +#' ```{r} +#' x <- op_array(1:9, "float32") |> op_reshape(c(1, 3, 3, 1)) +#' output <- x |> +#' layer_average_pooling_2d(pool_size = c(2, 2), +#' strides = c(1, 1), +#' padding = "same") +#' output +#' ``` +#' +#' @param pool_size +#' int or list of 2 integers, factors by which to downscale +#' (dim1, dim2). If only one integer is specified, the same +#' window length will be used for all dimensions. +#' +#' @param strides +#' int or list of 2 integers, or `NULL`. Strides values. If `NULL`, +#' it will default to `pool_size`. If only one int is specified, the +#' same stride size will be used for all dimensions. +#' +#' @param padding +#' string, either `"valid"` or `"same"` (case-insensitive). +#' `"valid"` means no padding. `"same"` results in padding evenly to +#' the left/right or up/down of the input such that output has the same +#' height/width dimension as the input. +#' +#' @param data_format +#' string, either `"channels_last"` or `"channels_first"`. +#' The ordering of the dimensions in the inputs. 
`"channels_last"` +#' corresponds to inputs with shape `(batch, height, width, channels)` +#' while `"channels_first"` corresponds to inputs with shape +#' `(batch, channels, height, width)`. It defaults to the +#' `image_data_format` value found in your Keras config file at +#' `~/.keras/keras.json`. If you never set it, then it will be +#' `"channels_last"`. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @param name +#' String, name for the object +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit layer_dense return #' @export -layer_max_pooling_2d <- function(object, pool_size = c(2L, 2L), strides = NULL, padding = "valid", data_format = NULL, - batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) { - - create_layer(keras$layers$MaxPooling2D, object, list( - pool_size = as.integer(pool_size), - strides = as_nullable_integer(strides), - padding = padding, - data_format = data_format, - batch_size = as_nullable_integer(batch_size), - name = name, - trainable = trainable, - weights = weights - )) - +#' @family pooling layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.AveragePooling2D +layer_average_pooling_2d <- +function (object, pool_size, strides = NULL, padding = "valid", + data_format = NULL, name = NULL, ...) +{ + args <- capture_args(list(pool_size = as_integer, strides = as_integer, + input_shape = normalize_shape, batch_size = as_integer, + batch_input_shape = normalize_shape), ignore = "object") + create_layer(keras$layers$AveragePooling2D, object, args) } -#' Max pooling operation for 3D data (spatial or spatio-temporal). -#' -#' @inheritParams layer_max_pooling_1d -#' -#' @param pool_size list of 3 integers, factors by which to downscale (dim1, -#' dim2, dim3). (2, 2, 2) will halve the size of the 3D input in each -#' dimension. -#' @param strides list of 3 integers, or NULL. Strides values. -#' @param padding One of `"valid"` or `"same"` (case-insensitive). -#' @param data_format A string, one of `channels_last` (default) or -#' `channels_first`. The ordering of the dimensions in the inputs. -#' `channels_last` corresponds to inputs with shape `(batch, spatial_dim1, -#' spatial_dim2, spatial_dim3, channels)` while `channels_first` corresponds -#' to inputs with shape `(batch, channels, spatial_dim1, spatial_dim2, -#' spatial_dim3)`. It defaults to the `image_data_format` value found in your -#' Keras config file at `~/.keras/keras.json`. If you never set it, then it -#' will be "channels_last". -#' -#' @section Input shape: -#' - If `data_format='channels_last'`: 5D tensor with shape: `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` -#' - If `data_format='channels_first'`: 5D tensor with shape: `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` -#' -#' @section Output shape: -#' - If `data_format='channels_last'`: 5D tensor with shape: `(batch_size, pooled_dim1, pooled_dim2, pooled_dim3, channels)` -#' - If `data_format='channels_first'`: 5D tensor with shape: `(batch_size, channels, pooled_dim1, pooled_dim2, pooled_dim3)` -#' -#' @family pooling layers +#' Average pooling operation for 3D data (spatial or spatio-temporal). #' +#' @description +#' Downsamples the input along its spatial dimensions (depth, height, and +#' width) by taking the average value over an input window (of size defined by +#' `pool_size`) for each channel of the input. The window is shifted by +#' `strides` along each dimension. 
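+#'
+#' For example, as in the example below, a `(30, 30, 30)` spatial input pooled
+#' with `pool_size = 3` (so `strides` also defaults to 3) is downsampled to
+#' `(10, 10, 10)`: each spatial dimension is split into ten non-overlapping
+#' windows of size 3.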
+#' +#' # Input Shape +#' - If `data_format="channels_last"`: +#' 5D tensor with shape: +#' `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` +#' - If `data_format="channels_first"`: +#' 5D tensor with shape: +#' `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` +#' +#' # Output Shape +#' - If `data_format="channels_last"`: +#' 5D tensor with shape: +#' `(batch_size, pooled_dim1, pooled_dim2, pooled_dim3, channels)` +#' - If `data_format="channels_first"`: +#' 5D tensor with shape: +#' `(batch_size, channels, pooled_dim1, pooled_dim2, pooled_dim3)` +#' +#' # Examples +#' ```{r} +#' depth <- height <- width <- 30 +#' channels <- 3 +#' +#' inputs <- layer_input(shape = c(depth, height, width, channels)) +#' outputs <- inputs |> layer_average_pooling_3d(pool_size = 3) +#' outputs # Shape: (batch_size, 10, 10, 10, 3) +#' ``` +#' +#' @param pool_size +#' int or list of 3 integers, factors by which to downscale +#' (dim1, dim2, dim3). If only one integer is specified, the same +#' window length will be used for all dimensions. +#' +#' @param strides +#' int or list of 3 integers, or `NULL`. Strides values. If `NULL`, +#' it will default to `pool_size`. If only one int is specified, the +#' same stride size will be used for all dimensions. +#' +#' @param padding +#' string, either `"valid"` or `"same"` (case-insensitive). +#' `"valid"` means no padding. `"same"` results in padding evenly to +#' the left/right or up/down of the input such that output has the same +#' height/width dimension as the input. +#' +#' @param data_format +#' string, either `"channels_last"` or `"channels_first"`. +#' The ordering of the dimensions in the inputs. `"channels_last"` +#' corresponds to inputs with shape +#' `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` while +#' `"channels_first"` corresponds to inputs with shape +#' `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. +#' It defaults to the `image_data_format` value found in your Keras +#' config file at `~/.keras/keras.json`. If you never set it, then it +#' will be `"channels_last"`. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @param name +#' String, name for the object +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit layer_dense return #' @export -layer_max_pooling_3d <- function(object, pool_size = c(2L, 2L, 2L), strides = NULL, padding = "valid", data_format = NULL, - batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) { - - create_layer(keras$layers$MaxPooling3D, object, list( - pool_size = as.integer(pool_size), - strides = as_nullable_integer(strides), - padding = padding, - data_format = data_format, - batch_size = as_nullable_integer(batch_size), - name = name, - trainable = trainable, - weights = weights - )) - +#' @family pooling layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.AveragePooling3D +layer_average_pooling_3d <- +function (object, pool_size, strides = NULL, padding = "valid", + data_format = NULL, name = NULL, ...) +{ + args <- capture_args(list(pool_size = as_integer, strides = as_integer, + input_shape = normalize_shape, batch_size = as_integer, + batch_input_shape = normalize_shape), ignore = "object") + create_layer(keras$layers$AveragePooling3D, object, args) } -#' Average pooling for temporal data. -#' -#' @inheritParams layer_max_pooling_1d -#' -#' @param pool_size Integer, size of the average pooling windows. 
-#' @param strides Integer, or NULL. Factor by which to downscale. E.g. 2 will -#' halve the input. If NULL, it will default to `pool_size`. -#' @param padding One of `"valid"` or `"same"` (case-insensitive). -#' @param data_format One of `channels_last` (default) or `channels_first`. -#' The ordering of the dimensions in the inputs. -#' -#' @section Input shape: 3D tensor with shape: `(batch_size, steps, features)`. -#' -#' @section Output shape: 3D tensor with shape: `(batch_size, downsampled_steps, -#' features)`. -#' -#' @family pooling layers +#' Global average pooling operation for temporal data. #' +#' @description +#' +#' # Call Arguments +#' - `inputs`: A 3D tensor. +#' - `mask`: Binary tensor of shape `(batch_size, steps)` indicating whether +#' a given step should be masked (excluded from the average). +#' +#' # Input Shape +#' - If `data_format='channels_last'`: +#' 3D tensor with shape: +#' `(batch_size, steps, features)` +#' - If `data_format='channels_first'`: +#' 3D tensor with shape: +#' `(batch_size, features, steps)` +#' +#' # Output Shape +#' - If `keepdims=FALSE`: +#' 2D tensor with shape `(batch_size, features)`. +#' - If `keepdims=TRUE`: +#' - If `data_format="channels_last"`: +#' 3D tensor with shape `(batch_size, 1, features)` +#' - If `data_format="channels_first"`: +#' 3D tensor with shape `(batch_size, features, 1)` +#' +#' # Examples +#' ```{r} +#' x <- random_uniform(c(2, 3, 4)) +#' y <- x |> layer_global_average_pooling_1d() +#' shape(y) +#' ``` +#' +#' @param data_format +#' string, either `"channels_last"` or `"channels_first"`. +#' The ordering of the dimensions in the inputs. `"channels_last"` +#' corresponds to inputs with shape `(batch, steps, features)` +#' while `"channels_first"` corresponds to inputs with shape +#' `(batch, features, steps)`. It defaults to the `image_data_format` +#' value found in your Keras config file at `~/.keras/keras.json`. +#' If you never set it, then it will be `"channels_last"`. +#' +#' @param keepdims +#' A boolean, whether to keep the temporal dimension or not. +#' If `keepdims` is `FALSE` (default), the rank of the tensor is +#' reduced for spatial dimensions. If `keepdims` is `TRUE`, the +#' temporal dimension are retained with length 1. +#' The behavior is the same as for `tf$reduce_mean()` or `op_mean()`. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit layer_dense return #' @export -layer_average_pooling_1d <- function(object, pool_size = 2L, strides = NULL, padding = "valid", - data_format = "channels_last", - batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) { - - args <- list( - pool_size = as.integer(pool_size), - strides = as_nullable_integer(strides), - padding = padding, - batch_size = as_nullable_integer(batch_size), - name = name, - trainable = trainable, - weights = weights - ) - - if (keras_version() >= "2.2.3") - args$data_format <- data_format - - create_layer(keras$layers$AveragePooling1D, object, args) - -} - -#' Average pooling operation for spatial data. -#' -#' @inheritParams layer_conv_2d -#' @inheritParams layer_average_pooling_1d -#' -#' @param pool_size integer or list of 2 integers, factors by which to downscale -#' (vertical, horizontal). (2, 2) will halve the input in both spatial -#' dimension. If only one integer is specified, the same window length will be -#' used for both dimensions. -#' @param strides Integer, list of 2 integers, or NULL. 
Strides values. If NULL,
-#' it will default to `pool_size`.
-#' @param padding One of `"valid"` or `"same"` (case-insensitive).
-#'
-#' @section Input shape:
-#' - If `data_format='channels_last'`: 4D tensor with shape: `(batch_size, rows, cols, channels)`
-#' - If `data_format='channels_first'`: 4D tensor with shape: `(batch_size, channels, rows, cols)`
-#'
-#' @section Output shape:
-#' - If `data_format='channels_last'`: 4D tensor with shape: `(batch_size, pooled_rows, pooled_cols, channels)`
-#' - If `data_format='channels_first'`: 4D tensor with shape: `(batch_size, channels, pooled_rows, pooled_cols)`
-#'
 #' @family pooling layers
-#'
-#' @export
-layer_average_pooling_2d <- function(object, pool_size = c(2L, 2L), strides = NULL, padding = "valid", data_format = NULL,
-                                     batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) {
+#' @family layers
+#' @seealso
+#' +

# +

+#' @tether keras.layers.GlobalAveragePooling1D
+layer_global_average_pooling_1d <-
+function (object, data_format = NULL, keepdims = FALSE, ...)
+{
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$GlobalAveragePooling1D, object,
+        args)
+}

-  create_layer(keras$layers$AveragePooling2D, object, list(
-    pool_size = as.integer(pool_size),
-    strides = as_nullable_integer(strides),
-    padding = padding,
-    data_format = data_format,
-    batch_size = as_nullable_integer(batch_size),
-    name = name,
-    trainable = trainable,
-    weights = weights
-  ))
+#' Global average pooling operation for 2D data.
+#'
+#' @description
+#'
+#' # Input Shape
+#' - If `data_format='channels_last'`:
+#' 4D tensor with shape:
+#' `(batch_size, height, width, channels)`
+#' - If `data_format='channels_first'`:
+#' 4D tensor with shape:
+#' `(batch_size, channels, height, width)`
+#'
+#' # Output Shape
+#' - If `keepdims=FALSE`:
+#' 2D tensor with shape `(batch_size, channels)`.
+#' - If `keepdims=TRUE`:
+#' - If `data_format="channels_last"`:
+#' 4D tensor with shape `(batch_size, 1, 1, channels)`
+#' - If `data_format="channels_first"`:
+#' 4D tensor with shape `(batch_size, channels, 1, 1)`
+#'
+#' # Examples
+#' ```{r}
+#' x <- random_uniform(c(2, 4, 5, 3))
+#' y <- x |> layer_global_average_pooling_2d()
+#' shape(y)
+#' ```
+#'
+#' @param data_format
+#' string, either `"channels_last"` or `"channels_first"`.
+#' The ordering of the dimensions in the inputs. `"channels_last"`
+#' corresponds to inputs with shape `(batch, height, width, channels)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch, channels, height, width)`. It defaults to the
+#' `image_data_format` value found in your Keras config file at
+#' `~/.keras/keras.json`. If you never set it, then it will be
+#' `"channels_last"`.
+#'
+#' @param keepdims
+#' A boolean, whether to keep the spatial dimensions or not.
+#' If `keepdims` is `FALSE` (default), the rank of the tensor is
+#' reduced for spatial dimensions. If `keepdims` is `TRUE`, the
+#' spatial dimensions are retained with length 1.
+#' The behavior is the same as for `tf$reduce_mean()` or `op_mean()`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#' +#' @inherit layer_dense return +#' @export +#' @family pooling layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.GlobalAveragePooling2D +layer_global_average_pooling_2d <- +function (object, data_format = NULL, keepdims = FALSE, ...) +{ + args <- capture_args(list(input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape), + ignore = "object") + create_layer(keras$layers$GlobalAveragePooling2D, object, + args) } -#' Average pooling operation for 3D data (spatial or spatio-temporal). -#' -#' @inheritParams layer_average_pooling_1d -#' -#' @param pool_size list of 3 integers, factors by which to downscale (dim1, -#' dim2, dim3). (2, 2, 2) will halve the size of the 3D input in each -#' dimension. -#' @param strides list of 3 integers, or NULL. Strides values. -#' @param padding One of `"valid"` or `"same"` (case-insensitive). -#' @param data_format A string, one of `channels_last` (default) or -#' `channels_first`. The ordering of the dimensions in the inputs. -#' `channels_last` corresponds to inputs with shape `(batch, spatial_dim1, -#' spatial_dim2, spatial_dim3, channels)` while `channels_first` corresponds -#' to inputs with shape `(batch, channels, spatial_dim1, spatial_dim2, -#' spatial_dim3)`. It defaults to the `image_data_format` value found in your -#' Keras config file at `~/.keras/keras.json`. If you never set it, then it -#' will be "channels_last". -#' -#' @section Input shape: -#' - If `data_format='channels_last'`: 5D tensor with shape: `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` -#' - If `data_format='channels_first'`: 5D tensor with shape: `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` -#' -#' @section Output shape: -#' - If `data_format='channels_last'`: 5D tensor with shape: `(batch_size, pooled_dim1, pooled_dim2, pooled_dim3, channels)` -#' - If `data_format='channels_first'`: 5D tensor with shape: `(batch_size, channels, pooled_dim1, pooled_dim2, pooled_dim3)` -#' -#' @family pooling layers -#' -#' @export -layer_average_pooling_3d <- function(object, pool_size = c(2L, 2L, 2L), strides = NULL, padding = "valid", data_format = NULL, - batch_size = NULL, name = NULL, trainable = NULL, weights = NULL) { - create_layer(keras$layers$AveragePooling3D, object, list( - pool_size = as.integer(pool_size), - strides = as_nullable_integer(strides), - padding = padding, - data_format = data_format, - batch_size = as_nullable_integer(batch_size), - name = name, - trainable = trainable, - weights = weights - )) +#' Global average pooling operation for 3D data. +#' +#' @description +#' +#' # Input Shape +#' - If `data_format='channels_last'`: +#' 5D tensor with shape: +#' `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` +#' - If `data_format='channels_first'`: +#' 5D tensor with shape: +#' `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` +#' +#' # Output Shape +#' - If `keepdims=FALSE`: +#' 2D tensor with shape `(batch_size, channels)`. +#' - If `keepdims=TRUE`: +#' - If `data_format="channels_last"`: +#' 5D tensor with shape `(batch_size, 1, 1, 1, channels)` +#' - If `data_format="channels_first"`: +#' 5D tensor with shape `(batch_size, channels, 1, 1, 1)` +#' +#' # Examples +#' ```{r} +#' x <- random_uniform(c(2, 4, 5, 4, 3)) +#' y <- x |> layer_global_average_pooling_3d() +#' shape(y) +#' ``` +#' +#' @param data_format +#' string, either `"channels_last"` or `"channels_first"`. +#' The ordering of the dimensions in the inputs. 
`"channels_last"` +#' corresponds to inputs with shape +#' `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` +#' while `"channels_first"` corresponds to inputs with shape +#' `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. +#' It defaults to the `image_data_format` value found in your Keras +#' config file at `~/.keras/keras.json`. If you never set it, then it +#' will be `"channels_last"`. +#' +#' @param keepdims +#' A boolean, whether to keep the temporal dimension or not. +#' If `keepdims` is `FALSE` (default), the rank of the tensor is +#' reduced for spatial dimensions. If `keepdims` is `TRUE`, the +#' spatial dimension are retained with length 1. +#' The behavior is the same as for `tf$reduce_mean()` or `op_mean()`. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit layer_dense return +#' @export +#' @family pooling layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.GlobalAveragePooling3D +layer_global_average_pooling_3d <- +function (object, data_format = NULL, keepdims = FALSE, ...) +{ + args <- capture_args(list(input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape), + ignore = "object") + create_layer(keras$layers$GlobalAveragePooling3D, object, + args) } #' Global max pooling operation for temporal data. #' -#' @inheritParams layer_dense -#' @inheritParams layer_average_pooling_1d -#' -#' @param keepdims A boolean, whether to keep the spatial dimensions or not. If -#' `keepdims` is `FALSE` (default), the rank of the tensor is reduced for -#' spatial dimensions. If `keepdims` is `TRUE`, the spatial dimensions are -#' retained with length 1. The behavior is the same as for `tf.reduce_mean` or -#' `np.mean`. -#' -#' @param ... standard layer arguments. -#' -#' @section Input shape: -#' 3D tensor with shape: `(batch_size, steps, features)`. -#' -#' @section Output shape: -#' 2D tensor with shape: `(batch_size, channels)` -#' -#' @family pooling layers -#' +#' @description +#' +#' # Input Shape +#' - If `data_format='channels_last'`: +#' 3D tensor with shape: +#' `(batch_size, steps, features)` +#' - If `data_format='channels_first'`: +#' 3D tensor with shape: +#' `(batch_size, features, steps)` +#' +#' # Output Shape +#' - If `keepdims=FALSE`: +#' 2D tensor with shape `(batch_size, features)`. +#' - If `keepdims=TRUE`: +#' - If `data_format="channels_last"`: +#' 3D tensor with shape `(batch_size, 1, features)` +#' - If `data_format="channels_first"`: +#' 3D tensor with shape `(batch_size, features, 1)` +#' +#' # Examples +#' ```{r} +#' x <- random_uniform(c(2, 3, 4)) +#' y <- x |> layer_global_max_pooling_1d() +#' shape(y) +#' ``` +#' +#' @param data_format +#' string, either `"channels_last"` or `"channels_first"`. +#' The ordering of the dimensions in the inputs. `"channels_last"` +#' corresponds to inputs with shape `(batch, steps, features)` +#' while `"channels_first"` corresponds to inputs with shape +#' `(batch, features, steps)`. It defaults to the `image_data_format` +#' value found in your Keras config file at `~/.keras/keras.json`. +#' If you never set it, then it will be `"channels_last"`. +#' +#' @param keepdims +#' A boolean, whether to keep the temporal dimension or not. +#' If `keepdims` is `FALSE` (default), the rank of the tensor is +#' reduced for spatial dimensions. If `keepdims` is `TRUE`, the +#' temporal dimension are retained with length 1. 
+#' The behavior is the same as for `tf$reduce_mean()` or `op_mean()`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
 #' @export
+#' @family pooling layers
+#' @family layers
+#' @seealso
+#' +

# +

#' @tether keras.layers.GlobalMaxPooling1D
 layer_global_max_pooling_1d <-
-function(object, data_format = "channels_last", keepdims = FALSE, ...)
+function (object, data_format = NULL, keepdims = FALSE, ...)
{
-  args <- capture_args(match.call(),
-                       list(batch_size = as_nullable_integer),
-                       ignore = "object")
-  create_layer(keras$layers$GlobalMaxPooling1D, object, args)
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$GlobalMaxPooling1D, object, args)
 }

-#' Global average pooling operation for temporal data.
-#'
-#' @inheritParams layer_global_max_pooling_1d
-#'
-#' @section Input shape:
-#' 3D tensor with shape: `(batch_size, steps, features)`.
-#'
-#' @section Output shape:
-#' 2D tensor with shape: `(batch_size, channels)`
-#'
-#' @family pooling layers
-#'
+#' Global max pooling operation for 2D data.
+#'
+#' @description
+#'
+#' # Input Shape
+#' - If `data_format='channels_last'`:
+#'     4D tensor with shape:
+#'     `(batch_size, height, width, channels)`
+#' - If `data_format='channels_first'`:
+#'     4D tensor with shape:
+#'     `(batch_size, channels, height, width)`
+#'
+#' # Output Shape
+#' - If `keepdims=FALSE`:
+#'     2D tensor with shape `(batch_size, channels)`.
+#' - If `keepdims=TRUE`:
+#'     - If `data_format="channels_last"`:
+#'         4D tensor with shape `(batch_size, 1, 1, channels)`
+#'     - If `data_format="channels_first"`:
+#'         4D tensor with shape `(batch_size, channels, 1, 1)`
+#'
+#' # Examples
+#' ```{r}
+#' x <- random_uniform(c(2, 4, 5, 3))
+#' y <- x |> layer_global_max_pooling_2d()
+#' shape(y)
+#' ```
+#'
+#' @param data_format
+#' string, either `"channels_last"` or `"channels_first"`.
+#' The ordering of the dimensions in the inputs. `"channels_last"`
+#' corresponds to inputs with shape `(batch, height, width, channels)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch, channels, height, width)`. It defaults to the
+#' `image_data_format` value found in your Keras config file at
+#' `~/.keras/keras.json`. If you never set it, then it will be
+#' `"channels_last"`.
+#'
+#' @param keepdims
+#' A boolean, whether to keep the spatial dimensions or not.
+#' If `keepdims` is `FALSE` (default), the rank of the tensor is
+#' reduced for spatial dimensions. If `keepdims` is `TRUE`, the
+#' spatial dimensions are retained with length 1.
+#' The behavior is the same as for `tf$reduce_mean()` or `op_mean()`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
 #' @export
-layer_global_average_pooling_1d <-
-function(object, data_format = "channels_last", keepdims = FALSE, ...)
+#' @family pooling layers
+#' @family layers
+#' @seealso
+#' +

# +

#' @tether keras.layers.GlobalMaxPooling2D
+layer_global_max_pooling_2d <-
+function (object, data_format = NULL, keepdims = FALSE, ...)
{
-  args <- capture_args(match.call(),
-                       list(batch_size = as_nullable_integer),
-                       ignore = "object")
-  create_layer(keras$layers$GlobalAveragePooling1D, object, args)
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$GlobalMaxPooling2D, object, args)
 }

-#' Global max pooling operation for spatial data.
-#'
-#' @inheritParams layer_conv_2d
-#' @inheritParams layer_global_max_pooling_1d
-#'
-#' @section Input shape:
-#' - If `data_format='channels_last'`: 4D tensor with shape: `(batch_size, rows, cols, channels)`
-#' - If `data_format='channels_first'`: 4D tensor with shape: `(batch_size, channels, rows, cols)`
-#'
-#' @section Output shape: 2D tensor with shape: `(batch_size, channels)`
-#'
-#' @family pooling layers
-#'
+#' Global max pooling operation for 3D data.
+#'
+#' @description
+#'
+#' # Input Shape
+#' - If `data_format='channels_last'`:
+#'     5D tensor with shape:
+#'     `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
+#' - If `data_format='channels_first'`:
+#'     5D tensor with shape:
+#'     `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`
+#'
+#' # Output Shape
+#' - If `keepdims=FALSE`:
+#'     2D tensor with shape `(batch_size, channels)`.
+#' - If `keepdims=TRUE`:
+#'     - If `data_format="channels_last"`:
+#'         5D tensor with shape `(batch_size, 1, 1, 1, channels)`
+#'     - If `data_format="channels_first"`:
+#'         5D tensor with shape `(batch_size, channels, 1, 1, 1)`
+#'
+#' # Examples
+#' ```{r}
+#' x <- random_uniform(c(2, 4, 5, 4, 3))
+#' y <- x |> layer_global_max_pooling_3d()
+#' shape(y)
+#' ```
+#'
+#' @param data_format
+#' string, either `"channels_last"` or `"channels_first"`.
+#' The ordering of the dimensions in the inputs. `"channels_last"`
+#' corresponds to inputs with shape
+#' `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.
+#' It defaults to the `image_data_format` value found in your Keras
+#' config file at `~/.keras/keras.json`. If you never set it, then it
+#' will be `"channels_last"`.
+#'
+#' @param keepdims
+#' A boolean, whether to keep the spatial dimensions or not.
+#' If `keepdims` is `FALSE` (default), the rank of the tensor is
+#' reduced for spatial dimensions. If `keepdims` is `TRUE`, the
+#' spatial dimensions are retained with length 1.
+#' The behavior is the same as for `tf$reduce_mean()` or `op_mean()`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
 #' @export
-layer_global_max_pooling_2d <-
-function(object, data_format = NULL, keepdims = FALSE, ...)
+#' @family pooling layers
+#' @family layers
+#' @seealso
+#' +

# +

#' @tether keras.layers.GlobalMaxPooling3D
+layer_global_max_pooling_3d <-
+function (object, data_format = NULL, keepdims = FALSE, ...)
{
-  args <- capture_args(match.call(),
-                       list(batch_size = as_nullable_integer),
-                       ignore = "object")
-  create_layer(keras$layers$GlobalMaxPooling2D, object, args)
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$GlobalMaxPooling3D, object, args)
 }

-#' Global average pooling operation for spatial data.
-#'
-#' @inheritParams layer_conv_2d
-#' @inheritParams layer_global_average_pooling_1d
-#'
-#' @section Input shape:
-#' - If `data_format='channels_last'`: 4D tensor with shape: `(batch_size, rows, cols, channels)`
-#' - If `data_format='channels_first'`: 4D tensor with shape: `(batch_size, channels, rows, cols)`
-#'
-#' @section Output shape: 2D tensor with shape: `(batch_size, channels)`
-#'
-#' @family pooling layers
-#'
+#' Max pooling operation for 1D temporal data.
+#'
+#' @description
+#' Downsamples the input representation by taking the maximum value over a
+#' spatial window of size `pool_size`. The window is shifted by `strides`.
+#'
+#' The resulting output when using the `"valid"` padding option has a shape of:
+#' `output_shape = (input_shape - pool_size + 1) / strides`.
+#'
+#' The resulting output shape when using the `"same"` padding option is:
+#' `output_shape = input_shape / strides`
+#'
+#' # Input Shape
+#' - If `data_format="channels_last"`:
+#'     3D tensor with shape `(batch_size, steps, features)`.
+#' - If `data_format="channels_first"`:
+#'     3D tensor with shape `(batch_size, features, steps)`.
+#'
+#' # Output Shape
+#' - If `data_format="channels_last"`:
+#'     3D tensor with shape `(batch_size, downsampled_steps, features)`.
+#' - If `data_format="channels_first"`:
+#'     3D tensor with shape `(batch_size, features, downsampled_steps)`.
+#'
+#' # Examples
+#' `strides=1` and `padding="valid"`:
+#'
+#' ```{r}
+#' x <- op_reshape(c(1, 2, 3, 4, 5),
+#'                 c(1, 5, 1))
+#' max_pool_1d <- layer_max_pooling_1d(pool_size = 2,
+#'                                     strides = 1,
+#'                                     padding = "valid")
+#' max_pool_1d(x)
+#' ```
+#'
+#' `strides=2` and `padding="valid"`:
+#'
+#' ```{r}
+#' x <- op_reshape(c(1, 2, 3, 4, 5),
+#'                 c(1, 5, 1))
+#' max_pool_1d <- layer_max_pooling_1d(pool_size = 2,
+#'                                     strides = 2,
+#'                                     padding = "valid")
+#' max_pool_1d(x)
+#' ```
+#'
+#' `strides=1` and `padding="same"`:
+#'
+#' ```{r}
+#' x <- op_reshape(c(1, 2, 3, 4, 5),
+#'                 c(1, 5, 1))
+#' max_pool_1d <- layer_max_pooling_1d(pool_size = 2,
+#'                                     strides = 1,
+#'                                     padding = "same")
+#' max_pool_1d(x)
+#' ```
+#'
+#' @param pool_size
+#' int, size of the max pooling window.
+#'
+#' @param strides
+#' int or `NULL`. Specifies how much the pooling window moves
+#' for each pooling step. If `NULL`, it will default to `pool_size`.
+#'
+#' @param padding
+#' string, either `"valid"` or `"same"` (case-insensitive).
+#' `"valid"` means no padding. `"same"` results in padding evenly to
+#' the left/right or up/down of the input such that output has the same
+#' height/width dimension as the input.
+#'
+#' @param data_format
+#' string, either `"channels_last"` or `"channels_first"`.
+#' The ordering of the dimensions in the inputs. `"channels_last"`
+#' corresponds to inputs with shape `(batch, steps, features)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch, features, steps)`. It defaults to the `image_data_format`
+#' value found in your Keras config file at `~/.keras/keras.json`.
+#' If you never set it, then it will be `"channels_last"`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param name
+#' String, name for the object
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
 #' @export
-layer_global_average_pooling_2d <-
-function(object, data_format = NULL, keepdims = FALSE, ...)
+#' @family pooling layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.MaxPooling1D +layer_max_pooling_1d <- +function (object, pool_size = 2L, strides = NULL, padding = "valid", + data_format = NULL, name = NULL, ...) { - args <- capture_args(match.call(), - list(batch_size = as_nullable_integer), - ignore = "object") - create_layer(keras$layers$GlobalAveragePooling2D, object, args) + args <- capture_args(list(pool_size = as_integer, strides = as_integer, + input_shape = normalize_shape, batch_size = as_integer, + batch_input_shape = normalize_shape), ignore = "object") + create_layer(keras$layers$MaxPooling1D, object, args) } -# TODO: standard layer args used to contain: -# batch_size = NULL, name = NULL, trainable = NULL, weights = NULL - - -#' Global Max pooling operation for 3D data. -#' -#' @inheritParams layer_global_max_pooling_2d -#' -#' @param data_format A string, one of `channels_last` (default) or -#' `channels_first`. The ordering of the dimensions in the inputs. -#' `channels_last` corresponds to inputs with shape `(batch, spatial_dim1, -#' spatial_dim2, spatial_dim3, channels)` while `channels_first` corresponds -#' to inputs with shape `(batch, channels, spatial_dim1, spatial_dim2, -#' spatial_dim3)`. It defaults to the `image_data_format` value found in your -#' Keras config file at `~/.keras/keras.json`. If you never set it, then it -#' will be "channels_last". -#' -#' @section Input shape: -#' - If `data_format='channels_last'`: 5D tensor with shape: `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` -#' - If `data_format='channels_first'`: 5D tensor with shape: `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` -#' -#' @section Output shape: 2D tensor with shape: `(batch_size, channels)` -#' -#' @family pooling layers -#' +#' Max pooling operation for 2D spatial data. +#' +#' @description +#' Downsamples the input along its spatial dimensions (height and width) +#' by taking the maximum value over an input window +#' (of size defined by `pool_size`) for each channel of the input. +#' The window is shifted by `strides` along each dimension. +#' +#' The resulting output when using the `"valid"` padding option has a spatial +#' shape (number of rows or columns) of: +#' `output_shape = floor((input_shape - pool_size) / strides) + 1` +#' (when `input_shape >= pool_size`) +#' +#' The resulting output shape when using the `"same"` padding option is: +#' `output_shape = floor((input_shape - 1) / strides) + 1` +#' +#' # Input Shape +#' - If `data_format="channels_last"`: +#' 4D tensor with shape `(batch_size, height, width, channels)`. +#' - If `data_format="channels_first"`: +#' 4D tensor with shape `(batch_size, channels, height, width)`. +#' +#' # Output Shape +#' - If `data_format="channels_last"`: +#' 4D tensor with shape +#' `(batch_size, pooled_height, pooled_width, channels)`. +#' - If `data_format="channels_first"`: +#' 4D tensor with shape +#' `(batch_size, channels, pooled_height, pooled_width)`. 
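+#'
+#' As a quick sanity check of the two shape formulas above (an editorial
+#' sketch in plain R arithmetic; `input_size`, `pool_size`, and `strides`
+#' here are ordinary variables, not layer arguments):
+#'
+#' ```{r}
+#' input_size <- 3; pool_size <- 2; strides <- 1
+#' floor((input_size - pool_size) / strides) + 1  # "valid" padding -> 2
+#' floor((input_size - 1) / strides) + 1          # "same" padding  -> 3
+#' ```
+#'
+#' These match the pooled sizes in the 3x3 examples below.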
+#'
+#' # Examples
+#' `strides = c(1, 1)` and `padding = "valid"`:
+#'
+#' ```{r}
+#' x <- rbind(c(1., 2., 3.),
+#'            c(4., 5., 6.),
+#'            c(7., 8., 9.)) |> op_reshape(c(1, 3, 3, 1))
+#' max_pool_2d <- layer_max_pooling_2d(pool_size = c(2, 2),
+#'                                     strides = c(1, 1),
+#'                                     padding = "valid")
+#' max_pool_2d(x)
+#' ```
+#'
+#' `strides = c(2, 2)` and `padding = "valid"`:
+#'
+#' ```{r}
+#' x <- rbind(c(1., 2., 3., 4.),
+#'            c(5., 6., 7., 8.),
+#'            c(9., 10., 11., 12.)) |> op_reshape(c(1, 3, 4, 1))
+#' max_pool_2d <- layer_max_pooling_2d(pool_size = c(2, 2),
+#'                                     strides = c(2, 2),
+#'                                     padding = "valid")
+#' max_pool_2d(x)
+#' ```
+#'
+#' `strides = c(1, 1)` and `padding = "same"`:
+#'
+#' ```{r}
+#' x <- rbind(c(1., 2., 3.),
+#'            c(4., 5., 6.),
+#'            c(7., 8., 9.)) |> op_reshape(c(1, 3, 3, 1))
+#' max_pool_2d <- layer_max_pooling_2d(pool_size = c(2, 2),
+#'                                     strides = c(1, 1),
+#'                                     padding = "same")
+#' max_pool_2d(x)
+#' ```
+#'
+#' @param pool_size
+#' int or list of 2 integers, factors by which to downscale
+#' (dim1, dim2). If only one integer is specified, the same
+#' window length will be used for all dimensions.
+#'
+#' @param strides
+#' int or list of 2 integers, or `NULL`. Strides values. If `NULL`,
+#' it will default to `pool_size`. If only one int is specified, the
+#' same stride size will be used for all dimensions.
+#'
+#' @param padding
+#' string, either `"valid"` or `"same"` (case-insensitive).
+#' `"valid"` means no padding. `"same"` results in padding evenly to
+#' the left/right or up/down of the input such that output has the same
+#' height/width dimension as the input.
+#'
+#' @param data_format
+#' string, either `"channels_last"` or `"channels_first"`.
+#' The ordering of the dimensions in the inputs. `"channels_last"`
+#' corresponds to inputs with shape `(batch, height, width, channels)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch, channels, height, width)`. It defaults to the
+#' `image_data_format` value found in your Keras config file at
+#' `~/.keras/keras.json`. If you never set it, then it will be
+#' `"channels_last"`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param name
+#' String, name for the object
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
 #' @export
-layer_global_max_pooling_3d <-
-function(object, data_format = NULL, keepdims = FALSE, ...)
+#' @family pooling layers
+#' @family layers
+#' @seealso
+#' +

# +

#' @tether keras.layers.MaxPooling2D
+layer_max_pooling_2d <-
+function (object, pool_size = list(2L, 2L), strides = NULL, padding = "valid",
+    data_format = NULL, name = NULL, ...)
{
-  args <- capture_args(match.call(),
-                       list(batch_size = as_nullable_integer),
-                       ignore = "object")
-  create_layer(keras$layers$GlobalMaxPooling3D, object, args)
+    args <- capture_args(list(pool_size = as_integer, strides = as_integer,
+        input_shape = normalize_shape, batch_size = as_integer,
+        batch_input_shape = normalize_shape), ignore = "object")
+    create_layer(keras$layers$MaxPooling2D, object, args)
 }

-#' Global Average pooling operation for 3D data.
-#'
-#' @inheritParams layer_global_average_pooling_2d
-#'
-#' @param data_format A string, one of `channels_last` (default) or
-#' `channels_first`. The ordering of the dimensions in the inputs.
-#' `channels_last` corresponds to inputs with shape `(batch, spatial_dim1, -#' spatial_dim2, spatial_dim3, channels)` while `channels_first` corresponds -#' to inputs with shape `(batch, channels, spatial_dim1, spatial_dim2, -#' spatial_dim3)`. It defaults to the `image_data_format` value found in your -#' Keras config file at `~/.keras/keras.json`. If you never set it, then it -#' will be "channels_last". -#' -#' @section Input shape: -#' - If `data_format='channels_last'`: 5D tensor with shape: `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` -#' - If `data_format='channels_first'`: 5D tensor with shape: `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` -#' -#' @section Output shape: 2D tensor with shape: `(batch_size, channels)` -#' -#' @family pooling layers +#' Max pooling operation for 3D data (spatial or spatio-temporal). #' +#' @description +#' Downsamples the input along its spatial dimensions (depth, height, and +#' width) by taking the maximum value over an input window (of size defined by +#' `pool_size`) for each channel of the input. The window is shifted by +#' `strides` along each dimension. +#' +#' # Input Shape +#' - If `data_format="channels_last"`: +#' 5D tensor with shape: +#' `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` +#' - If `data_format="channels_first"`: +#' 5D tensor with shape: +#' `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` +#' +#' # Output Shape +#' - If `data_format="channels_last"`: +#' 5D tensor with shape: +#' `(batch_size, pooled_dim1, pooled_dim2, pooled_dim3, channels)` +#' - If `data_format="channels_first"`: +#' 5D tensor with shape: +#' `(batch_size, channels, pooled_dim1, pooled_dim2, pooled_dim3)` +#' +#' # Examples +#' ```{r} +#' depth <- 30 +#' height <- 30 +#' width <- 30 +#' channels <- 3 +#' +#' inputs <- layer_input(shape=c(depth, height, width, channels)) +#' layer <- layer_max_pooling_3d(pool_size=3) +#' outputs <- inputs |> layer() +#' outputs +#' ``` +#' +#' @param pool_size +#' int or list of 3 integers, factors by which to downscale +#' (dim1, dim2, dim3). If only one integer is specified, the same +#' window length will be used for all dimensions. +#' +#' @param strides +#' int or list of 3 integers, or `NULL`. Strides values. If `NULL`, +#' it will default to `pool_size`. If only one int is specified, the +#' same stride size will be used for all dimensions. +#' +#' @param padding +#' string, either `"valid"` or `"same"` (case-insensitive). +#' `"valid"` means no padding. `"same"` results in padding evenly to +#' the left/right or up/down of the input such that output has the same +#' height/width dimension as the input. +#' +#' @param data_format +#' string, either `"channels_last"` or `"channels_first"`. +#' The ordering of the dimensions in the inputs. `"channels_last"` +#' corresponds to inputs with shape +#' `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` while +#' `"channels_first"` corresponds to inputs with shape +#' `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. +#' It defaults to the `image_data_format` value found in your Keras +#' config file at `~/.keras/keras.json`. If you never set it, then it +#' will be `"channels_last"`. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @param name +#' String, name for the object +#' +#' @param ... +#' For forward/backward compatability. 
+#'
+#' @inherit layer_dense return
 #' @export
-layer_global_average_pooling_3d <-
-function(object, data_format = NULL, keepdims = FALSE, ...)
+#' @family pooling layers
+#' @family layers
+#' @seealso
+#' +

# +

#' @tether keras.layers.MaxPooling3D
+layer_max_pooling_3d <-
+function (object, pool_size = list(2L, 2L, 2L), strides = NULL,
+    padding = "valid", data_format = NULL, name = NULL, ...)
{
-  args <- capture_args(match.call(),
-                       list(batch_size = as_nullable_integer),
-                       ignore = "object")
-  create_layer(keras$layers$GlobalAveragePooling3D, object, args)
+    args <- capture_args(list(pool_size = as_integer, strides = as_integer,
+        input_shape = normalize_shape, batch_size = as_integer,
+        batch_input_shape = normalize_shape), ignore = "object")
+    create_layer(keras$layers$MaxPooling3D, object, args)
 }
diff --git a/R/layers-preprocessing.R b/R/layers-preprocessing.R
index ad37367de0..bd16416790 100644
--- a/R/layers-preprocessing.R
+++ b/R/layers-preprocessing.R
@@ -1,945 +1,1770 @@
-## ---- image preprocessing ----

-#' Image resizing layer
-#'
-#' @details
-#' Resize the batched image input to target height and width. The input should
-#' be a 4D (batched) or 3D (unbatched) tensor in `"channels_last"` format.
-#'
-#' @inheritParams layer_dense
-#'
-#' @param height Integer, the height of the output shape.
-#'
-#' @param width Integer, the width of the output shape.
-#'
-#' @param interpolation String, the interpolation method. Defaults to `"bilinear"`.
-#' Supports `"bilinear"`, `"nearest"`, `"bicubic"`, `"area"`, `"lanczos3"`,
-#' `"lanczos5"`, `"gaussian"`, and `"mitchellcubic"`.
-#'
-#' @param crop_to_aspect_ratio If TRUE, resize the images without aspect
-#' ratio distortion. When the original aspect ratio differs from the target
-#' aspect ratio, the output image will be cropped so as to return the largest
-#' possible window in the image (of size `(height, width)`) that matches
-#' the target aspect ratio. By default (`crop_to_aspect_ratio = FALSE`),
-#' aspect ratio may not be preserved.
-#'
-#' @param ... standard layer arguments.
+
+#' A preprocessing layer which encodes integer features.
 #'
-#' @family image preprocessing layers
+#' @description
+#' This layer provides options for condensing data into a categorical encoding
+#' when the total number of tokens is known in advance. It accepts integer
+#' values as inputs, and it outputs a dense or sparse representation of those
+#' inputs. For integer inputs where the total number of tokens is not known,
+#' use `layer_integer_lookup()` instead.
+#'
+#' **Note:** This layer is safe to use inside a `tf.data` pipeline
+#' (independently of which backend you're using).
+#'
+#' # Examples
+#' **One-hot encoding data**
+#'
+#' ```{r}
+#' layer <- layer_category_encoding(num_tokens = 4, output_mode = "one_hot")
+#' x <- op_array(c(3, 2, 0, 1), "int32")
+#' layer(x)
+#' ```
+#'
+#' **Multi-hot encoding data**
+#'
+#' ```{r}
+#' layer <- layer_category_encoding(num_tokens = 4, output_mode = "multi_hot")
+#' x <- op_array(rbind(c(0, 1),
+#'                     c(0, 0),
+#'                     c(1, 2),
+#'                     c(3, 1)), "int32")
+#' layer(x)
+#' ```
+#'
+#' **Using weighted inputs in `"count"` mode**
+#'
+#' ```{r, eval = FALSE}
+#' layer <- layer_category_encoding(num_tokens = 4, output_mode = "count")
+#' count_weights <- op_array(rbind(c(.1, .2),
+#'                                 c(.1, .1),
+#'                                 c(.2, .3),
+#'                                 c(.4, .2)))
+#' x <- op_array(rbind(c(0, 1),
+#'                     c(0, 0),
+#'                     c(1, 2),
+#'                     c(3, 1)), "int32")
+#' layer(x, count_weights = count_weights)
+#' # array([[0.1, 0.2, 0. , 0. ],
+#' #        [0.2, 0. , 0. , 0. ],
+#' #        [0. , 0.2, 0.3, 0. ],
+#' #        [0. , 0.2, 0. , 0.4]])
+#' ```
+#'
+#' # Call Arguments
+#' - `inputs`: A 1D or 2D tensor of integer inputs.
+#' - `count_weights`: A tensor in the same shape as `inputs` indicating the
+#'     weight for each sample value when summing up in `count` mode.
+#'     Not used in `"multi_hot"` or `"one_hot"` modes.
+#'
+#' @param num_tokens
+#' The total number of tokens the layer should support. All
+#' inputs to the layer must be integers in the range `0 <= value <
+#' num_tokens`, or an error will be thrown.
+#'
+#' @param output_mode
+#' Specification for the output of the layer.
+#' Values can be `"one_hot"`, `"multi_hot"` or `"count"`,
+#' configuring the layer as follows:
+#' - `"one_hot"`: Encodes each individual element in the input
+#'     into an array of `num_tokens` size, containing a 1 at the
+#'     element index. If the last dimension is size 1, will encode
+#'     on that dimension. If the last dimension is not size 1,
+#'     will append a new dimension for the encoded output.
+#' - `"multi_hot"`: Encodes each sample in the input into a single
+#'     array of `num_tokens` size, containing a 1 for each
+#'     vocabulary term present in the sample. Treats the last
+#'     dimension as the sample dimension, if input shape is
+#'     `(..., sample_length)`, output shape will be
+#'     `(..., num_tokens)`.
+#' - `"count"`: Like `"multi_hot"`, but the int array contains a
+#'     count of the number of times the token at that index
+#'     appeared in the sample.
+#' For all output modes, currently only output up to rank 2 is
+#' supported.
+#' Defaults to `"multi_hot"`.
+#'
+#' @param sparse
+#' Whether to return a sparse tensor; for backends that support
+#' sparse tensors.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family categorical features preprocessing layers
 #' @family preprocessing layers
-#'
+#' @family layers
 #' @seealso
-#' -
-#' -
-#' @export
-layer_resizing <-
-function(object, height, width, interpolation = "bilinear",
-    crop_to_aspect_ratio = FALSE, ...)
+#' +

# +

#' @tether keras.layers.CategoryEncoding
+layer_category_encoding <-
+function (object, num_tokens = NULL, output_mode = "multi_hot",
+    sparse = FALSE, ...)
{
-  require_tf_version("2.6", "layer_resizing()")
-  args <- capture_args(match.call(),
-                       list(height = as.integer, width = as.integer,
-                            interpolation = fix_string),
-                       ignore = "object")
-  create_layer(keras$layers$Resizing, object, args)
+    args <- capture_args(list(output_mode = as_integer, input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape,
+        num_tokens = as_integer), ignore = "object")
+    create_layer(keras$layers$CategoryEncoding, object, args)
 }

-#' Multiply inputs by `scale` and adds `offset`
+#' A preprocessing layer which crops images.
 #'
-#' @details
-#' For instance:
+#' @description
+#' This layer crops the central portion of the images to a target size. If an
+#' image is smaller than the target size, it will be resized and cropped
+#' so as to return the largest possible window in the image that matches
+#' the target aspect ratio.
 #'
-#' 1. To rescale an input in the `[0, 255]` range
-#' to be in the `[0, 1]` range, you would pass `scale=1./255`.
+#' Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`).
 #'
-#' 2.
To rescale an input in the `[0, 255]` range to be in the `[-1, 1]` range, -#' you would pass `scale = 1/127.5, offset = -1`. +#' # Input Shape +#' 3D (unbatched) or 4D (batched) tensor with shape: +#' `(..., height, width, channels)`, in `"channels_last"` format, +#' or `(..., channels, height, width)`, in `"channels_first"` format. +#' +#' # Output Shape +#' 3D (unbatched) or 4D (batched) tensor with shape: +#' `(..., target_height, target_width, channels)`, +#' or `(..., channels, target_height, target_width)`, +#' in `"channels_first"` format. +#' +#' If the input height/width is even and the target height/width is odd (or +#' inversely), the input image is left-padded by 1 pixel. #' -#' The rescaling is applied both during training and inference. +#' **Note:** This layer is safe to use inside a `tf.data` pipeline +#' (independently of which backend you're using). #' -#' Input shape: -#' Arbitrary. +#' @param height +#' Integer, the height of the output shape. #' -#' Output shape: -#' Same as input. +#' @param width +#' Integer, the width of the output shape. #' -#' @inheritParams layer_dense +#' @param data_format +#' string, either `"channels_last"` or `"channels_first"`. +#' The ordering of the dimensions in the inputs. `"channels_last"` +#' corresponds to inputs with shape `(batch, height, width, channels)` +#' while `"channels_first"` corresponds to inputs with shape +#' `(batch, channels, height, width)`. It defaults to the +#' `image_data_format` value found in your Keras config file at +#' `~/.keras/keras.json`. If you never set it, then it will be +#' `"channels_last"`. #' -#' @param scale Float, the scale to apply to the inputs. +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. #' -#' @param offset Float, the offset to apply to the inputs. -#' @param ... standard layer arguments. +#' @param ... +#' For forward/backward compatability. #' +#' @inherit layer_dense return +#' @export #' @family image preprocessing layers #' @family preprocessing layers -#' +#' @family layers #' @seealso -#' - -#' - -#' @export -layer_rescaling <- -function(object, scale, offset = 0, ...) +#' + +# + +#' @tether keras.layers.CenterCrop +layer_center_crop <- +function (object, height, width, data_format = NULL, ...) { - require_tf_version("2.6", "layer_rescaling()") - args <- capture_args(match.call(), ignore = "object") - create_layer(keras$layers$Rescaling, object, args) + args <- capture_args(list(height = as_integer, width = as_integer, + input_shape = normalize_shape, batch_size = as_integer, + batch_input_shape = normalize_shape), ignore = "object") + create_layer(keras$layers$CenterCrop, object, args) } +#' A preprocessing layer which buckets continuous features by ranges. +#' +#' @description +#' This layer will place each element of its input data into one of several +#' contiguous ranges and output an integer index indicating which range each +#' element was placed in. +#' +#' **Note:** This layer is safe to use inside a `tf.data` pipeline +#' (independently of which backend you're using). +#' +#' # Input Shape +#' Any array of dimension 2 or higher. +#' +#' # Output Shape +#' Same as input shape. +#' +#' # Examples +#' Discretize float values based on provided buckets. 
+#' ```{r}
+#' input <- op_array(rbind(c(-1.5, 1, 3.4, 0.5),
+#'                         c(0, 3, 1.3, 0),
+#'                         c(-.5, 0, .5, 1),
+#'                         c(1.5, 2, 2.5, 3)))
+#' output <- input |> layer_discretization(bin_boundaries = c(0, 1, 2))
+#' output
+#' ```
+#'
+#' Discretize float values based on a number of buckets to compute.
+#' ```{r}
+#' layer <- layer_discretization(num_bins = 4, epsilon = 0.01)
+#' layer |> adapt(input)
+#' layer(input)
+#' ```
+#'
+#' @param bin_boundaries
+#' A list of bin boundaries.
+#' The leftmost and rightmost bins
+#' will always extend to `-Inf` and `Inf`,
+#' so `bin_boundaries = c(0, 1, 2)`
+#' generates bins `(-Inf, 0)`, `[0, 1)`, `[1, 2)`,
+#' and `[2, +Inf)`.
+#' If this option is set, `adapt()` should not be called.
+#'
+#' @param num_bins
+#' The integer number of bins to compute.
+#' If this option is set,
+#' `adapt()` should be called to learn the bin boundaries.
+#'
+#' @param epsilon
+#' Error tolerance, typically a small fraction
+#' close to zero (e.g. 0.01). Higher values of epsilon increase
+#' the quantile approximation error, and hence result in more
+#' unequal buckets, but could improve performance
+#' and resource consumption.
+#'
+#' @param output_mode
+#' Specification for the output of the layer.
+#' Values can be `"int"`, `"one_hot"`, `"multi_hot"`, or
+#' `"count"` configuring the layer as follows:
+#' - `"int"`: Return the discretized bin indices directly.
+#' - `"one_hot"`: Encodes each individual element in the
+#'     input into an array the same size as `num_bins`,
+#'     containing a 1 at the input's bin
+#'     index. If the last dimension is size 1, will encode on that
+#'     dimension. If the last dimension is not size 1,
+#'     will append a new dimension for the encoded output.
+#' - `"multi_hot"`: Encodes each sample in the input into a
+#'     single array the same size as `num_bins`,
+#'     containing a 1 for each bin index
+#'     present in the sample.
+#'     Treats the last dimension as the sample
+#'     dimension, if input shape is `(..., sample_length)`,
+#'     output shape will be `(..., num_tokens)`.
+#' - `"count"`: As `"multi_hot"`, but the int array contains
+#'     a count of the number of times the bin index appeared
+#'     in the sample.
+#' Defaults to `"int"`.
+#'
+#' @param sparse
+#' Boolean. Only applicable to `"one_hot"`, `"multi_hot"`,
+#' and `"count"` output modes. Only supported with TensorFlow
+#' backend. If `TRUE`, returns a `SparseTensor` instead of
+#' a dense `Tensor`. Defaults to `FALSE`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param name
+#' String, name for the object
+#'
+#' @param dtype
+#' datatype (e.g., `"float32"`).
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family numerical features preprocessing layers
+#' @family preprocessing layers
+#' @family layers
+#' @seealso
+#' +

# +

+#'
+#' @tether keras.layers.Discretization
+layer_discretization <-
+function (object, bin_boundaries = NULL, num_bins = NULL, epsilon = 0.01,
+    output_mode = "int", sparse = FALSE, dtype = NULL, name = NULL)
+{
+    args <- capture_args(list(num_bins = as_integer, output_mode = as_integer,
+        input_shape = normalize_shape, batch_size = as_integer,
+        batch_input_shape = normalize_shape), ignore = "object")
+    create_layer(keras$layers$Discretization, object, args)
+}
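+
+# Editor's sketch (not generated documentation): the `"one_hot"` output mode
+# described above, with assumed bin boundaries. `bin_boundaries = c(0, 1, 2)`
+# defines four bins, so each scalar input is encoded as a length-4 one-hot
+# vector indicating its bin. Wrapped in `if (FALSE)` so it never runs at
+# package load.
+if (FALSE) {
+  layer <- layer_discretization(bin_boundaries = c(0, 1, 2),
+                                output_mode = "one_hot")
+  layer(op_array(c(-1.5, 0.5, 1.3, 2.5)))
+}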
+#' A preprocessing layer which crosses features using the "hashing trick". #' -#' Output shape: -#' 3D (unbatched) or 4D (batched) tensor with shape: -#' `(..., target_height, target_width, channels)`. +#' @description +#' This layer performs crosses of categorical features using the "hashing +#' trick". Conceptually, the transformation can be thought of as: +#' `hash(concatenate(features)) %% num_bins`. #' -#' If the input height/width is even and the target height/width is odd (or -#' inversely), the input image is left-padded by 1 pixel. +#' This layer currently only performs crosses of scalar inputs and batches of +#' scalar inputs. Valid input shapes are `(batch_size, 1)`, `(batch_size)` and +#' `()`. #' -#' @inheritParams layer_dense +#' **Note:** This layer wraps `tf.keras.layers.HashedCrossing`. It cannot +#' be used as part of the compiled computation graph of a model with +#' any backend other than TensorFlow. +#' It can however be used with any backend when running eagerly. +#' It can also always be used as part of an input preprocessing pipeline +#' with any backend (outside the model itself), which is how we recommend +#' to use this layer. #' -#' @param height Integer, the height of the output shape. +#' **Note:** This layer is safe to use inside a `tfdatasets` pipeline +#' (independently of which backend you're using). #' -#' @param width Integer, the width of the output shape. +#' # Examples #' -#' @param ... standard layer arguments. +#' ```{r} +#' feat1 <- c('A', 'B', 'A', 'B', 'A') |> as.array() +#' feat2 <- c(101, 101, 101, 102, 102) |> as.integer() |> as.array() +#' ``` #' +#' **Crossing two scalar features.** #' -#' @family image preprocessing layers -#' @family preprocessing layers +#' ```{r} +#' layer <- layer_hashed_crossing(num_bins = 5) +#' layer(list(feat1, feat2)) +#' ``` #' -#' @seealso -#' - -#' - -#' @export -layer_center_crop <- -function(object, height, width, ...) -{ - require_tf_version("2.6", "layer_center_crop()") - args <- capture_args(match.call(), - list(height = as.integer, width = as.integer), - ignore = "object") - create_layer(keras$layers$CenterCrop, object, args) -} - - -## ---- image augmentation ---- - -#' Randomly crop the images to target height and width +#' **Crossing and one-hotting two scalar features.** #' -#' @details -#' This layer will crop all the images in the same batch to the same cropping -#' location. -#' By default, random cropping is only applied during training. At inference -#' time, the images will be first rescaled to preserve the shorter side, and -#' center cropped. If you need to apply random cropping at inference time, -#' set `training` to `TRUE` when calling the layer. +#' ```{r} +#' layer <- layer_hashed_crossing(num_bins = 5, output_mode = 'one_hot') +#' layer(list(feat1, feat2)) +#' ``` #' -#' Input shape: -#' 3D (unbatched) or 4D (batched) tensor with shape: -#' `(..., height, width, channels)`, in `"channels_last"` format. +#' @param num_bins +#' Number of hash bins. #' -#' Output shape: -#' 3D (unbatched) or 4D (batched) tensor with shape: -#' `(..., target_height, target_width, channels)`. +#' @param output_mode +#' Specification for the output of the layer. Values can be +#' `"int"`, or `"one_hot"` configuring the layer as follows: +#' - `"int"`: Return the integer bin indices directly. +#' - `"one_hot"`: Encodes each individual element in the input into an +#' array the same size as `num_bins`, containing a 1 at the input's +#' bin index. Defaults to `"int"`. 
#'
-#' @inheritParams layer_dense
+#' @param sparse
+#' Boolean. Only applicable to `"one_hot"` mode and only valid
+#' when using the TensorFlow backend. If `TRUE`, returns
+#' a `SparseTensor` instead of a dense `Tensor`. Defaults to `FALSE`.
#'
-#' @param height Integer, the height of the output shape.
+#' @param ...
+#' Keyword arguments to construct a layer.
#'
-#' @param width Integer, the width of the output shape.
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
#'
-#' @param seed Integer. Used to create a random seed.
+#' @param name
+#' String, name for the object
#'
-#' @param ... standard layer arguments.
+#' @param dtype
+#' datatype (e.g., `"float32"`).
#'
-#' @family image augmentation layers
+#' @inherit layer_dense return
+#' @export
+#' @family categorical features preprocessing layers
 #' @family preprocessing layers
-#'
+#' @family layers
 #' @seealso
-#' -
-#' -
+#' +

# +

+#'
+#' @tether keras.layers.HashedCrossing
+layer_hashed_crossing <-
+function (object, num_bins, output_mode = "int", sparse = FALSE,
+    name = NULL, dtype = NULL, ...)
{
-  require_tf_version("2.6", "layer_random_crop()")
-  args <- capture_args(match.call(),
-                       list(height = as.integer, width = as.integer,
-                            seed = as_nullable_integer),
-                       ignore = "object")
-  create_layer(keras$layers$RandomCrop, object, args)
+    args <- capture_args(list(output_mode = as_integer, input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape,
+        num_bins = as_integer), ignore = "object")
+    create_layer(keras$layers$HashedCrossing, object, args)
 }

-#' Randomly flip each image horizontally and vertically
+#' A preprocessing layer which hashes and bins categorical features.
 #'
-#' @details
-#' This layer will flip the images based on the `mode` attribute.
-#' During inference time, the output will be identical to input. Call the layer
-#' with `training = TRUE` to flip the input.
+#' @description
+#' This layer transforms categorical inputs to hashed output. It element-wise
+#' converts ints or strings to ints in a fixed range. The stable hash
+#' function uses `tensorflow::ops::Fingerprint` to produce the same output
+#' consistently across all platforms.
+#'
+#' This layer uses [FarmHash64](https://github.com/google/farmhash) by default,
+#' which provides a consistent hashed output across different platforms and is
+#' stable across invocations, regardless of device and context, by mixing the
+#' input bits thoroughly.
 #'
-#' Input shape:
-#' 3D (unbatched) or 4D (batched) tensor with shape:
-#' `(..., height, width, channels)`, in `"channels_last"` format.
+#' If you want to obfuscate the hashed output, you can also pass a random
+#' `salt` argument in the constructor. In that case, the layer will use the
+#' [SipHash64](https://github.com/google/highwayhash) hash function, with
+#' the `salt` value serving as additional input to the hash function.
+#'
+#' **Note:** This layer internally uses TensorFlow. It cannot
+#' be used as part of the compiled computation graph of a model with
+#' any backend other than TensorFlow.
+#' It can however be used with any backend when running eagerly.
+#' It can also always be used as part of an input preprocessing pipeline +#' with any backend (outside the model itself), which is how we recommend +#' to use this layer. #' -#' @inheritParams layer_dense +#' **Note:** This layer is safe to use inside a `tf.data` pipeline +#' (independently of which backend you're using). #' -#' @param mode String indicating which flip mode to use. Can be `"horizontal"`, -#' `"vertical"`, or `"horizontal_and_vertical"`. Defaults to -#' `"horizontal_and_vertical"`. `"horizontal"` is a left-right flip and -#' `"vertical"` is a top-bottom flip. +#' **Example (FarmHash64)** #' -#' @param seed Integer. Used to create a random seed. +#' ```{r} +#' layer <- layer_hashing(num_bins = 3) +#' inp <- c('A', 'B', 'C', 'D', 'E') |> array(dim = c(5, 1)) +#' layer(inp) +#' ``` #' -#' @param ... standard layer arguments. +#' **Example (FarmHash64) with a mask value** #' -#' @family image augmentation layers -#' @family preprocessing layers +#' ```{r} +#' layer <- layer_hashing(num_bins=3, mask_value='') +#' inp <- c('A', 'B', '', 'C', 'D') |> array(dim = c(5, 1)) +#' layer(inp) +#' ``` #' -#' @seealso -#' - -#' - +#' **Example (SipHash64)** +#' +#' ```{r} +#' layer <- layer_hashing(num_bins=3, salt=c(133, 137)) +#' inp <- c('A', 'B', 'C', 'D', 'E') |> array(dim = c(5, 1)) +#' layer(inp) +#' ``` +#' +#' **Example (Siphash64 with a single integer, same as `salt=[133, 133]`)** +#' +#' ```{r} +#' layer <- layer_hashing(num_bins=3, salt=133) +#' inp <- c('A', 'B', 'C', 'D', 'E') |> array(dim = c(5, 1)) +#' layer(inp) +#' ``` +#' +#' # Input Shape +#' A single string, a list of strings, or an `int32` or `int64` tensor +#' of shape `(batch_size, ...,)`. +#' +#' # Output Shape +#' An `int32` tensor of shape `(batch_size, ...)`. +#' +#' # Reference +#' - [SipHash with salt](https://en.wikipedia.org/wiki/SipHash) +#' +#' @param num_bins +#' Number of hash bins. Note that this includes the `mask_value` +#' bin, so the effective number of bins is `(num_bins - 1)` +#' if `mask_value` is set. +#' +#' @param mask_value +#' A value that represents masked inputs, which are mapped to +#' index 0. `NULL` means no mask term will be added and the +#' hashing will start at index 0. Defaults to `NULL`. +#' +#' @param salt +#' A single unsigned integer or `NULL`. +#' If passed, the hash function used will be SipHash64, +#' with these values used as an additional input +#' (known as a "salt" in cryptography). +#' These should be non-zero. If `NULL`, uses the FarmHash64 hash +#' function. It also supports list of 2 unsigned +#' integer numbers, see reference paper for details. +#' Defaults to `NULL`. +#' +#' @param output_mode +#' Specification for the output of the layer. Values can be +#' `"int"`, `"one_hot"`, `"multi_hot"`, or +#' `"count"` configuring the layer as follows: +#' - `"int"`: Return the integer bin indices directly. +#' - `"one_hot"`: Encodes each individual element in the input into an +#' array the same size as `num_bins`, containing a 1 +#' at the input's bin index. If the last dimension is size 1, +#' will encode on that dimension. +#' If the last dimension is not size 1, will append a new +#' dimension for the encoded output. +#' - `"multi_hot"`: Encodes each sample in the input into a +#' single array the same size as `num_bins`, +#' containing a 1 for each bin index +#' index present in the sample. Treats the last dimension +#' as the sample dimension, if input shape is +#' `(..., sample_length)`, output shape will be +#' `(..., num_tokens)`. 
+#' - `"count"`: As `"multi_hot"`, but the int array contains a count of +#' the number of times the bin index appeared in the sample. +#' Defaults to `"int"`. +#' +#' @param sparse +#' Boolean. Only applicable to `"one_hot"`, `"multi_hot"`, +#' and `"count"` output modes. Only supported with TensorFlow +#' backend. If `TRUE`, returns a `SparseTensor` instead of +#' a dense `Tensor`. Defaults to `FALSE`. +#' +#' @param ... +#' Keyword arguments to construct a layer. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @inherit layer_dense return #' @export -layer_random_flip <- -function(object, mode = "horizontal_and_vertical", seed = NULL, ...) +#' @family categorical features preprocessing layers +#' @family preprocessing layers +#' @family layers +#' @seealso +#' + +# + +#' +#' @tether keras.layers.Hashing +layer_hashing <- +function (object, num_bins, mask_value = NULL, salt = NULL, output_mode = "int", + sparse = FALSE, ...) { - require_tf_version("2.6", "layer_random_flip()") - args <- capture_args(match.call(), - list(seed = as_nullable_integer, - mode = fix_string), - ignore = "object") - create_layer(keras$layers$RandomFlip, object, args) + args <- capture_args(list(salt = as_integer, output_mode = as_integer, + input_shape = normalize_shape, batch_size = as_integer, + batch_input_shape = normalize_shape, num_bins = as_integer), + ignore = "object") + create_layer(keras$layers$Hashing, object, args) } -#' Randomly translate each image during training -#' -#' @inheritParams layer_dense -#' -#' @param height_factor a float represented as fraction of value, or a list of size -#' 2 representing lower and upper bound for shifting vertically. A negative -#' value means shifting image up, while a positive value means shifting image -#' down. When represented as a single positive float, this value is used for -#' both the upper and lower bound. For instance, `height_factor = c(-0.2, 0.3)` -#' results in an output shifted by a random amount in the range -#' `[-20%, +30%]`. -#' `height_factor = 0.2` results in an output height shifted by a random amount -#' in the range `[-20%, +20%]`. -#' -#' @param width_factor a float represented as fraction of value, or a list of size 2 -#' representing lower and upper bound for shifting horizontally. A negative -#' value means shifting image left, while a positive value means shifting -#' image right. When represented as a single positive float, this value is -#' used for both the upper and lower bound. For instance, -#' `width_factor = c(-0.2, 0.3)` results in an output shifted left by 20%, and -#' shifted right by 30%. `width_factor = 0.2` results in an output height -#' shifted left or right by 20%. -#' -#' @param fill_mode Points outside the boundaries of the input are filled according -#' to the given mode (one of `{"constant", "reflect", "wrap", "nearest"}`). -#' - *reflect*: `(d c b a | a b c d | d c b a)` The input is extended by -#' reflecting about the edge of the last pixel. -#' - *constant*: `(k k k k | a b c d | k k k k)` The input is extended by -#' filling all values beyond the edge with the same constant value k = 0. -#' - *wrap*: `(a b c d | a b c d | a b c d)` The input is extended by -#' wrapping around to the opposite edge. -#' - *nearest*: `(a a a a | a b c d | d d d d)` The input is extended by the -#' nearest pixel. +#' A preprocessing layer that maps integers to (possibly encoded) indices. #' -#' @param interpolation Interpolation mode. 
Supported values: `"nearest"`,
-#' `"bilinear"`.
-#'
-#' @param seed Integer. Used to create a random seed.
+#' @description
+#' This layer maps a set of arbitrary integer input tokens into indexed integer
+#' output via a table-based vocabulary lookup. The layer's output indices will
+#' be contiguously arranged up to the maximum vocab size, even if the input
+#' tokens are non-contiguous or unbounded. The layer supports multiple options
+#' for encoding the output via `output_mode`, and has optional support for
+#' out-of-vocabulary (OOV) tokens and masking.
#'
-#' @param fill_value a float represents the value to be filled outside the boundaries
-#' when `fill_mode="constant"`.
+#' The vocabulary for the layer must be either supplied on construction or
+#' learned via `adapt()`. During `adapt()`, the layer will analyze a data set,
+#' determine the frequency of individual integer tokens, and create a
+#' vocabulary from them. If the vocabulary is capped in size, the most frequent
+#' tokens will be used to create the vocabulary and all others will be treated
+#' as OOV.
#'
-#' @param ... standard layer arguments.
+#' There are two possible output modes for the layer. When `output_mode` is
+#' `"int"`, input integers are converted to their index in the vocabulary (an
+#' integer). When `output_mode` is `"multi_hot"`, `"count"`, or `"tf_idf"`,
+#' input integers are encoded into an array where each dimension corresponds to
+#' an element in the vocabulary.
#'
-#' @family image augmentation layers
+#' The vocabulary can optionally contain a mask token as well as an OOV token
+#' (which can optionally occupy multiple indices in the vocabulary, as set
+#' by `num_oov_indices`).
+#' The position of these tokens in the vocabulary is fixed. When `output_mode`
+#' is `"int"`, the vocabulary will begin with the mask token at index 0,
+#' followed by OOV indices, followed by the rest of the vocabulary. When
+#' `output_mode` is `"multi_hot"`, `"count"`, or `"tf_idf"` the vocabulary will
+#' begin with OOV indices and instances of the mask token will be dropped.
+#'
+#' **Note:** This layer uses TensorFlow internally. It cannot
+#' be used as part of the compiled computation graph of a model with
+#' any backend other than TensorFlow.
+#' It can however be used with any backend when running eagerly.
+#' It can also always be used as part of an input preprocessing pipeline
+#' with any backend (outside the model itself), which is how we recommend
+#' to use this layer.
+#'
+#' **Note:** This layer is safe to use inside a `tf.data` pipeline
+#' (independently of which backend you're using).
+#'
+#' # Examples
+#' **Creating a lookup layer with a known vocabulary**
+#'
+#' This example creates a lookup layer with a pre-existing vocabulary.
+#'
+#' ```{r}
+#' vocab <- c(12, 36, 1138, 42) |> as.integer()
+#' data <- op_array(rbind(c(12, 1138, 42),
+#'                        c(42, 1000, 36)))  # Note OOV tokens
+#' out <- data |> layer_integer_lookup(vocabulary = vocab)
+#' out
+#' ```
+#'
+#' **Creating a lookup layer with an adapted vocabulary**
+#'
+#' This example creates a lookup layer and generates the vocabulary by
+#' analyzing the dataset.
+#'
+#' ```{r}
+#' data <- op_array(rbind(c(12, 1138, 42),
+#'                        c(42, 1000, 36)))  # Note OOV tokens
+#' layer <- layer_integer_lookup()
+#' layer |> adapt(data)
+#' layer |> get_vocabulary() |> str()
+#' ```
+#'
+#' Note that the OOV token -1 has been added to the vocabulary.
The remaining
+#' tokens are sorted by frequency (42, which has 2 occurrences, is first) then
+#' by inverse sort order.
+#'
+#' ```{r}
+#' layer(data)
+#' ```
+#'
+#' **Lookups with multiple OOV indices**
+#'
+#' This example demonstrates how to use a lookup layer with multiple OOV
+#' indices. When a layer is created with more than one OOV index, any OOV
+#' tokens are hashed into the number of OOV buckets, distributing OOV tokens in
+#' a deterministic fashion across the set.
+#'
+#' ```{r}
+#' vocab <- c(12, 36, 1138, 42) |> as.integer()
+#' data <- op_array(rbind(c(12, 1138, 42),
+#'                        c(37, 1000, 36)))  # Note OOV tokens
+#' out <- data |>
+#'   layer_integer_lookup(vocabulary = vocab,
+#'                        num_oov_indices = 2)
+#' out
+#' ```
+#'
+#' Note that the output for OOV token 37 is 1, while the output for OOV token
+#' 1000 is 0. The in-vocab terms have their output index increased by 1 from
+#' earlier examples (12 maps to 2, etc.) in order to make space for the extra
+#' OOV token.
+#'
+#' **One-hot output**
+#'
+#' Configure the layer with `output_mode='one_hot'`. Note that the first
+#' `num_oov_indices` dimensions in the one_hot encoding represent OOV values.
+#'
+#' ```{r}
+#' vocab <- c(12, 36, 1138, 42) |> as.integer()
+#' data <- op_array(c(12, 36, 1138, 42, 7), 'int32')  # Note OOV tokens
+#' layer <- layer_integer_lookup(vocabulary = vocab,
+#'                               output_mode = 'one_hot')
+#' layer(data)
+#' ```
+#'
+#' **Multi-hot output**
+#'
+#' Configure the layer with `output_mode = 'multi_hot'`. Note that the first
+#' `num_oov_indices` dimensions in the multi_hot encoding represent OOV tokens.
+#'
+#' ```{r}
+#' vocab <- c(12, 36, 1138, 42) |> as.integer()
+#' data <- op_array(rbind(c(12, 1138, 42, 42),
+#'                        c(42, 7, 36, 7)), "int64")  # Note OOV tokens
+#' layer <- layer_integer_lookup(vocabulary = vocab,
+#'                               output_mode = 'multi_hot')
+#' layer(data)
+#' ```
+#'
+#' **Token count output**
+#'
+#' Configure the layer with `output_mode='count'`. As with multi_hot output,
+#' the first `num_oov_indices` dimensions in the output represent OOV tokens.
+#'
+#' ```{r}
+#' vocab <- c(12, 36, 1138, 42) |> as.integer()
+#' data <- rbind(c(12, 1138, 42, 42),
+#'               c(42, 7, 36, 7)) |> op_array("int64")
+#' layer <- layer_integer_lookup(vocabulary = vocab,
+#'                               output_mode = 'count')
+#' layer(data)
+#' ```
+#'
+#' **TF-IDF output**
+#'
+#' Configure the layer with `output_mode='tf_idf'`. As with multi_hot output,
+#' the first `num_oov_indices` dimensions in the output represent OOV tokens.
+#'
+#' Each token bin will output `token_count * idf_weight`, where the idf weights
+#' are the inverse document frequency weights per token. These should be
+#' provided along with the vocabulary. Note that the `idf_weight` for OOV
+#' tokens will default to the average of all idf weights passed in.
+#'
+#' ```{r}
+#' vocab <- c(12, 36, 1138, 42) |> as.integer()
+#' idf_weights <- c(0.25, 0.75, 0.6, 0.4)
+#' data <- rbind(c(12, 1138, 42, 42),
+#'               c(42, 7, 36, 7)) |> op_array("int64")
+#' layer <- layer_integer_lookup(output_mode = 'tf_idf',
+#'                               vocabulary = vocab,
+#'                               idf_weights = idf_weights)
+#' layer(data)
+#' ```
+#'
+#' To specify the idf weights for oov tokens, you will need to pass the entire
+#' vocabulary including the leading oov token.
+#'
+#' ```{r}
+#' vocab <- c(-1, 12, 36, 1138, 42) |> as.integer()
+#' idf_weights <- c(0.9, 0.25, 0.75, 0.6, 0.4)
+#' data <- rbind(c(12, 1138, 42, 42),
+#'               c(42, 7, 36, 7)) |> op_array("int64")
+#' layer <- layer_integer_lookup(output_mode = 'tf_idf',
+#'                               vocabulary = vocab,
+#'                               idf_weights = idf_weights)
+#' layer(data)
+#' ```
+#'
+#' When adapting the layer in `"tf_idf"` mode, each input sample will
+#' be considered a document, and IDF weight per token will be
+#' calculated as:
+#' `log(1 + num_documents / (1 + token_document_count))`.
+#'
+#' **Inverse lookup**
+#'
+#' This example demonstrates how to map indices to tokens using this layer.
+#' (You can also use `adapt()` with `inverse = TRUE`, but for simplicity we'll
+#' pass the vocab in this example.)
+#'
+#' ```{r}
+#' vocab <- c(12, 36, 1138, 42) |> as.integer()
+#' data <- op_array(c(1, 3, 4,
+#'                    4, 0, 2)) |> op_reshape(c(2,-1)) |> op_cast("int32")
+#' layer <- layer_integer_lookup(vocabulary = vocab, invert = TRUE)
+#' layer(data)
+#' ```
+#'
+#' Note that the first index corresponds to the oov token by default.
+#'
+#' **Forward and inverse lookup pairs**
+#'
+#' This example demonstrates how to use the vocabulary of a standard lookup
+#' layer to create an inverse lookup layer.
+#'
+#' ```{r}
+#' vocab <- c(12, 36, 1138, 42) |> as.integer()
+#' data <- op_array(rbind(c(12, 1138, 42), c(42, 1000, 36)), "int32")
+#' layer <- layer_integer_lookup(vocabulary = vocab)
+#' i_layer <- layer_integer_lookup(vocabulary = get_vocabulary(layer),
+#'                                 invert = TRUE)
+#' int_data <- layer(data)
+#' i_layer(int_data)
+#' ```
+#'
+#' In this example, the input token 1000 resulted in an output of -1, since
+#' 1000 was not in the vocabulary - it got represented as an OOV, and all OOV
+#' tokens are returned as -1 in the inverse layer. Also, note that for the
+#' inverse to work, you must have already set the forward layer vocabulary
+#' either directly or via `adapt()` before calling `get_vocabulary()`.
+#'
+#' @param max_tokens
+#' Maximum size of the vocabulary for this layer. This should
+#' only be specified when adapting the vocabulary or when setting
+#' `pad_to_max_tokens=TRUE`. If NULL, there is no cap on the size of
+#' the vocabulary. Note that this size includes the OOV
+#' and mask tokens. Defaults to `NULL`.
+#'
+#' @param num_oov_indices
+#' The number of out-of-vocabulary tokens to use.
+#' If this value is more than 1, OOV inputs are modulated to
+#' determine their OOV value.
+#' If this value is 0, OOV inputs will cause an error when calling
+#' the layer. Defaults to `1`.
+#'
+#' @param mask_token
+#' An integer token that represents masked inputs. When
+#' `output_mode` is `"int"`, the token is included in vocabulary
+#' and mapped to index 0. In other output modes,
+#' the token will not appear in the vocabulary and instances
+#' of the mask token in the input will be dropped.
+#' If set to NULL, no mask term will be added. Defaults to `NULL`.
+#'
+#' @param oov_token
+#' Only used when `invert` is `TRUE`. The token to return
+#' for OOV indices. Defaults to `-1`.
+#'
+#' @param vocabulary
+#' Optional. Either an array of integers or a string path to a
+#' text file. If passing an array, can pass a list,
+#' 1D NumPy array, or 1D tensor containing the integer vocabulary terms.
+#' If passing a file path, the file should contain one line per term
+#' in the vocabulary. If this argument is set,
+#' there is no need to `adapt()` the layer.
+#'
+#' @param vocabulary_dtype
+#' The dtype of the vocabulary terms, for example
+#' `"int64"` or `"int32"`. Defaults to `"int64"`.
+#'
+#' @param idf_weights
+#' Only valid when `output_mode` is `"tf_idf"`.
+#' A list, 1D NumPy array, or 1D tensor of the same length
+#' as the vocabulary, containing the floating point inverse document
+#' frequency weights, which will be multiplied by per sample term
+#' counts for the final TF-IDF weight.
+#' If the `vocabulary` argument is set, and `output_mode` is
+#' `"tf_idf"`, this argument must be supplied.
+#'
+#' @param invert
+#' Only valid when `output_mode` is `"int"`.
+#' If `TRUE`, this layer will map indices to vocabulary items
+#' instead of mapping vocabulary items to indices.
+#' Defaults to `FALSE`.
+#'
+#' @param output_mode
+#' Specification for the output of the layer. Values can be
+#' `"int"`, `"one_hot"`, `"multi_hot"`, `"count"`, or `"tf_idf"`,
+#' configuring the layer as follows:
+#' - `"int"`: Return the vocabulary indices of the input tokens.
+#' - `"one_hot"`: Encodes each individual element in the input into an
+#'   array the same size as the vocabulary,
+#'   containing a 1 at the element index. If the last dimension
+#'   is size 1, will encode on that dimension.
+#'   If the last dimension is not size 1, will append a new
+#'   dimension for the encoded output.
+#' - `"multi_hot"`: Encodes each sample in the input into a single
+#'   array the same size as the vocabulary,
+#'   containing a 1 for each vocabulary term present in the sample.
+#'   Treats the last dimension as the sample dimension,
+#'   if input shape is `(..., sample_length)`,
+#'   output shape will be `(..., num_tokens)`.
+#' - `"count"`: As `"multi_hot"`, but the int array contains
+#'   a count of the number of times the token at that index
+#'   appeared in the sample.
+#' - `"tf_idf"`: As `"multi_hot"`, but the TF-IDF algorithm is
+#'   applied to find the value in each token slot.
+#' For `"int"` output, any shape of input and output is supported.
+#' For all other output modes, currently only output up to rank 2
+#' is supported. Defaults to `"int"`.
+#'
+#' @param pad_to_max_tokens
+#' Only applicable when `output_mode` is `"multi_hot"`,
+#' `"count"`, or `"tf_idf"`. If `TRUE`, the output will have
+#' its feature axis padded to `max_tokens` even if the number
+#' of unique tokens in the vocabulary is less than `max_tokens`,
+#' resulting in a tensor of shape `(batch_size, max_tokens)`
+#' regardless of vocabulary size. Defaults to `FALSE`.
+#'
+#' @param sparse
+#' Boolean. Only applicable to `"multi_hot"`, `"count"`, and
+#' `"tf_idf"` output modes. Only supported with TensorFlow
+#' backend. If `TRUE`, returns a `SparseTensor`
+#' instead of a dense `Tensor`. Defaults to `FALSE`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param name
+#' String, name for the object.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family categorical features preprocessing layers
#' @family preprocessing layers
-#'
+#' @family layers
#' @seealso
-#' -
-#' -
-#' @export
-layer_random_translation <-
-function(object, height_factor, width_factor, fill_mode = "reflect",
-         interpolation = "bilinear", seed = NULL, fill_value = 0, ...)
+#' + +# + +#' +#' @tether keras.layers.IntegerLookup +layer_integer_lookup <- +function (object, max_tokens = NULL, num_oov_indices = 1L, mask_token = NULL, + oov_token = -1L, vocabulary = NULL, vocabulary_dtype = "int64", + idf_weights = NULL, invert = FALSE, output_mode = "int", + sparse = FALSE, pad_to_max_tokens = FALSE, name = NULL, ...) { - require_tf_version("2.6", "layer_random_translation()") - args <- capture_args(match.call(), - list(seed = as_nullable_integer, - interpolation = fix_string, - fill_mode = fix_string), - ignore = "object") - create_layer(keras$layers$RandomTranslation, object, args) + args <- capture_args(list(num_oov_indices = as_integer, + mask_token = as_integer, oov_token = as_integer, vocabulary = as_integer, + invert = as_integer, output_mode = as_integer, input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape), + ignore = "object") + create_layer(keras$layers$IntegerLookup, object, args) } -#' Randomly rotate each image +#' A preprocessing layer that normalizes continuous features. #' -#' @details -#' By default, random rotations are only applied during training. -#' At inference time, the layer does nothing. If you need to apply random -#' rotations at inference time, set `training` to TRUE when calling the layer. +#' @description +#' This layer will shift and scale inputs into a distribution centered around +#' 0 with standard deviation 1. It accomplishes this by precomputing the mean +#' and variance of the data, and calling `(input - mean) / sqrt(var)` at +#' runtime. #' -#' Input shape: -#' 3D (unbatched) or 4D (batched) tensor with shape: -#' `(..., height, width, channels)`, in `"channels_last"` format +#' The mean and variance values for the layer must be either supplied on +#' construction or learned via `adapt()`. `adapt()` will compute the mean and +#' variance of the data and store them as the layer's weights. `adapt()` should +#' be called before `fit()`, `evaluate()`, or `predict()`. #' -#' Output shape: -#' 3D (unbatched) or 4D (batched) tensor with shape: -#' `(..., height, width, channels)`, in `"channels_last"` format +#' # Examples +#' Calculate a global mean and variance by analyzing the dataset in `adapt()`. +#' +#' ```{r} +#' adapt_data <- op_array(c(1., 2., 3., 4., 5.), dtype='float32') +#' input_data <- op_array(c(1., 2., 3.), dtype='float32') +#' layer <- layer_normalization(axis = NULL) +#' layer %>% adapt(adapt_data) +#' layer(input_data) +#' ``` +#' +#' Calculate a mean and variance for each index on the last axis. +#' +#' ```{r} +#' adapt_data <- op_array(rbind(c(0., 7., 4.), +#' c(2., 9., 6.), +#' c(0., 7., 4.), +#' c(2., 9., 6.)), dtype='float32') +#' input_data <- op_array(matrix(c(0., 7., 4.), nrow = 1), dtype='float32') +#' layer <- layer_normalization(axis=-1) +#' layer %>% adapt(adapt_data) +#' layer(input_data) +#' ``` +#' +#' Pass the mean and variance directly. +#' +#' ```{r} +#' input_data <- op_array(rbind(1, 2, 3), dtype='float32') +#' layer <- layer_normalization(mean=3., variance=2.) +#' layer(input_data) +#' ``` +#' +#' Use the layer to de-normalize inputs (after adapting the layer). +#' +#' ```{r} +#' adapt_data <- op_array(rbind(c(0., 7., 4.), +#' c(2., 9., 6.), +#' c(0., 7., 4.), +#' c(2., 9., 6.)), dtype='float32') +#' input_data <- op_array(c(1., 2., 3.), dtype='float32') +#' layer <- layer_normalization(axis=-1, invert=TRUE) +#' layer %>% adapt(adapt_data) +#' layer(input_data) +#' ``` +#' +#' @param axis +#' Integer, list of integers, or NULL. 
The axis or axes that should
+#' have a separate mean and variance for each index in the shape.
+#' For example, if shape is `(NULL, 5)` and `axis=1`, the layer will
+#' track 5 separate mean and variance values for the last axis.
+#' If `axis` is set to `NULL`, the layer will normalize
+#' all elements in the input by a scalar mean and variance.
+#' When `-1`, the last axis of the input is assumed to be a
+#' feature dimension and is normalized per index.
+#' Note that in the specific case of batched scalar inputs where
+#' the only axis is the batch axis, the default will normalize
+#' each index in the batch separately.
+#' In this case, consider passing `axis=NULL`. Defaults to `-1`.
+#'
+#' @param mean
+#' The mean value(s) to use during normalization. The passed value(s)
+#' will be broadcast to the shape of the kept axes above;
+#' if the value(s) cannot be broadcast, an error will be raised when
+#' this layer's `build()` method is called.
+#'
+#' @param variance
+#' The variance value(s) to use during normalization. The passed
+#' value(s) will be broadcast to the shape of the kept axes above;
+#' if the value(s) cannot be broadcast, an error will be raised when
+#' this layer's `build()` method is called.
+#'
+#' @param invert
+#' If `TRUE`, this layer will apply the inverse transformation
+#' to its inputs: it would turn a normalized input back into its
+#' original form.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family numerical features preprocessing layers
+#' @family preprocessing layers
+#' @family layers
+#' @seealso
+#' +
# +
#'
-#' @inheritParams layer_dense
+#' @tether keras.layers.Normalization
+layer_normalization <-
+function (object, axis = -1L, mean = NULL, variance = NULL, invert = FALSE,
+    ...)
+{
+    args <- capture_args(list(axis = as_axis, input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$Normalization, object, args)
+}
+
+
+#' A preprocessing layer which randomly adjusts brightness during training.
+#' @description
+#' This layer will randomly increase/reduce the brightness for the input RGB
+#' images. At inference time, the output will be identical to the input.
+#' Call the layer with `training=TRUE` to adjust the brightness of the input.
#'
-#' Input shape:
-#' 3D (unbatched) or 4D (batched) tensor with shape:
-#' `(..., height, width, channels)`, in `"channels_last"` format
+#' **Note:** This layer is safe to use inside a `tf.data` pipeline
+#' (independently of which backend you're using).
#'
-#' Output shape:
-#' 3D (unbatched) or 4D (batched) tensor with shape:
-#' `(..., height, width, channels)`, in `"channels_last"` format
+#' # Inputs
+#' 3D (HWC) or 4D (NHWC) tensor, with float or int dtype. Input pixel
+#' values can be of any range (e.g. `[0., 1.)` or `[0, 255]`)
#'
+#' # Output
+#' 3D (HWC) or 4D (NHWC) tensor with brightness adjusted based on the
+#' `factor`. By default, the layer will output floats.
+#' The output value will be clipped to the range `[0, 255]`,
+#' the valid range of RGB colors, and
+#' rescaled based on the `value_range` if needed.
#'
+#' # Example
#'
+#' ```{r}
+#' random_bright <- layer_random_brightness(factor=0.2, seed = 1)
#'
+#' # An image with shape [2, 2, 3]
+#' image <- array(1:12, dim=c(2, 2, 3))
#'
+#' # Assume we randomly select the factor to be 0.1, then it will apply
+#' # 0.1 * 255 to all the channels.
+#' output <- random_bright(image, training=TRUE)
+#' output
+#' ```
#'
-#' @inheritParams layer_dense
#'
-#' @param factor a float represented as fraction of 2 Pi, or a list of size 2
-#' representing lower and upper bound for rotating clockwise and
-#' counter-clockwise. A positive values means rotating counter clock-wise,
-#' while a negative value means clock-wise. When represented as a single
-#' float, this value is used for both the upper and lower bound. For
-#' instance, `factor = c(-0.2, 0.3)` results in an output rotation by a random
-#' amount in the range `[-20% * 2pi, 30% * 2pi]`. `factor = 0.2` results in an
-#' output rotating by a random amount in the range `[-20% * 2pi, 20% * 2pi]`.
-#'
-#' @param fill_mode Points outside the boundaries of the input are filled according
-#' to the given mode (one of `{"constant", "reflect", "wrap", "nearest"}`).
-#' - *reflect*: `(d c b a | a b c d | d c b a)` The input is extended by -#' reflecting about the edge of the last pixel. -#' - *constant*: `(k k k k | a b c d | k k k k)` The input is extended by -#' filling all values beyond the edge with the same constant value k = 0. -#' - *wrap*: `(a b c d | a b c d | a b c d)` The input is extended by -#' wrapping around to the opposite edge. -#' - *nearest*: `(a a a a | a b c d | d d d d)` The input is extended by the -#' nearest pixel. +#' @param factor +#' Float or a list of 2 floats between -1.0 and 1.0. The +#' factor is used to determine the lower bound and upper bound of the +#' brightness adjustment. A float value will be chosen randomly between +#' the limits. When -1.0 is chosen, the output image will be black, and +#' when 1.0 is chosen, the image will be fully white. +#' When only one float is provided, eg, 0.2, +#' then -0.2 will be used for lower bound and 0.2 +#' will be used for upper bound. #' -#' @param interpolation Interpolation mode. Supported values: `"nearest"`, -#' `"bilinear"`. +#' @param value_range +#' Optional list of 2 floats +#' for the lower and upper limit +#' of the values of the input data. +#' To make no change, use `c(0.0, 1.0)`, e.g., if the image input +#' has been scaled before this layer. Defaults to `c(0.0, 255.0)`. +#' The brightness adjustment will be scaled to this range, and the +#' output values will be clipped to this range. #' -#' @param seed Integer. Used to create a random seed. +#' @param seed +#' optional integer, for fixed RNG behavior. #' -#' @param fill_value a float represents the value to be filled outside the boundaries -#' when `fill_mode="constant"`. +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. #' -#' @param ... standard layer arguments. +#' @param ... +#' For forward/backward compatability. #' +#' @inherit layer_dense return +#' @export #' @family image augmentation layers #' @family preprocessing layers -#' +#' @family layers #' @seealso -#' - -#' - -#' @export -layer_random_zoom <- -function(object, height_factor, width_factor = NULL, fill_mode = "reflect", - interpolation = "bilinear", seed = NULL, fill_value = 0, ...) +#' + +# + +#' +#' @tether keras.layers.RandomBrightness +layer_random_brightness <- +function (object, factor, value_range = list(0L, 255L), seed = NULL, + ...) { - require_tf_version("2.6", "layer_random_zoom()") - args <- capture_args(match.call(), - list(seed = as_nullable_integer, - interpolation = fix_string, - fill_mode = fix_string), - ignore = "object") - create_layer(keras$layers$RandomZoom, object, args) + args <- capture_args(list(seed = as_integer, input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape), + ignore = "object") + create_layer(keras$layers$RandomBrightness, object, args) } -#' Adjust the contrast of an image or images by a random factor +#' A preprocessing layer which randomly adjusts contrast during training. #' -#' @details -#' Contrast is adjusted independently for each channel of each image during -#' training. +#' @description +#' This layer will randomly adjust the contrast of an image or images +#' by a random factor. Contrast is adjusted independently +#' for each channel of each image during training. #' #' For each channel, this layer computes the mean of the image pixels in the #' channel and then adjusts each component `x` of each pixel to #' `(x - mean) * contrast_factor + mean`. 
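+#'
+#' As an illustrative plain-R sketch of that computation (the sampled
+#' `contrast_factor` below is an assumed draw, not a fixed output of the
+#' layer): with `factor = 0.5`, the factor is picked in `[0.5, 1.5]`.
+#'
+#' ```{r}
+#' x <- c(0.2, 0.4, 0.6, 0.8)      # pixel values of one channel
+#' contrast_factor <- 1.25         # one possible sampled factor
+#' m <- mean(x)
+#' (x - m) * contrast_factor + m   # what the layer computes per channel
+#' ```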
#'
-#' Input shape:
-#' 3D (unbatched) or 4D (batched) tensor with shape:
-#' `(..., height, width, channels)`, in `"channels_last"` format.
+#' Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and
+#' in integer or floating point dtype.
+#' By default, the layer will output floats.
+#'
+#' **Note:** This layer is safe to use inside a `tf.data` pipeline
+#' (independently of which backend you're using).
+#'
+#' # Input Shape
+#' 3D (unbatched) or 4D (batched) tensor with shape:
+#' `(..., height, width, channels)`, in `"channels_last"` format.
#'
-#' Output shape:
-#' 3D (unbatched) or 4D (batched) tensor with shape:
-#' `(..., height, width, channels)`, in `"channels_last"` format.
+#' # Output Shape
+#' 3D (unbatched) or 4D (batched) tensor with shape:
+#' `(..., height, width, channels)`, in `"channels_last"` format.
#'
-#' @inheritParams layer_dense
+#' @param factor
+#' a positive float represented as fraction of value, or a list of
+#' size 2 representing lower and upper bound.
+#' When represented as a single float, lower = upper.
+#' The contrast factor will be randomly picked between
+#' `[1.0 - lower, 1.0 + upper]`. For any pixel `x` in the channel,
+#' the output will be `(x - mean) * factor + mean`
+#' where `mean` is the mean value of the channel.
#'
-#' @param factor a positive float represented as fraction of value, or a list of
-#' size 2 representing lower and upper bound. When represented as a single
-#' float, lower = upper. The contrast factor will be randomly picked between
-#' `[1.0 - lower, 1.0 + upper]`.
+#' @param seed
+#' Integer. Used to create a random seed.
#'
-#' @param seed Integer. Used to create a random seed.
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
#'
-#' @param ... standard layer arguments.
+#' @param ...
+#' For forward/backward compatibility.
#'
+#' @inherit layer_dense return
+#' @export
#' @family image augmentation layers
#' @family preprocessing layers
-#'
+#' @family layers
#' @seealso
-#' -
-#' -
-#' @export
+#' +
# +
+#' @tether keras.layers.RandomContrast
layer_random_contrast <-
-function(object, factor, seed = NULL, ...)
+function (object, factor, seed = NULL, ...)
{
-  require_tf_version("2.6", "layer_random_contrast()")
-  args <- capture_args(match.call(), list(seed = as_nullable_integer),
-                       ignore = "object")
-  create_layer(keras$layers$RandomContrast, object, args)
+  args <- capture_args(list(seed = as_integer, input_shape = normalize_shape,
+      batch_size = as_integer, batch_input_shape = normalize_shape),
+      ignore = "object")
+  create_layer(keras$layers$RandomContrast, object, args)
}
-#' Randomly vary the height of a batch of images during training
+#' A preprocessing layer which randomly crops images during training.
#'
-#' @details
-#' Adjusts the height of a batch of images by a random factor. The input
-#' should be a 3D (unbatched) or 4D (batched) tensor in the `"channels_last"`
-#' image data format.
+#' @description
+#' During training, this layer will randomly choose a location to crop images
+#' down to a target size. The layer will crop all the images in the same batch
+#' to the same cropping location.
+#'
+#' At inference time, and during training if an input image is smaller than the
+#' target size, the input will be resized and cropped so as to return the
+#' largest possible window in the image that matches the target aspect ratio.
+#' If you need to apply random cropping at inference time, set `training` to
+#' TRUE when calling the layer.
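+#'
+#' For instance (a minimal sketch; the batch and target sizes here are
+#' illustrative assumptions):
+#'
+#' ```{r}
+#' images <- random_uniform(c(4, 32, 32, 3))   # batch of 32x32 RGB images
+#' crop <- layer_random_crop(height = 24, width = 24, seed = 1)
+#' out <- crop(images, training = TRUE)        # random 24x24 windows
+#' op_shape(out)
+#' ```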
+#'
+#' Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and
+#' of integer or floating point dtype. By default, the layer will output
+#' floats.
+#'
+#' **Note:** This layer is safe to use inside a `tf.data` pipeline
+#' (independently of which backend you're using).
+#'
+#' # Input Shape
+#' 3D (unbatched) or 4D (batched) tensor with shape:
+#' `(..., height, width, channels)`, in `"channels_last"` format.
+#'
+#' # Output Shape
+#' 3D (unbatched) or 4D (batched) tensor with shape:
+#' `(..., target_height, target_width, channels)`.
#'
-#' By default, this layer is inactive during inference.
+#' @param height
+#' Integer, the height of the output shape.
#'
-#' @inheritParams layer_dense
+#' @param width
+#' Integer, the width of the output shape.
#'
-#' @param factor A positive float (fraction of original height), or a list of size 2
-#' representing lower and upper bound for resizing vertically. When
-#' represented as a single float, this value is used for both the upper and
-#' lower bound. For instance, `factor = c(0.2, 0.3)` results in an output with
-#' height changed by a random amount in the range `[20%, 30%]`.
-#' `factor = c(-0.2, 0.3)` results in an output with height changed by a random
-#' amount in the range `[-20%, +30%]`. `factor=0.2` results in an output with
-#' height changed by a random amount in the range `[-20%, +20%]`.
+#' @param seed
+#' Integer. Used to create a random seed.
#'
-#' @param interpolation String, the interpolation method. Defaults to `"bilinear"`.
-#' Supports `"bilinear"`, `"nearest"`, `"bicubic"`, `"area"`,
-#' `"lanczos3"`, `"lanczos5"`, `"gaussian"`, `"mitchellcubic"`.
+#' @param ...
+#' Base layer keyword arguments, such as
+#' `name` and `dtype`.
#'
-#' @param seed Integer. Used to create a random seed.
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
#'
-#' @param ... standard layer arguments.
+#' @param name
+#' String, name for the object.
#'
+#' @param data_format
+#' see description
+#'
+#' @inherit layer_dense return
+#' @export
#' @family image augmentation layers
#' @family preprocessing layers
-#'
+#' @family layers
#' @seealso
-#' -
-#' -
-#' @export
-layer_random_height <-
-function(object, factor, interpolation = "bilinear", seed = NULL, ...)
+#' +
# +
+#' @tether keras.layers.RandomCrop
+layer_random_crop <-
+function (object, height, width, seed = NULL, data_format = NULL,
+    name = NULL, ...)
{
-  require_tf_version("2.6", "layer_random_height()")
-  args <- capture_args(match.call(),
-                       list(seed = as_nullable_integer,
-                            interpolation = fix_string),
-                       ignore = "object")
-  create_layer(keras$layers$RandomHeight, object, args)
+  args <- capture_args(list(height = as_integer, width = as_integer,
+      seed = as_integer, input_shape = normalize_shape, batch_size = as_integer,
+      batch_input_shape = normalize_shape), ignore = "object")
+  create_layer(keras$layers$RandomCrop, object, args)
}
-#' Randomly vary the width of a batch of images during training
+#' A preprocessing layer which randomly flips images during training.
#'
-#' @details
-#' Adjusts the width of a batch of images by a random factor. The input
-#' should be a 3D (unbatched) or 4D (batched) tensor in the `"channels_last"`
-#' image data format.
+#' @description
+#' This layer will flip the images horizontally and/or vertically based on the
+#' `mode` attribute. During inference time, the output will be identical to
+#' the input. Call the layer with `training=TRUE` to flip the input.
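+#'
+#' For instance (a minimal sketch, with an assumed toy batch):
+#'
+#' ```{r}
+#' images <- random_uniform(c(2, 4, 4, 3))
+#' flip <- layer_random_flip(mode = "horizontal", seed = 1)
+#' flipped <- flip(images, training = TRUE)  # left-right flips, applied at random
+#' ```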
+#' Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and
+#' of integer or floating point dtype.
+#' By default, the layer will output floats.
#'
-#' By default, this layer is inactive during inference.
+#' **Note:** This layer is safe to use inside a `tf.data` pipeline
+#' (independently of which backend you're using).
#'
-#' @inheritParams layer_dense
+#' # Input Shape
+#' 3D (unbatched) or 4D (batched) tensor with shape:
+#' `(..., height, width, channels)`, in `"channels_last"` format.
#'
-#' @param factor A positive float (fraction of original height), or a list of size 2
-#' representing lower and upper bound for resizing vertically. When
-#' represented as a single float, this value is used for both the upper and
-#' lower bound. For instance, `factor = c(0.2, 0.3)` results in an output with
-#' width changed by a random amount in the range `[20%, 30%]`. `factor=(-0.2,
-#' 0.3)` results in an output with width changed by a random amount in the
-#' range `[-20%, +30%]`. `factor = 0.2` results in an output with width changed
-#' by a random amount in the range `[-20%, +20%]`.
+#' # Output Shape
+#' 3D (unbatched) or 4D (batched) tensor with shape:
+#' `(..., height, width, channels)`, in `"channels_last"` format.
#'
-#' @param interpolation String, the interpolation method. Defaults to `bilinear`.
-#' Supports `"bilinear"`, `"nearest"`, `"bicubic"`, `"area"`, `"lanczos3"`,
-#' `"lanczos5"`, `"gaussian"`, `"mitchellcubic"`.
+#' @param mode
+#' String indicating which flip mode to use. Can be `"horizontal"`,
+#' `"vertical"`, or `"horizontal_and_vertical"`. `"horizontal"` is a
+#' left-right flip and `"vertical"` is a top-bottom flip. Defaults to
+#' `"horizontal_and_vertical"`.
#'
-#' @param seed Integer. Used to create a random seed.
+#' @param seed
+#' Integer. Used to create a random seed.
#'
-#' @param ... standard layer arguments.
+#' @param ...
+#' Base layer keyword arguments, such as
+#' `name` and `dtype`.
#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @inherit layer_dense return
+#' @export
#' @family image augmentation layers
#' @family preprocessing layers
-#'
+#' @family layers
#' @seealso
-#' -
-#' -
-#' @export
-layer_random_width <-
-function(object, factor, interpolation = "bilinear", seed = NULL, ...)
+#' +
# +
+#' @tether keras.layers.RandomFlip
+layer_random_flip <-
+function (object, mode = "horizontal_and_vertical", seed = NULL,
+    ...)
{
-  require_tf_version("2.6", "layer_random_width()")
-  args <- capture_args(match.call(),
-                       list(seed = as_nullable_integer,
-                            interpolation = fix_string),
-                       ignore = "object")
-  create_layer(keras$layers$RandomWidth, object, args)
+  args <- capture_args(list(seed = as_integer, input_shape = normalize_shape,
+      batch_size = as_integer, batch_input_shape = normalize_shape),
+      ignore = "object")
+  create_layer(keras$layers$RandomFlip, object, args)
}
-#' A preprocessing layer which randomly adjusts brightness during training
+#' A preprocessing layer which randomly rotates images during training.
#'
-#' @details
-#' This layer will randomly increase/reduce the brightness for the input RGB
-#' images. At inference time, the output will be identical to the input.
-#' Call the layer with `training=TRUE` to adjust the brightness of the input.
+#' @description
+#' This layer will apply random rotations to each image, filling empty space
+#' according to `fill_mode`.
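+#'
+#' For instance (a minimal sketch; `factor = 0.1` allows rotations of up to
+#' ten percent of a full circle in either direction):
+#'
+#' ```{r}
+#' images <- random_uniform(c(2, 32, 32, 3))
+#' rotate <- layer_random_rotation(factor = 0.1, seed = 1)
+#' rotated <- rotate(images, training = TRUE)
+#' ```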
#'
-#' Note that different brightness adjustment factors
-#' will be apply to each the images in the batch.
+#' By default, random rotations are only applied during training.
+#' At inference time, the layer does nothing. If you need to apply random
+#' rotations at inference time, pass `training = TRUE` when calling the layer.
#'
-#' For an overview and full list of preprocessing layers, see the preprocessing
-#' [guide](https://www.tensorflow.org/guide/keras/preprocessing_layers).
+#' Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and
+#' of integer or floating point dtype.
+#' By default, the layer will output floats.
#'
-#' @param factor Float or a list of 2 floats between -1.0 and 1.0. The
-#' factor is used to determine the lower bound and upper bound of the
-#' brightness adjustment. A float value will be chosen randomly between
-#' the limits. When -1.0 is chosen, the output image will be black, and
-#' when 1.0 is chosen, the image will be fully white. When only one float
-#' is provided, eg, 0.2, then -0.2 will be used for lower bound and 0.2
-#' will be used for upper bound.
+#' **Note:** This layer is safe to use inside a `tf.data` pipeline
+#' (independently of which backend you're using).
#'
-#' @param value_range Optional list of 2 floats for the lower and upper limit
-#' of the values of the input data. Defaults to `[0.0, 255.0]`. Can be changed
-#' to e.g. `[0.0, 1.0]` if the image input has been scaled before this layer.
-#' The brightness adjustment will be scaled to this range, and the
-#' output values will be clipped to this range.
+#' # Input Shape
+#' 3D (unbatched) or 4D (batched) tensor with shape:
+#' `(..., height, width, channels)`, in `"channels_last"` format
+#'
+#' # Output Shape
+#' 3D (unbatched) or 4D (batched) tensor with shape:
+#' `(..., height, width, channels)`, in `"channels_last"` format
+#'
+#' @param factor
+#' a float represented as a fraction of 2 Pi, or a list of size 2
+#' representing lower and upper bound for rotating clockwise and
+#' counter-clockwise. A positive value means rotating
+#' counter-clockwise,
+#' while a negative value means rotating clockwise.
+#' When represented as a single
+#' float, this value is used for both the upper and lower bound.
+#' For instance, `factor = c(-0.2, 0.3)`
+#' results in an output rotation by a random
+#' amount in the range `[-20% * 2pi, 30% * 2pi]`.
+#' `factor = 0.2` results in an
+#' output rotating by a random amount
+#' in the range `[-20% * 2pi, 20% * 2pi]`.
+#'
+#' @param fill_mode
+#' Points outside the boundaries of the input are filled
+#' according to the given mode
+#' (one of `{"constant", "reflect", "wrap", "nearest"}`).
+#' - *reflect*: `(d c b a | a b c d | d c b a)`
+#'   The input is extended by reflecting about
+#'   the edge of the last pixel.
+#' - *constant*: `(k k k k | a b c d | k k k k)`
+#'   The input is extended by
+#'   filling all values beyond the edge with
+#'   the same constant value k = 0.
+#' - *wrap*: `(a b c d | a b c d | a b c d)` The input is extended by
+#'   wrapping around to the opposite edge.
+#' - *nearest*: `(a a a a | a b c d | d d d d)`
+#'   The input is extended by the nearest pixel.
#'
-#' @param seed optional integer, for fixed RNG behavior.
-#' @param ... standard layer arguments.
+#' @param interpolation
+#' Interpolation mode. Supported values: `"nearest"`,
+#' `"bilinear"`.
+#'
+#' @param seed
+#' Integer. Used to create a random seed.
+#'
+#' @param fill_value
+#' a float that represents the value to be filled outside
+#' the boundaries when `fill_mode="constant"`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
#'
-#' @inheritParams layer_dense
+#' @param ...
+#' For forward/backward compatibility.
#'
+#' @param value_range
+#' see description
+#'
+#' @param data_format
+#' see description
+#'
+#' @inherit layer_dense return
+#' @export
#' @family image augmentation layers
#' @family preprocessing layers
-#'
+#' @family layers
#' @seealso
-#' +
-#' +
-#' @export
-layer_random_brightness <-
-function(object, factor, value_range = c(0, 255), seed = NULL, ...)
+#' +
# +
+#' @tether keras.layers.RandomRotation
+layer_random_rotation <-
+function (object, factor, fill_mode = "reflect", interpolation = "bilinear",
+    seed = NULL, fill_value = 0, value_range = list(0L, 255L),
+    data_format = NULL, ...)
{
-  require_tf_version("2.9", "layer_random_brightness()")
-  args <- capture_args(match.call(), list(seed = as_nullable_integer),
-                       ignore = "object")
-  create_layer(keras$layers$RandomBrightness, object, args)
+  args <- capture_args(list(seed = as_integer, input_shape = normalize_shape,
+      batch_size = as_integer, batch_input_shape = normalize_shape),
+      ignore = "object")
+  create_layer(keras$layers$RandomRotation, object, args)
}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-## ---- categorical features preprocessing ----
-
-
-#' A preprocessing layer which encodes integer features.
+#' A preprocessing layer which randomly translates images during training.
#'
#' @description
+#' This layer will apply random translations to each image during training,
+#' filling empty space according to `fill_mode`.
+#'
+#' Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and
+#' of integer or floating point dtype. By default, the layer will output
+#' floats.
+#'
+#' # Input Shape
+#' 3D (unbatched) or 4D (batched) tensor with shape:
+#' `(..., height, width, channels)`, in `"channels_last"` format,
+#' or `(..., channels, height, width)`, in `"channels_first"` format.
+#'
+#' # Output Shape
+#' 3D (unbatched) or 4D (batched) tensor with shape:
+#' `(..., target_height, target_width, channels)`,
+#' or `(..., channels, target_height, target_width)`,
+#' in `"channels_first"` format.
+#'
+#' **Note:** This layer is safe to use inside a `tf.data` pipeline
+#' (independently of which backend you're using).
+#'
+#' @param height_factor
+#' a float represented as fraction of value, or a list of
+#' size 2 representing lower and upper bound for shifting vertically. A
+#' negative value means shifting image up, while a positive value means
+#' shifting image down. When represented as a single positive float,
+#' this value is used for both the upper and lower bound. For instance,
+#' `height_factor = c(-0.2, 0.3)` results in an output shifted by a random
+#' amount in the range `[-20%, +30%]`. `height_factor = 0.2` results in
+#' an output shifted vertically by a random amount in the range
+#' `[-20%, +20%]`.
+#'
+#' @param width_factor
+#' a float represented as fraction of value, or a list of
+#' size 2 representing lower and upper bound for shifting horizontally.
+#' A negative value means shifting image left, while a positive value
+#' means shifting image right. When represented as a single positive
+#' float, this value is used for both the upper and lower bound. For
+#' instance, `width_factor = c(-0.2, 0.3)` results in an output shifted
+#' left by up to 20%, or shifted right by up to 30%.
`width_factor = 0.2` results
+#' in an output shifted left or right by up to 20%.
+#'
+#' @param fill_mode
+#' Points outside the boundaries of the input are filled
+#' according to the given mode. Available methods are `"constant"`,
+#' `"nearest"`, `"wrap"` and `"reflect"`. Defaults to `"reflect"`.
+#' - `"reflect"`: `(d c b a | a b c d | d c b a)`
+#'   The input is extended by reflecting about the edge of the last
+#'   pixel.
+#' - `"constant"`: `(k k k k | a b c d | k k k k)`
+#'   The input is extended by filling all values beyond
+#'   the edge with the same constant value k specified by
+#'   `fill_value`.
+#' - `"wrap"`: `(a b c d | a b c d | a b c d)`
+#'   The input is extended by wrapping around to the opposite edge.
+#' - `"nearest"`: `(a a a a | a b c d | d d d d)`
+#'   The input is extended by the nearest pixel.
+#' Note that when using torch backend, `"reflect"` is redirected to
+#' `"mirror"` `(c d c b | a b c d | c b a b)` because torch does not
+#' support `"reflect"`.
+#' Note that torch backend does not support `"wrap"`.
+#'
+#' @param interpolation
+#' Interpolation mode. Supported values: `"nearest"`,
+#' `"bilinear"`.
#'
+#' @param seed
+#' Integer. Used to create a random seed.
#'
+#' @param fill_value
+#' a float that represents the value to be filled outside the
+#' boundaries when `fill_mode="constant"`.
#'
+#' @param data_format
+#' string, either `"channels_last"` or `"channels_first"`.
+#' The ordering of the dimensions in the inputs. `"channels_last"`
+#' corresponds to inputs with shape `(batch, height, width, channels)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch, channels, height, width)`. It defaults to the
+#' `image_data_format` value found in your Keras config file at
+#' `~/.keras/keras.json`. If you never set it, then it will be
+#' `"channels_last"`.
+#'
+#' @param ...
+#' Base layer keyword arguments, such as `name` and `dtype`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family image augmentation layers
+#' @family preprocessing layers
+#' @family layers
+#' @seealso
+#' +
# +
+#' @tether keras.layers.RandomTranslation
+layer_random_translation <-
+function (object, height_factor, width_factor, fill_mode = "reflect",
+    interpolation = "bilinear", seed = NULL, fill_value = 0,
+    data_format = NULL, ...)
+{
+    args <- capture_args(list(seed = as_integer, input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$RandomTranslation, object, args)
+}
+
+
+#' A preprocessing layer which randomly zooms images during training.
+#'
+#' @description
+#' This layer will randomly zoom in or out on each axis of an image
+#' independently, filling empty space according to `fill_mode`.
+#'
+#' Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and
+#' of integer or floating point dtype.
+#' By default, the layer will output floats.
+#'
+#' # Input Shape
+#' 3D (unbatched) or 4D (batched) tensor with shape:
+#' `(..., height, width, channels)`, in `"channels_last"` format,
+#' or `(..., channels, height, width)`, in `"channels_first"` format.
+#'
+#' # Output Shape
+#' 3D (unbatched) or 4D (batched) tensor with shape:
+#' `(..., target_height, target_width, channels)`,
+#' or `(..., channels, target_height, target_width)`,
+#' in `"channels_first"` format.
+#'
+#' **Note:** This layer is safe to use inside a `tf.data` pipeline
+#' (independently of which backend you're using).
+#'
+#' # Examples
+#' ```{r}
+#' input_img <- random_uniform(c(32, 224, 224, 3))
+#' layer <- layer_random_zoom(height_factor = .5, width_factor = .2)
+#' out_img <- layer(input_img)
+#' ```
+#'
+#' @param height_factor
+#' a float represented as fraction of value, or a list of
+#' size 2 representing lower and upper bound for zooming vertically.
+#' When represented as a single float, this value is used for both the
+#' upper and lower bound. A positive value means zooming out, while a
+#' negative value means zooming in. For instance,
+#' `height_factor=c(0.2, 0.3)` results in an output zoomed out by a
+#' random amount in the range `[+20%, +30%]`.
+#' `height_factor=c(-0.3, -0.2)` results in an output zoomed in by a
+#' random amount in the range `[+20%, +30%]`.
+#'
+#' @param width_factor
+#' a float represented as fraction of value, or a list of
+#' size 2 representing lower and upper bound for zooming horizontally.
+#' When represented as a single float, this value is used for both the
+#' upper and lower bound. For instance, `width_factor=c(0.2, 0.3)`
+#' results in an output zooming out between 20% and 30%.
+#' `width_factor=c(-0.3, -0.2)` results in an output zooming in between
+#' 20% and 30%. `NULL` means zooming in both vertical and horizontal
+#' directions while preserving the aspect ratio. Defaults to `NULL`.
+#'
+#' @param fill_mode
+#' Points outside the boundaries of the input are filled
+#' according to the given mode. Available methods are `"constant"`,
+#' `"nearest"`, `"wrap"` and `"reflect"`. Defaults to `"reflect"`.
+#' - `"reflect"`: `(d c b a | a b c d | d c b a)`
+#'   The input is extended by reflecting about the edge of the last
+#'   pixel.
+#' - `"constant"`: `(k k k k | a b c d | k k k k)` +#' The input is extended by filling all values beyond +#' the edge with the same constant value k specified by +#' `fill_value`. +#' - `"wrap"`: `(a b c d | a b c d | a b c d)` +#' The input is extended by wrapping around to the opposite edge. +#' - `"nearest"`: `(a a a a | a b c d | d d d d)` +#' The input is extended by the nearest pixel. +#' Note that when using torch backend, `"reflect"` is redirected to +#' `"mirror"` `(c d c b | a b c d | c b a b)` because torch does not +#' support `"reflect"`. +#' Note that torch backend does not support `"wrap"`. +#' +#' @param interpolation +#' Interpolation mode. Supported values: `"nearest"`, +#' `"bilinear"`. #' -#' For all output modes, currently only output up to rank 2 is supported. +#' @param seed +#' Integer. Used to create a random seed. #' -#' @param sparse Boolean. If `TRUE`, returns a `SparseTensor` instead of a dense -#' `Tensor`. Defaults to `FALSE`. +#' @param fill_value +#' a float that represents the value to be filled outside +#' the boundaries when `fill_mode="constant"`. #' -#' @param ... standard layer arguments. +#' @param data_format +#' string, either `"channels_last"` or `"channels_first"`. +#' The ordering of the dimensions in the inputs. `"channels_last"` +#' corresponds to inputs with shape `(batch, height, width, channels)` +#' while `"channels_first"` corresponds to inputs with shape +#' `(batch, channels, height, width)`. It defaults to the +#' `image_data_format` value found in your Keras config file at +#' `~/.keras/keras.json`. If you never set it, then it will be +#' `"channels_last"`. #' -#' @family categorical features preprocessing layers -#' @family preprocessing layers +#' @param ... +#' Base layer keyword arguments, such as `name` and `dtype`. #' -#' @seealso -#' - -#' - +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. #' +#' @inherit layer_dense return #' @export -layer_category_encoding <- -function(object, num_tokens=NULL, output_mode = "multi_hot", sparse = FALSE, ...) +#' @family image augmentation layers +#' @family preprocessing layers +#' @family layers +#' @seealso +#' + +# + +#' +#' @tether keras.layers.RandomZoom +layer_random_zoom <- +function (object, height_factor, width_factor = NULL, fill_mode = "reflect", + interpolation = "bilinear", seed = NULL, fill_value = 0, + data_format = NULL, ...) { - require_tf_version("2.6", "layer_category_encoding()") - args <- capture_args(match.call(), - list(num_tokens = as_nullable_integer, - output_mode = fix_string), - ignore = "object") - create_layer(keras$layers$CategoryEncoding, object, args) + args <- capture_args(list(seed = as_integer, input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape), + ignore = "object") + create_layer(keras$layers$RandomZoom, object, args) } -#' A preprocessing layer which hashes and bins categorical features. -#' -#' @details -#' This layer transforms single or multiple categorical inputs to hashed output. -#' It converts a sequence of int or string to a sequence of int. The stable hash -#' function uses `tensorflow::ops::Fingerprint` to produce the same output -#' consistently across all platforms. -#' -#' This layer uses [FarmHash64](https://github.com/google/farmhash) by default, -#' which provides a consistent hashed output across different platforms and is -#' stable across invocations, regardless of device and context, by mixing the -#' input bits thoroughly. 
+#' A preprocessing layer which rescales input values to a new range. #' -#' If you want to obfuscate the hashed output, you can also pass a random `salt` -#' argument in the constructor. In that case, the layer will use the -#' [SipHash64](https://github.com/google/highwayhash) hash function, with -#' the `salt` value serving as additional input to the hash function. +#' @description +#' This layer rescales every value of an input (often an image) by multiplying +#' by `scale` and adding `offset`. #' -#' **Example (FarmHash64)** -#' ````r -#' layer <- layer_hashing(num_bins=3) -#' inp <- matrix(c('A', 'B', 'C', 'D', 'E')) -#' layer(inp) -#' # -#' ```` +#' For instance: #' -#' **Example (FarmHash64) with a mask value** -#' ````r -#' layer <- layer_hashing(num_bins=3, mask_value='') -#' inp <- matrix(c('A', 'B', 'C', 'D', 'E')) -#' layer(inp) -#' # -#' ```` +#' 1. To rescale an input in the `[0, 255]` range +#' to be in the `[0, 1]` range, you would pass `scale=1./255`. #' -#' **Example (SipHash64)** -#' ````r -#' layer <- layer_hashing(num_bins=3, salt=c(133, 137)) -#' inp <- matrix(c('A', 'B', 'C', 'D', 'E')) -#' layer(inp) -#' # -#' ```` +#' 2. To rescale an input in the `[0, 255]` range to be in the `[-1, 1]` range, +#' you would pass `scale=1./127.5, offset=-1`. #' -#' **Example (Siphash64 with a single integer, same as `salt=[133, 133]`)** -#' ````r -#' layer <- layer_hashing(num_bins=3, salt=133) -#' inp <- matrix(c('A', 'B', 'C', 'D', 'E')) -#' layer(inp) -#' # -#' ```` +#' The rescaling is applied both during training and inference. Inputs can be +#' of integer or floating point dtype, and by default the layer will output +#' floats. #' -#' @inheritParams layer_dense +#' **Note:** This layer is safe to use inside a `tf.data` pipeline +#' (independently of which backend you're using). #' -#' @param num_bins Number of hash bins. Note that this includes the `mask_value` bin, -#' so the effective number of bins is `(num_bins - 1)` if `mask_value` is -#' set. +#' @param scale +#' Float, the scale to apply to the inputs. #' -#' @param mask_value A value that represents masked inputs, which are mapped to -#' index 0. Defaults to NULL, meaning no mask term will be added and the -#' hashing will start at index 0. +#' @param offset +#' Float, the offset to apply to the inputs. #' -#' @param salt A single unsigned integer or NULL. -#' If passed, the hash function used will be SipHash64, with these values -#' used as an additional input (known as a "salt" in cryptography). -#' These should be non-zero. Defaults to `NULL` (in that -#' case, the FarmHash64 hash function is used). It also supports -#' list of 2 unsigned integer numbers, see reference paper for details. +#' @param ... +#' Base layer keyword arguments, such as `name` and `dtype`. #' -#' @param ... standard layer arguments. +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. #' -#' @family categorical features preprocessing layers +#' @inherit layer_dense return +#' @export +#' @family image preprocessing layers #' @family preprocessing layers -#' +#' @family layers #' @seealso -#' - -#' - -#' @export -layer_hashing <- -function(object, num_bins, mask_value = NULL, salt = NULL, ...) +#' + +# + +#' @tether keras.layers.Rescaling +layer_rescaling <- +function (object, scale, offset = 0, ...) 
{ - require_tf_version("2.6", "layer_hashing()") - args <- capture_args(match.call(), - list(num_bins = as.integer, - salt = as_nullable_integer), - ignore = "object") - create_layer(keras$layers$Hashing, object, args) + args <- capture_args(list(input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape), + ignore = "object") + create_layer(keras$layers$Rescaling, object, args) } - -#' A preprocessing layer which maps integer features to contiguous ranges. -#' -#' @details -#' This layer maps a set of arbitrary integer input tokens into indexed -#' integer output via a table-based vocabulary lookup. The layer's output indices -#' will be contiguously arranged up to the maximum vocab size, even if the input -#' tokens are non-continguous or unbounded. The layer supports multiple options -#' for encoding the output via `output_mode`, and has optional support for -#' out-of-vocabulary (OOV) tokens and masking. -#' -#' The vocabulary for the layer can be supplied on construction or learned via -#' `adapt()`. During `adapt()`, the layer will analyze a data set, determine the -#' frequency of individual integer tokens, and create a vocabulary from them. If -#' the vocabulary is capped in size, the most frequent tokens will be used to -#' create the vocabulary and all others will be treated as OOV. -#' -#' There are two possible output modes for the layer. -#' When `output_mode` is `"int"`, -#' input integers are converted to their index in the vocabulary (an integer). -#' When `output_mode` is `"multi_hot"`, `"count"`, or `"tf_idf"`, input integers -#' are encoded into an array where each dimension corresponds to an element in -#' the vocabulary. +#' A preprocessing layer which resizes images. #' -#' The vocabulary for the layer must be either supplied on construction or -#' learned via `adapt()`. During `adapt()`, the layer will analyze a data set, -#' determine the frequency of individual integer tokens, and create a vocabulary -#' from them. If the vocabulary is capped in size, the most frequent tokens will -#' be used to create the vocabulary and all others will be treated as OOV. -#' -#' @inheritParams layer_dense -#' -#' @param max_tokens The maximum size of the vocabulary for this layer. If `NULL`, -#' there is no cap on the size of the vocabulary. Note that this size -#' includes the OOV and mask tokens. Default to `NULL.` -#' -#' @param num_oov_indices The number of out-of-vocabulary tokens to use. If this -#' value is more than 1, OOV inputs are modulated to determine their OOV -#' value. If this value is 0, OOV inputs will cause an error when calling the -#' layer. Defaults to 1. -#' -#' @param mask_token An integer token that represents masked inputs. When -#' `output_mode` is `"int"`, the token is included in vocabulary and mapped -#' to index 0. In other output modes, the token will not appear in the -#' vocabulary and instances of the mask token in the input will be dropped. -#' If set to `NULL`, no mask term will be added. Defaults to `NULL`. -#' -#' @param oov_token Only used when `invert` is `TRUE.` The token to return for OOV -#' indices. Defaults to -1. -#' -#' @param vocabulary Optional. Either an array of integers or a string path to a text -#' file. If passing an array, can pass a list, list, 1D numpy array, or 1D -#' tensor containing the integer vocabulary terms. If passing a file path, the -#' file should contain one line per term in the vocabulary. If this argument -#' is set, there is no need to `adapt` the layer. 
-#' -#' @param invert Only valid when `output_mode` is `"int"`. If `TRUE`, this layer will -#' map indices to vocabulary items instead of mapping vocabulary items to -#' indices. Default to `FALSE`. -#' -#' @param output_mode Specification for the output of the layer. Defaults to `"int"`. -#' Values can be `"int"`, `"one_hot"`, `"multi_hot"`, `"count"`, or -#' `"tf_idf"` configuring the layer as follows: -#' - `"int"`: Return the vocabulary indices of the input tokens. -#' - `"one_hot"`: Encodes each individual element in the input into an -#' array the same size as the vocabulary, containing a 1 at the element -#' index. If the last dimension is size 1, will encode on that dimension. -#' If the last dimension is not size 1, will append a new dimension for -#' the encoded output. -#' - `"multi_hot"`: Encodes each sample in the input into a single array -#' the same size as the vocabulary, containing a 1 for each vocabulary -#' term present in the sample. Treats the last dimension as the sample -#' dimension, if input shape is (..., sample_length), output shape will -#' be (..., num_tokens). -#' - `"count"`: As `"multi_hot"`, but the int array contains a count of the -#' number of times the token at that index appeared in the sample. -#' - `"tf_idf"`: As `"multi_hot"`, but the TF-IDF algorithm is applied to -#' find the value in each token slot. -#' For `"int"` output, any shape of input and output is supported. For all -#' other output modes, currently only output up to rank 2 is supported. -#' -#' @param pad_to_max_tokens Only applicable when `output_mode` is `"multi_hot"`, -#' `"count"`, or `"tf_idf"`. If TRUE, the output will have its feature axis -#' padded to `max_tokens` even if the number of unique tokens in the -#' vocabulary is less than max_tokens, resulting in a tensor of shape -#' `[batch_size, max_tokens]` regardless of vocabulary size. Defaults to `FALSE`. -#' -#' @param sparse Boolean. Only applicable when `output_mode` is `"multi_hot"`, -#' `"count"`, or `"tf_idf"`. If `TRUE`, returns a `SparseTensor` instead of a -#' dense `Tensor`. Defaults to `FALSE`. -#' -#' @param ... standard layer arguments. -#' -#' @family categorical features preprocessing layers +#' @description +#' This layer resizes an image input to a target height and width. The input +#' should be a 4D (batched) or 3D (unbatched) tensor in `"channels_last"` +#' format. Input pixel values can be of any range +#' (e.g. `[0., 1.)` or `[0, 255]`). +#' +#' # Input Shape +#' 3D (unbatched) or 4D (batched) tensor with shape: +#' `(..., height, width, channels)`, in `"channels_last"` format, +#' or `(..., channels, height, width)`, in `"channels_first"` format. +#' +#' # Output Shape +#' 3D (unbatched) or 4D (batched) tensor with shape: +#' `(..., target_height, target_width, channels)`, +#' or `(..., channels, target_height, target_width)`, +#' in `"channels_first"` format. +#' +#' **Note:** This layer is safe to use inside a `tf.data` pipeline +#' (independently of which backend you're using). +#' +#' @param height +#' Integer, the height of the output shape. +#' +#' @param width +#' Integer, the width of the output shape. +#' +#' @param interpolation +#' String, the interpolation method. +#' Supports `"bilinear"`, `"nearest"`, `"bicubic"`, +#' `"lanczos3"`, `"lanczos5"`. Defaults to `"bilinear"`. +#' +#' @param crop_to_aspect_ratio +#' If `TRUE`, resize the images without aspect +#' ratio distortion. 
When the original aspect ratio differs
+#' from the target aspect ratio, the output image will be
+#' cropped so as to return the
+#' largest possible window in the image (of size `(height, width)`)
+#' that matches the target aspect ratio. By default
+#' (`crop_to_aspect_ratio=FALSE`), aspect ratio may not be preserved.
+#'
+#' @param pad_to_aspect_ratio
+#' If `TRUE`, pad the images without aspect
+#' ratio distortion. When the original aspect ratio differs
+#' from the target aspect ratio, the output image will be
+#' evenly padded on the short side.
+#'
+#' @param fill_mode
+#' When using `pad_to_aspect_ratio=TRUE`, padded areas
+#' are filled according to the given mode. Only `"constant"` is
+#' supported at this time
+#' (fill with constant value, equal to `fill_value`).
+#'
+#' @param fill_value
+#' Float. Padding value to use when `pad_to_aspect_ratio=TRUE`.
+#'
+#' @param data_format
+#' string, either `"channels_last"` or `"channels_first"`.
+#' The ordering of the dimensions in the inputs. `"channels_last"`
+#' corresponds to inputs with shape `(batch, height, width, channels)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch, channels, height, width)`. It defaults to the
+#' `image_data_format` value found in your Keras config file at
+#' `~/.keras/keras.json`. If you never set it, then it will be
+#' `"channels_last"`.
+#'
+#' @param ...
+#' Base layer keyword arguments, such as `name` and `dtype`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family image preprocessing layers
#' @family preprocessing layers
-#'
+#' @family layers
#' @seealso
-#' - [`adapt()`]
-#' -
-#' -
-#' @export
-layer_integer_lookup <-
-function(object,
-         max_tokens = NULL,
-         num_oov_indices = 1L,
-         mask_token = NULL,
-         oov_token = -1L,
-         vocabulary = NULL,
-         invert = FALSE,
-         output_mode = 'int',
-         sparse = FALSE,
-         pad_to_max_tokens = FALSE,
-         ...)
+#' +
# +
+#' @tether keras.layers.Resizing
+layer_resizing <-
+function (object, height, width, interpolation = "bilinear",
+    crop_to_aspect_ratio = FALSE,
+    pad_to_aspect_ratio = FALSE, fill_mode = "constant", fill_value = 0,
+    data_format = NULL, ...)
{
-  require_tf_version("2.6", "layer_integer_lookup()")
-  args <- capture_args(match.call(),
-                       list(num_oov_indices = as.integer,
-                            oov_token = as.integer,
-                            output_mode = fix_string),
-                       ignore = "object")
-  create_layer(keras$layers$IntegerLookup, object, args)
+  args <- capture_args(list(height = as_integer, width = as_integer,
+      input_shape = normalize_shape, batch_size = as_integer,
+      batch_input_shape = normalize_shape), ignore = "object")
+  create_layer(keras$layers$Resizing, object, args)
}
-
-#' A preprocessing layer which maps string features to integer indices.
+#' A preprocessing layer that maps strings to (possibly encoded) indices.
#'
-#' @details
+#' @description
#' This layer translates a set of arbitrary strings into integer output via a
-#' table-based vocabulary lookup.
+#' table-based vocabulary lookup. This layer will perform no splitting or
+#' transformation of input strings. For a layer that can split and tokenize
+#' natural language, see the `layer_text_vectorization` layer.
#'
#' The vocabulary for the layer must be either supplied on construction or
#' learned via `adapt()`. During `adapt()`, the layer will analyze a data set,
-#' determine the frequency of individual strings tokens, and create a vocabulary
-#' from them.
-#' from them. If the vocabulary is capped in size, the most frequent tokens will
-#' be used to create the vocabulary and all others will be treated as
-#' out-of-vocabulary (OOV).
+#' determine the frequency of individual string tokens, and create a
+#' vocabulary from them. If the vocabulary is capped in size, the most frequent
+#' tokens will be used to create the vocabulary and all others will be treated
+#' as out-of-vocabulary (OOV).
#'
#' There are two possible output modes for the layer.
#' When `output_mode` is `"int"`,
@@ -951,231 +1776,325 @@ function(object,
#' The vocabulary can optionally contain a mask token as well as an OOV token
#' (which can optionally occupy multiple indices in the vocabulary, as set
#' by `num_oov_indices`).
-#' The position of these tokens in the vocabulary is fixed. When `output_mode` is
-#' `"int"`, the vocabulary will begin with the mask token (if set), followed by
-#' OOV indices, followed by the rest of the vocabulary. When `output_mode` is
-#' `"multi_hot"`, `"count"`, or `"tf_idf"` the vocabulary will begin with OOV
-#' indices and instances of the mask token will be dropped.
-#'
-#' @inheritParams layer_dense
-#'
-#' @param max_tokens The maximum size of the vocabulary for this layer. If `NULL`,
-#' there is no cap on the size of the vocabulary. Note that this size
-#' includes the OOV and mask tokens. Default to `NULL.`
-#'
-#' @param num_oov_indices The number of out-of-vocabulary tokens to use. If this
-#' value is more than 1, OOV inputs are hashed to determine their OOV value.
-#' If this value is 0, OOV inputs will cause an error when calling the layer.
-#' Defaults to 1.
-#'
-#' @param mask_token A token that represents masked inputs. When `output_mode` is
-#' `"int"`, the token is included in vocabulary and mapped to index 0. In
-#' other output modes, the token will not appear in the vocabulary and
-#' instances of the mask token in the input will be dropped. If set to `NULL`,
+#' The position of these tokens in the vocabulary is fixed. When `output_mode`
+#' is `"int"`, the vocabulary will begin with the mask token (if set), followed
+#' by OOV indices, followed by the rest of the vocabulary. When `output_mode`
+#' is `"multi_hot"`, `"count"`, or `"tf_idf"` the vocabulary will begin with
+#' OOV indices and instances of the mask token will be dropped.
+#'
+#' **Note:** This layer uses TensorFlow internally. It cannot
+#' be used as part of the compiled computation graph of a model with
+#' any backend other than TensorFlow.
+#' It can however be used with any backend when running eagerly.
+#' It can also always be used as part of an input preprocessing pipeline
+#' with any backend (outside the model itself), which is how we recommend
+#' to use this layer.
+#'
+#' **Note:** This layer is safe to use inside a `tf.data` pipeline
+#' (independently of which backend you're using).
+#'
+#' # Examples
+#' **Creating a lookup layer with a known vocabulary**
+#'
+#' This example creates a lookup layer with a pre-existing vocabulary.
+#'
+#' ```{r}
+#' vocab <- c("a", "b", "c", "d")
+#' data <- rbind(c("a", "c", "d"), c("d", "z", "b"))
+#' layer <- layer_string_lookup(vocabulary = vocab)
+#' layer(data)
+#' ```
+#'
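+#' **Applying the layer inside a `tf.data` pipeline**
+#'
+#' Per the note above, the recommended way to use this layer with a
+#' non-TensorFlow backend is in the input pipeline. A sketch (not run) using
+#' the tfdatasets package:
+#'
+#' ```r
+#' library(tfdatasets)
+#' ds <- tensor_slices_dataset(data) |>
+#'   dataset_batch(2) |>
+#'   dataset_map(layer) # lookups run as the data is streamed
+#' ```
+#'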
+#' **Creating a lookup layer with an adapted vocabulary**
+#'
+#' This example creates a lookup layer and generates the vocabulary by
+#' analyzing the dataset.
+#'
+#' ```{r}
+#' data <- rbind(c("a", "c", "d"), c("d", "z", "b"))
+#' layer <- layer_string_lookup()
+#' layer %>% adapt(data)
+#' get_vocabulary(layer)
+#' ```
+#'
+#' Note that the OOV token `"[UNK]"` has been added to the vocabulary.
+#' The remaining tokens are sorted by frequency
+#' (`"d"`, which has 2 occurrences, is first) then by inverse sort order.
+#'
+#' ```{r}
+#' data <- rbind(c("a", "c", "d"), c("d", "z", "b"))
+#' layer <- layer_string_lookup()
+#' layer %>% adapt(data)
+#' layer(data)
+#' ```
+#'
+#' **Lookups with multiple OOV indices**
+#'
+#' This example demonstrates how to use a lookup layer with multiple OOV
+#' indices. When a layer is created with more than one OOV index, any OOV
+#' values are hashed into the number of OOV buckets, distributing OOV values in
+#' a deterministic fashion across the set.
+#'
+#' ```{r}
+#' vocab <- c("a", "b", "c", "d")
+#' data <- rbind(c("a", "c", "d"), c("m", "z", "b"))
+#' layer <- layer_string_lookup(vocabulary = vocab, num_oov_indices = 2)
+#' layer(data)
+#' ```
+#'
+#' Note that the output for OOV value `"m"` is 0, while the output for OOV value
+#' `"z"` is 1. The in-vocab terms have their output index increased by 1 from
+#' earlier examples (`"a"` maps to 2, etc.) in order to make space for the extra
+#' OOV value.
+#'
+#' **One-hot output**
+#'
+#' Configure the layer with `output_mode='one_hot'`. Note that the first
+#' `num_oov_indices` dimensions in the one_hot encoding represent OOV values.
+#'
+#' ```{r}
+#' vocab <- c("a", "b", "c", "d")
+#' data <- c("a", "b", "c", "d", "z")
+#' layer <- layer_string_lookup(vocabulary = vocab, output_mode = 'one_hot')
+#' layer(data)
+#' ```
+#'
+#' **Multi-hot output**
+#'
+#' Configure the layer with `output_mode='multi_hot'`. Note that the first
+#' `num_oov_indices` dimensions in the multi_hot encoding represent OOV values.
+#'
+#' ```{r}
+#' vocab <- c("a", "b", "c", "d")
+#' data <- rbind(c("a", "c", "d", "d"), c("d", "z", "b", "z"))
+#' layer <- layer_string_lookup(vocabulary = vocab, output_mode = 'multi_hot')
+#' layer(data)
+#' ```
+#'
+#' **Token count output**
+#'
+#' Configure the layer with `output_mode='count'`. As with multi_hot output,
+#' the first `num_oov_indices` dimensions in the output represent OOV values.
+#'
+#' ```{r}
+#' vocab <- c("a", "b", "c", "d")
+#' data <- rbind(c("a", "c", "d", "d"), c("d", "z", "b", "z"))
+#' layer <- layer_string_lookup(vocabulary = vocab, output_mode = 'count')
+#' layer(data)
+#' ```
+#'
+#' **TF-IDF output**
+#'
+#' Configure the layer with `output_mode="tf_idf"`. As with multi_hot output,
+#' the first `num_oov_indices` dimensions in the output represent OOV values.
+#'
+#' Each token bin will output `token_count * idf_weight`, where the idf weights
+#' are the inverse document frequency weights per token. These should be
+#' provided along with the vocabulary. Note that the `idf_weight` for OOV
+#' values will default to the average of all idf weights passed in.
+#'
+#' ```{r}
+#' vocab <- c("a", "b", "c", "d")
+#' idf_weights <- c(0.25, 0.75, 0.6, 0.4)
+#' data <- rbind(c("a", "c", "d", "d"), c("d", "z", "b", "z"))
+#' layer <- layer_string_lookup(output_mode = "tf_idf")
+#' layer %>% set_vocabulary(vocab, idf_weights = idf_weights)
+#' layer(data)
+#' ```
+#'
+#' To specify the idf weights for oov values, you will need to pass the entire
+#' vocabulary including the leading oov token.
+#'
+#' ```{r}
+#' vocab <- c("[UNK]", "a", "b", "c", "d")
+#' idf_weights <- c(0.9, 0.25, 0.75, 0.6, 0.4)
+#' data <- rbind(c("a", "c", "d", "d"), c("d", "z", "b", "z"))
+#' layer <- layer_string_lookup(output_mode = "tf_idf")
+#' layer %>% set_vocabulary(vocab, idf_weights = idf_weights)
+#' layer(data)
+#' ```
+#'
+#' When adapting the layer in `"tf_idf"` mode, each input sample will be
+#' considered a document, and IDF weight per token will be calculated as
+#' `log(1 + num_documents / (1 + token_document_count))`.
+#'
+#' **Inverse lookup**
+#'
+#' This example demonstrates how to map indices to strings using this layer.
+#' (You can also use `adapt()` with `invert = TRUE`, but for simplicity we'll
+#' pass the vocab in this example.)
+#'
+#' ```{r}
+#' vocab <- c("a", "b", "c", "d")
+#' data <- rbind(c(1, 3, 4), c(4, 0, 2))
+#' layer <- layer_string_lookup(vocabulary = vocab, invert = TRUE)
+#' layer(data)
+#' ```
+#'
+#' Note that the first index corresponds to the oov token by default.
+#'
+#' **Forward and inverse lookup pairs**
+#'
+#' This example demonstrates how to use the vocabulary of a standard lookup
+#' layer to create an inverse lookup layer.
+#'
+#' ```{r}
+#' vocab <- c("a", "b", "c", "d")
+#' data <- rbind(c("a", "c", "d"), c("d", "z", "b"))
+#' layer <- layer_string_lookup(vocabulary = vocab)
+#' i_layer <- layer_string_lookup(vocabulary = vocab, invert = TRUE)
+#' int_data <- layer(data)
+#' i_layer(int_data)
+#' ```
+#'
+#' In this example, the input value `"z"` resulted in an output of `"[UNK]"`,
+#' since `"z"` was not in the vocabulary - it got represented as an OOV, and all
+#' OOV values are returned as `"[UNK]"` in the inverse layer. Also, note that
+#' for the inverse to work, you must have already set the forward layer
+#' vocabulary either directly or via `adapt()` before calling
+#' `get_vocabulary()`.
+#'
+#' @param max_tokens
+#' Maximum size of the vocabulary for this layer. This should
+#' only be specified when adapting the vocabulary or when setting
+#' `pad_to_max_tokens=TRUE`. If `NULL`, there is no cap on the size of
+#' the vocabulary. Note that this size includes the OOV
+#' and mask tokens. Defaults to `NULL`.
+#'
+#' @param num_oov_indices
+#' The number of out-of-vocabulary tokens to use.
+#' If this value is more than 1, OOV inputs are hashed to
+#' determine their OOV value.
+#' If this value is 0, OOV inputs will cause an error when calling
+#' the layer. Defaults to `1`.
+#'
+#' @param mask_token
+#' A token that represents masked inputs. When `output_mode` is
+#' `"int"`, the token is included in vocabulary and mapped to index 0.
+#' In other output modes, the token will not appear
+#' in the vocabulary and instances of the mask token
+#' in the input will be dropped. If set to `NULL`,
#' no mask term will be added. Defaults to `NULL`.
#'
-#' @param oov_token Only used when `invert` is TRUE. The token to return for OOV
+#' @param oov_token
+#' Only used when `invert` is TRUE. The token to return for OOV
#' indices. Defaults to `"[UNK]"`.
#'
-#' @param vocabulary Optional. Either an array of strings or a string path to a text
-#' file. If passing an array, can pass a list, list, 1D numpy array, or 1D
-#' tensor containing the string vocabulary terms. If passing a file path, the
-#' file should contain one line per term in the vocabulary. If this argument
-#' is set, there is no need to `adapt` the layer.
-#'
-#' @param encoding String encoding. Default of `NULL` is equivalent to `"utf-8"`.
-#'
-#' @param invert Only valid when `output_mode` is `"int"`. If TRUE, this layer will
-#' map indices to vocabulary items instead of mapping vocabulary items to
-#' indices. Default to `FALSE`.
-#'
-#' @param output_mode Specification for the output of the layer. Defaults to `"int"`.
-#' Values can be `"int"`, `"one_hot"`, `"multi_hot"`, `"count"`, or
-#' `"tf_idf"` configuring the layer as follows:
-#' - `"int"`: Return the raw integer indices of the input tokens.
-#' - `"one_hot"`: Encodes each individual element in the input into an
-#' array the same size as the vocabulary, containing a 1 at the element
-#' index. If the last dimension is size 1, will encode on that dimension.
-#' If the last dimension is not size 1, will append a new dimension for
-#' the encoded output.
-#' - `"multi_hot"`: Encodes each sample in the input into a single array
-#' the same size as the vocabulary, containing a 1 for each vocabulary
-#' term present in the sample. Treats the last dimension as the sample
-#' dimension, if input shape is (..., sample_length), output shape will
-#' be (..., num_tokens).
-#' - `"count"`: As `"multi_hot"`, but the int array contains a count of the
-#' number of times the token at that index appeared in the sample.
-#' - `"tf_idf"`: As `"multi_hot"`, but the TF-IDF algorithm is applied to
-#' find the value in each token slot.
-#' For `"int"` output, any shape of input and output is supported. For all
-#' other output modes, currently only output up to rank 2 is supported.
-#'
-#' @param pad_to_max_tokens Only applicable when `output_mode` is `"multi_hot"`,
-#' `"count"`, or `"tf_idf"`. If TRUE, the output will have its feature axis
-#' padded to `max_tokens` even if the number of unique tokens in the
-#' vocabulary is less than max_tokens, resulting in a tensor of shape
-#' `[batch_size, max_tokens]` regardless of vocabulary size. Defaults to `FALSE`.
-#'
-#' @param sparse Boolean. Only applicable when `output_mode` is `"multi_hot"`,
-#' `"count"`, or `"tf_idf"`. If TRUE, returns a `SparseTensor` instead of a
-#' dense `Tensor`. Defaults to `FALSE`.
-#'
-#' @param ... standard layer arguments.
-#'
+#' @param vocabulary
+#' Optional. Either an array of strings or a string path to a
+#' text file. If passing an array, can pass a list,
+#' 1D NumPy array, or 1D tensor containing the string vocabulary terms.
+#' If passing a file path, the file should contain one line per term
+#' in the vocabulary. If this argument is set,
+#' there is no need to `adapt()` the layer.
+#'
+#' @param idf_weights
+#' Only valid when `output_mode` is `"tf_idf"`.
+#' A list, 1D NumPy array, or 1D tensor of the same length
+#' as the vocabulary, containing the floating point inverse document
+#' frequency weights, which will be multiplied by per sample term
+#' counts for the final TF-IDF weight.
+#' If the `vocabulary` argument is set, and `output_mode` is
+#' `"tf_idf"`, this argument must be supplied.
+#'
+#' @param invert
+#' Only valid when `output_mode` is `"int"`.
+#' If `TRUE`, this layer will map indices to vocabulary items
+#' instead of mapping vocabulary items to indices.
+#' Defaults to `FALSE`.
+#'
+#' @param output_mode
+#' Specification for the output of the layer. Values can be
+#' `"int"`, `"one_hot"`, `"multi_hot"`, `"count"`, or `"tf_idf"`
+#' configuring the layer as follows:
+#' - `"int"`: Return the vocabulary indices of the input tokens.
+#' - `"one_hot"`: Encodes each individual element in the input into an
+#' array the same size as the vocabulary,
+#' containing a 1 at the element index. If the last dimension
+#' is size 1, will encode on that dimension.
+#' If the last dimension is not size 1, will append a new
+#' dimension for the encoded output.
+#' - `"multi_hot"`: Encodes each sample in the input into a single
+#' array the same size as the vocabulary,
+#' containing a 1 for each vocabulary term present in the sample.
+#' Treats the last dimension as the sample dimension,
+#' if input shape is `(..., sample_length)`,
+#' output shape will be `(..., num_tokens)`.
+#' - `"count"`: As `"multi_hot"`, but the int array contains
+#' a count of the number of times the token at that index
+#' appeared in the sample.
+#' - `"tf_idf"`: As `"multi_hot"`, but the TF-IDF algorithm is
+#' applied to find the value in each token slot.
+#' For `"int"` output, any shape of input and output is supported.
+#' For all other output modes, currently only output up to rank 2
+#' is supported. Defaults to `"int"`.
+#'
+#' @param pad_to_max_tokens
+#' Only applicable when `output_mode` is `"multi_hot"`,
+#' `"count"`, or `"tf_idf"`. If `TRUE`, the output will have
+#' its feature axis padded to `max_tokens` even if the number
+#' of unique tokens in the vocabulary is less than `max_tokens`,
+#' resulting in a tensor of shape `(batch_size, max_tokens)`
+#' regardless of vocabulary size. Defaults to `FALSE`.
+#'
+#' @param sparse
+#' Boolean. Only applicable to `"multi_hot"`, `"count"`, and
+#' `"tf_idf"` output modes. Only supported with TensorFlow
+#' backend. If `TRUE`, returns a `SparseTensor`
+#' instead of a dense `Tensor`. Defaults to `FALSE`.
+#'
+#' @param encoding
+#' Optional. The text encoding to use to interpret the input
+#' strings. Defaults to `"utf-8"`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param name
+#' String, name for the object.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
#' @family categorical features preprocessing layers
#' @family preprocessing layers
-#'
+#' @family layers
#' @seealso
-#' - [`adapt()`]
-#' -
-#' -
+#' +
+# +
#'
-#' @export
+#' @tether keras.layers.StringLookup
layer_string_lookup <-
-function(object,
-         max_tokens = NULL,
-         num_oov_indices = 1L,
-         mask_token = NULL,
-         oov_token = '[UNK]',
-         vocabulary = NULL,
-         encoding = NULL,
-         invert = FALSE,
-         output_mode = 'int',
-         sparse = FALSE,
-         pad_to_max_tokens = FALSE,
-         ...)
-{
-  require_tf_version("2.6", "layer_string_lookup()")
-  args <- capture_args(match.call(),
-                       list(num_oov_indices = as.integer,
-                            output_mode = fix_string),
-                       ignore = "object")
-  create_layer(keras$layers$StringLookup, object, args)
-}
-
-
-## ---- numerical features preprocessing ----
-
-
-
-#' A preprocessing layer which normalizes continuous features.
-#'
-#' @details
-#' This layer will shift and scale inputs into a distribution centered around 0
-#' with standard deviation 1. It accomplishes this by precomputing the mean and
-#' variance of the data, and calling `(input - mean) / sqrt(var)` at runtime.
-#'
-#' The mean and variance values for the layer must be either supplied on
-#' construction or learned via `adapt()`. `adapt()` will compute the mean and
-#' variance of the data and store them as the layer's weights. `adapt()` should
-#' be called before `fit()`, `evaluate()`, or `predict()`.
-#'
-#' @inheritParams layer_dense
-#'
-#' @param axis Integer, list of integers, or NULL. The axis or axes that should
-#' have a separate mean and variance for each index in the shape. For
-#' example, if shape is `(NULL, 5)` and `axis=1`, the layer will track 5
-#' separate mean and variance values for the last axis. If `axis` is set to
-#' `NULL`, the layer will normalize all elements in the input by a scalar
-#' mean and variance. Defaults to -1, where the last axis of the input is
-#' assumed to be a feature dimension and is normalized per index. Note that
-#' in the specific case of batched scalar inputs where the only axis is the
-#' batch axis, the default will normalize each index in the batch
-#' separately. In this case, consider passing `axis = NULL`.
-#'
-#' @param mean The mean value(s) to use during normalization. The passed value(s)
-#' will be broadcast to the shape of the kept axes above; if the value(s)
-#' cannot be broadcast, an error will be raised when this layer's `build()`
-#' method is called.
-#'
-#' @param variance The variance value(s) to use during normalization. The passed
-#' value(s) will be broadcast to the shape of the kept axes above; if the
-#' value(s) cannot be broadcast, an error will be raised when this layer's
-#' `build()` method is called.
-#'
-#' @param ... standard layer arguments.
-#'
-#' @family numerical features preprocessing layers
-#' @family preprocessing layers
-#'
-#' @seealso
-#' - [`adapt()`]
-#' -
-#' -
-#' @export
-layer_normalization <-
-function(object, axis = -1L, mean=NULL, variance=NULL, ...)
+function (object, max_tokens = NULL, num_oov_indices = 1L, mask_token = NULL,
+    oov_token = "[UNK]", vocabulary = NULL, idf_weights = NULL,
+    invert = FALSE, output_mode = "int", pad_to_max_tokens = FALSE,
+    sparse = FALSE, encoding = "utf-8", name = NULL, ...)
{
-  require_tf_version("2.6", "layer_normalization()")
-  args <- capture_args(match.call(), list(axis = as_axis),
-                       ignore = "object")
-  create_layer(keras$layers$Normalization, object, args)
+  # only num_oov_indices is integer-valued here; the remaining
+  # (string/logical) args pass through unmodified
+  args <- capture_args(list(num_oov_indices = as_integer,
+    input_shape = normalize_shape, batch_size = as_integer,
+    batch_input_shape = normalize_shape), ignore = "object")
+  create_layer(keras$layers$StringLookup, object, args)
}

-#' A preprocessing layer which buckets continuous features by ranges.
-#'
-#' @details
-#' This layer will place each element of its input data into one of several
-#' contiguous ranges and output an integer index indicating which range each
-#' element was placed in.
-#'
-#' Input shape:
-#' Any `tf.Tensor` or `tf.RaggedTensor` of dimension 2 or higher.
-#'
-#' Output shape:
-#' Same as input shape.
-#'
-#' @inheritParams layer_dense
-#'
-#' @param bin_boundaries A list of bin boundaries. The leftmost and rightmost bins
-#' will always extend to `-Inf` and `Inf`, so `bin_boundaries = c(0., 1., 2.)`
-#' generates bins `(-Inf, 0.)`, `[0., 1.)`, `[1., 2.)`, and `[2., +Inf)`. If
-#' this option is set, `adapt` should not be called.
-#'
-#' @param num_bins The integer number of bins to compute. If this option is set,
-#' `adapt` should be called to learn the bin boundaries.
-#'
-#' @param epsilon Error tolerance, typically a small fraction close to zero (e.g.
-#' 0.01). Higher values of epsilon increase the quantile approximation, and -#' hence result in more unequal buckets, but could improve performance -#' and resource consumption. -#' -#' @param ... standard layer arguments. -#' -#' @family numerical features preprocessing layers -#' @family preprocessing layers -#' -#' @seealso -#' - [`adapt()`] -#' - -#' - -#' @export -layer_discretization <- -function(object, bin_boundaries=NULL, num_bins = NULL, epsilon = 0.01, ...) -{ - require_tf_version("2.6", "layer_discretization()") - args <- capture_args(match.call(), - ignore = "object") - create_layer(keras$layers$Discretization, object, args) -} - - -# TODO: all the layers should have consistently formatted sections: -# Input shape, Output shape - - -# ---- text preprocessing ---- - - #' A preprocessing layer which maps text features to integer sequences. #' -#' @details +#' @description #' This layer has basic options for managing text in a Keras model. It -#' transforms a batch of strings (one example = one string) into either a list of -#' token indices (one example = 1D tensor of integer token indices) or a dense -#' representation (one example = 1D tensor of float values representing data -#' about the example's tokens). +#' transforms a batch of strings (one example = one string) into either a list +#' of token indices (one example = 1D tensor of integer token indices) or a +#' dense representation (one example = 1D tensor of float values representing +#' data about the example's tokens). This layer is meant to handle natural +#' language inputs. To handle simple string inputs (categorical strings or +#' pre-tokenized strings) see `layer_string_lookup()`. #' #' The vocabulary for the layer must be either supplied on construction or #' learned via `adapt()`. When this layer is adapted, it will analyze the @@ -1199,154 +2118,239 @@ function(object, bin_boundaries=NULL, num_bins = NULL, epsilon = 0.01, ...) #' #' 1. Any callable can be passed to this Layer, but if you want to serialize #' this object you should only pass functions that are registered Keras -#' serializables (see [`tf$keras$utils$register_keras_serializable`](https://www.tensorflow.org/api_docs/python/tf/keras/utils/register_keras_serializable) +#' serializables (see [`register_keras_serializable()`] #' for more details). #' 2. When using a custom callable for `standardize`, the data received #' by the callable will be exactly as passed to this layer. The callable #' should return a tensor of the same shape as the input. #' 3. When using a custom callable for `split`, the data received by the #' callable will have the 1st dimension squeezed out - instead of -#' `matrix(c("string to split", "another string to split"))`, the Callable will -#' see `c("string to split", "another string to split")`. The callable should -#' return a Tensor with the first dimension containing the split tokens - +#' `list("string to split", "another string to split")`, the Callable will +#' see `c("string to split", "another string to split")`. +#' The callable should return a `tf.Tensor` of dtype `string` +#' with the first dimension containing the split tokens - #' in this example, we should see something like `list(c("string", "to", -#' "split"), c("another", "string", "to", "split"))`. This makes the callable -#' site natively compatible with `tf$strings$split()`. -#' -#' @inheritParams layer_dense -#' -#' @param max_tokens The maximum size of the vocabulary for this layer. If NULL, -#' there is no cap on the size of the vocabulary. 
Note that this vocabulary -#' contains 1 OOV token, so the effective number of tokens is `(max_tokens - -#' 1 - (1 if output_mode == "int" else 0))`. -#' -#' @param standardize Optional specification for standardization to apply to the -#' input text. Values can be NULL (no standardization), -#' `"lower_and_strip_punctuation"` (lowercase and remove punctuation) or a -#' Callable. Default is `"lower_and_strip_punctuation"`. -#' -#' @param split Optional specification for splitting the input text. Values can be -#' NULL (no splitting), `"whitespace"` (split on ASCII whitespace), or a -#' Callable. The default is `"whitespace"`. -#' -#' @param ngrams Optional specification for ngrams to create from the possibly-split -#' input text. Values can be NULL, an integer or list of integers; passing -#' an integer will create ngrams up to that integer, and passing a list of -#' integers will create ngrams for the specified values in the list. Passing -#' NULL means that no ngrams will be created. -#' -#' @param output_mode Optional specification for the output of the layer. Values can -#' be `"int"`, `"multi_hot"`, `"count"` or `"tf_idf"`, configuring the layer -#' as follows: -#' - `"int"`: Outputs integer indices, one integer index per split string -#' token. When `output_mode == "int"`, 0 is reserved for masked -#' locations; this reduces the vocab size to -#' `max_tokens - 2` instead of `max_tokens - 1`. -#' - `"multi_hot"`: Outputs a single int array per batch, of either -#' vocab_size or max_tokens size, containing 1s in all elements where the -#' token mapped to that index exists at least once in the batch item. -#' - `"count"`: Like `"multi_hot"`, but the int array contains a count of -#' the number of times the token at that index appeared in the -#' batch item. -#' - `"tf_idf"`: Like `"multi_hot"`, but the TF-IDF algorithm is applied to -#' find the value in each token slot. -#' For `"int"` output, any shape of input and output is supported. For all -#' other output modes, currently only rank 1 inputs (and rank 2 outputs after -#' splitting) are supported. -#' -#' @param output_sequence_length Only valid in INT mode. If set, the output will have -#' its time dimension padded or truncated to exactly `output_sequence_length` -#' values, resulting in a tensor of shape +#' "split"), c("another", "string", "to", "split"))`. +#' +#' **Note:** This layer uses TensorFlow internally. It cannot +#' be used as part of the compiled computation graph of a model with +#' any backend other than TensorFlow. +#' It can however be used with any backend when running eagerly. +#' It can also always be used as part of an input preprocessing pipeline +#' with any backend (outside the model itself), which is how we recommend +#' to use this layer. +#' +#' **Note:** This layer is safe to use inside a `tf.data` pipeline +#' (independently of which backend you're using). +#' +#' # Examples +#' This example instantiates a `TextVectorization` layer that lowercases text, +#' splits on whitespace, strips punctuation, and outputs integer vocab indices. +#' +#' ```{r} +#' max_tokens <- 5000 # Maximum vocab size. +#' max_len <- 4 # Sequence length to pad the outputs to. +#' # Create the layer. +#' vectorize_layer <- layer_text_vectorization( +#' max_tokens = max_tokens, +#' output_mode = 'int', +#' output_sequence_length = max_len) +#' ``` +#' +#' ```{r} +#' # Now that the vocab layer has been created, call `adapt` on the +#' # list of strings to create the vocabulary. 
+#' vectorize_layer %>% adapt(c("foo bar", "bar baz", "baz bada boom"))
+#' ```
+#'
+#' ```{r}
+#' # Now, the layer can map strings to integers -- you can use an
+#' # embedding layer to map these integers to learned embeddings.
+#' input_data <- rbind("foo qux bar", "qux baz")
+#' vectorize_layer(input_data)
+#' ```
+#'
+#' This example instantiates a `TextVectorization` layer by passing the list
+#' of vocabulary terms to `layer_text_vectorization()` directly.
+#'
+#' ```{r}
+#' vocab_data <- c("earth", "wind", "and", "fire")
+#' max_len <- 4 # Sequence length to pad the outputs to.
+#' # Create the layer, passing the vocab directly. You can also pass the
+#' # vocabulary arg a path to a file containing one vocabulary word per
+#' # line.
+#' vectorize_layer <- layer_text_vectorization(
+#'   max_tokens = max_tokens,
+#'   output_mode = 'int',
+#'   output_sequence_length = max_len,
+#'   vocabulary = vocab_data)
+#' ```
+#'
+#' ```{r}
+#' # Because we've passed the vocabulary directly, we don't need to adapt
+#' # the layer - the vocabulary is already set. The vocabulary contains the
+#' # padding token ('') and OOV token ('[UNK]')
+#' # as well as the passed tokens.
+#' vectorize_layer %>% get_vocabulary()
+#' # ['', '[UNK]', 'earth', 'wind', 'and', 'fire']
+#' ```
+#'
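+#' As the notes above recommend, the layer can also run in the input pipeline
+#' rather than inside the model. A sketch (not run) that maps the layer over
+#' a `tf.data` dataset with the tfdatasets package:
+#'
+#' ```r
+#' library(tfdatasets)
+#' text_ds <- tensor_slices_dataset(c("foo bar", "bar baz", "baz bada boom")) |>
+#'   dataset_batch(2) |>
+#'   dataset_map(vectorize_layer) # batches of integer token indices
+#' ```
+#'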
+#' - `"count"`: Like `"multi_hot"`, but the int array contains +#' a count of the number of times the token at that index +#' appeared in the batch item. +#' - `"tf_idf"`: Like `"multi_hot"`, but the TF-IDF algorithm +#' is applied to find the value in each token slot. +#' For `"int"` output, any shape of input and output is supported. +#' For all other output modes, currently only rank 1 inputs +#' (and rank 2 outputs after splitting) are supported. +#' +#' @param output_sequence_length +#' Only valid in INT mode. If set, the output will +#' have its time dimension padded or truncated to exactly +#' `output_sequence_length` values, resulting in a tensor of shape #' `(batch_size, output_sequence_length)` regardless of how many tokens -#' resulted from the splitting step. Defaults to NULL. -#' -#' @param pad_to_max_tokens Only valid in `"multi_hot"`, `"count"`, and `"tf_idf"` -#' modes. If TRUE, the output will have its feature axis padded to -#' `max_tokens` even if the number of unique tokens in the vocabulary is less -#' than max_tokens, resulting in a tensor of shape `(batch_size, max_tokens)` -#' regardless of vocabulary size. Defaults to FALSE. -#' -#' @param vocabulary Optional for `layer_text_vectorization()`. Either an array -#' of strings or a string path to a text file. If passing an array, can pass -#' an R list or character vector, 1D numpy array, or 1D tensor containing the -#' string vocabulary terms. If passing a file path, the file should contain -#' one line per term in the vocabulary. If vocabulary is set (either by -#' passing `layer_text_vectorization(vocabulary = ...)` or by calling -#' `set_vocabulary(layer, vocabulary = ...`), there is no need to `adapt()` -#' the layer. -#' -#' @family text preprocessing layers +#' resulted from the splitting step. Defaults to `NULL`. If `ragged` +#' is `TRUE` then `output_sequence_length` may still truncate the +#' output. +#' +#' @param pad_to_max_tokens +#' Only valid in `"multi_hot"`, `"count"`, +#' and `"tf_idf"` modes. If `TRUE`, the output will have +#' its feature axis padded to `max_tokens` even if the number +#' of unique tokens in the vocabulary is less than `max_tokens`, +#' resulting in a tensor of shape `(batch_size, max_tokens)` +#' regardless of vocabulary size. Defaults to `FALSE`. +#' +#' @param vocabulary +#' Optional. Either an array of strings or a string path to a +#' text file. If passing an array, can pass a list, list, +#' 1D NumPy array, or 1D tensor containing the string vocabulary terms. +#' If passing a file path, the file should contain one line per term +#' in the vocabulary. If this argument is set, +#' there is no need to `adapt()` the layer. +#' +#' @param idf_weights +#' Only valid when `output_mode` is `"tf_idf"`. A list, list, +#' 1D NumPy array, or 1D tensor of the same length as the vocabulary, +#' containing the floating point inverse document frequency weights, +#' which will be multiplied by per sample term counts for +#' the final `tf_idf` weight. If the `vocabulary` argument is set, +#' and `output_mode` is `"tf_idf"`, this argument must be supplied. +#' +#' @param ragged +#' Boolean. Only applicable to `"int"` output mode. +#' Only supported with TensorFlow backend. +#' If `TRUE`, returns a `RaggedTensor` instead of a dense `Tensor`, +#' where each sequence may have a different length +#' after string splitting. Defaults to `FALSE`. +#' +#' @param sparse +#' Boolean. Only applicable to `"multi_hot"`, `"count"`, and +#' `"tf_idf"` output modes. Only supported with TensorFlow +#' backend. 
+#' backend. If `TRUE`, returns a `SparseTensor`
+#' instead of a dense `Tensor`. Defaults to `FALSE`.
+#'
+#' @param encoding
+#' Optional. The text encoding to use to interpret the input
+#' strings. Defaults to `"utf-8"`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param name
+#' String, name for the object.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
#' @family preprocessing layers
-#'
-#'
-#' @param ... standard layer arguments.
-#'
+#' @family layers
#' @seealso
-#' - [`adapt()`]
-#' -
-#' -
-#' @export
+#' +
+# +
+#'
+#' @tether keras.layers.TextVectorization
layer_text_vectorization <-
-function(object,
-         max_tokens = NULL,
-         standardize = 'lower_and_strip_punctuation',
-         split = 'whitespace',
-         ngrams = NULL,
-         output_mode = 'int',
-         output_sequence_length = NULL,
-         pad_to_max_tokens = FALSE,
-         vocabulary = NULL,
-         ...)
+function (object, max_tokens = NULL, standardize = "lower_and_strip_punctuation",
+    split = "whitespace", ngrams = NULL, output_mode = "int",
+    output_sequence_length = NULL, pad_to_max_tokens = FALSE,
+    vocabulary = NULL, idf_weights = NULL, sparse = FALSE, ragged = FALSE,
+    encoding = "utf-8", name = NULL, ...)
{
+  # only the genuinely integer-valued args are coerced; output_mode and
+  # ragged (string/logical) pass through unmodified
+  args <- capture_args(list(max_tokens = as_integer, ngrams = function (x)
+    if (length(x) > 1)
+      as_integer_tuple(x)
+    else as_integer(x), output_sequence_length = as_integer,
+    input_shape = normalize_shape, batch_size = as_integer,
+    batch_input_shape = normalize_shape), ignore = "object")
+  create_layer(keras$layers$TextVectorization, object, args)
+}
-
-  # TODO: in TF 2.8, new args: sparse=FALSE, ragged=FALSE, idf_weights=NULL
-  if (tf_version() >= "2.6") {
-    callable <- keras$layers$TextVectorization
-    # output_mode_choices <- c("int", "multi_hot", "count", "tf_idf")
-  } else {
-    # warning("Defaults to layer_text_vectorization() were changed in Tensorflow 2.6.",
-    #         "Please consult the docs and update your code")
-    callable <- keras$layers$experimental$preprocessing$TextVectorization
-    # output_mode_choices <- c("int", "binary", "count", "tf-idf")
-    # on the python side, "binary" is renamed to "multi_hot", "tf-idf" renamed to "tf_idf"
-  }
-
-
-  modifiers <- list(
-    max_tokens = as_nullable_integer,
-    output_sequence_length = as_nullable_integer,
-    ngrams = function(x)
-      if (length(x) > 1) as_integer_tuple(x) else as_nullable_integer(x),
-    # output_mode = function(x) fix_string(match.arg(x, output_mode_choices)),
-    output_mode = fix_string,
-    split = fix_string,
-    standardize = fix_string
-  )
-
-  args <- capture_args(match.call(), modifiers, ignore = "object")
-
-  if("vocabulary" %in% names(args))
-    require_tf_version("2.4", "pass `vocabulary` argument to layer_text_vectorization()")
-  create_layer(callable, object, args)
-}
+
+# TODO: add tests/ confirm that `get_vocabulary()` returns an R character
+# vector. In older TF versions it used to return python byte objects, which
+# needed `[x.decode("UTF-8") for x in vocab]`

#' @param include_special_tokens If TRUE, the returned vocabulary will include
#' the padding and OOV tokens, and a term's index in the vocabulary will equal
-#' the term's index when calling the layer. If False, the returned vocabulary
+#' the term's index when calling the layer. If FALSE, the returned vocabulary
#' will not include any padding or OOV tokens.
#' @rdname layer_text_vectorization
#' @export
get_vocabulary <- function(object, include_special_tokens=TRUE) {
-  if (tensorflow::tf_version() < "2.3") {
-    python_path <- system.file("python", package = "keras")
-    tools <- import_from_path("kerastools", path = python_path)
-    tools$get_vocabulary$get_vocabulary(object)
-  } else {
-    args <- capture_args(match.call(), ignore = "object")
-    do.call(object$get_vocabulary, args)
-  }
+  args <- capture_args(ignore = "object")
+  do.call(object$get_vocabulary, args)
}

#' @rdname layer_text_vectorization
@@ -1355,21 +2359,154 @@ get_vocabulary <- function(object, include_special_tokens=TRUE) {
#'   output_mode is "tf_idf". Should not be set otherwise.
#' @export
set_vocabulary <- function(object, vocabulary, idf_weights=NULL, ...) {
-  args <- capture_args(match.call(), ignore = "object")
-
-  if (tf_version() < "2.6") {
-    # required arg renamed when promoted out of experimental in 2.6:
-    # vocab -> vocabulary. Position as first unchanged.
-    vocab <- args[["vocabulary"]] %||% args[["vocab"]]
-    args[c("vocab", "vocabulary")] <- NULL
-    args <- c(list(vocab), args)
-  }
-
+  args <- capture_args(ignore = "object")
  do.call(object$set_vocabulary, args)
  invisible(object)
}

+## TODO: TextVectorization has a compile() method. investigate if this is
+## actually useful to export
+#compile.keras.engine.base_preprocessing_layer.PreprocessingLayer <-
+function(object, run_eagerly = NULL, steps_per_execution = NULL, ...) {
+  args <- capture_args(ignore="object")
+  do.call(object$compile, args)
+}
+
+
+#' A preprocessing layer to convert raw audio signals to Mel spectrograms.
+#'
+#' @description
+#' This layer takes `float32`/`float64` single or batched audio signal as
+#' inputs and computes the Mel spectrogram using Short-Time Fourier Transform
+#' and Mel scaling. The input should be a 1D (unbatched) or 2D (batched) tensor
+#' representing audio signals. The output will be a 2D or 3D tensor
+#' representing Mel spectrograms.
+#'
+#' A spectrogram is an image-like representation that shows the frequency
+#' spectrum of a signal over time. It uses the x-axis to represent time, the
+#' y-axis to represent frequency, and each pixel to represent intensity.
+#' Mel spectrograms are a special type of spectrogram that use the mel scale,
+#' which approximates how humans perceive sound. They are commonly used in
+#' speech and music processing tasks like speech recognition, speaker
+#' identification, and music genre classification.
+#'
+#' # References
+#' - [Spectrogram](https://en.wikipedia.org/wiki/Spectrogram),
+#' - [Mel scale](https://en.wikipedia.org/wiki/Mel_scale).
+#'
+#' # Examples
+#' **Unbatched audio signal**
+#'
+#' ```r
+#' layer <- layer_mel_spectrogram(
+#'   num_mel_bins = 64,
+#'   sampling_rate = 8000,
+#'   sequence_stride = 256,
+#'   fft_length = 2048
+#' )
+#' layer(random_uniform(shape = c(16000))) |> shape()
+#' ```
+#'
+#' **Batched audio signal**
+#'
+#' ```r
+#' layer <- layer_mel_spectrogram(
+#'   num_mel_bins = 80,
+#'   sampling_rate = 8000,
+#'   sequence_stride = 128,
+#'   fft_length = 2048
+#' )
+#' layer(random_uniform(shape = c(2, 16000))) |> shape()
+#' ```
+#'
+#' # Input Shape
+#' 1D (unbatched) or 2D (batched) tensor with shape: `(..., samples)`.
+#'
+#' # Output Shape
+#' 2D (unbatched) or 3D (batched) tensor with
+#' shape: `(..., num_mel_bins, time)`.
+#'
+#' @param fft_length
+#' Integer, size of the FFT window.
+#'
+#' @param sequence_stride
+#' Integer, number of samples between successive STFT
+#' columns.
+#'
+#' @param sequence_length
+#' Integer, size of the window used for applying
+#' `window` to each audio frame. If `NULL`, defaults to `fft_length`.
+#'
+#' @param window
+#' String, name of the window function to use. Available values
+#' are `"hann"` and `"hamming"`. If `window` is a tensor, it will be
+#' used directly as the window and its length must be
+#' `sequence_length`. If `window` is `NULL`, no windowing is
+#' used. Defaults to `"hann"`.
+#'
+#' @param sampling_rate
+#' Integer, sample rate of the input signal.
+#'
+#' @param num_mel_bins
+#' Integer, number of mel bins to generate.
+#'
+#' @param min_freq
+#' Float, minimum frequency of the mel bins.
+#'
+#' @param max_freq
+#' Float, maximum frequency of the mel bins.
+#' If `NULL`, defaults to `sampling_rate / 2`.
+#'
+#' @param power_to_db
+#' If `TRUE`, convert the power spectrogram to decibels.
+#'
+#' @param top_db
+#' Float, minimum negative cut-off `max(10 * log10(S)) - top_db`.
+#'
+#' @param mag_exp
+#' Float, exponent for the magnitude spectrogram.
+#' 1 for magnitude, 2 for power, etc. Defaults to `2`.
+#'
+#' @param ref_power
+#' Float. The power is scaled relative to it:
+#' `10 * log10(S / ref_power)`.
+#'
+#' @param min_power
+#' Float, minimum value for power and `ref_power`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @family audio preprocessing layers
+#' @family preprocessing layers
+#' @family layers
+#' @export
+#' @tether keras.layers.MelSpectrogram
+layer_mel_spectrogram <-
+function (object, fft_length = 2048L, sequence_stride = 512L,
+    sequence_length = NULL, window = "hann", sampling_rate = 16000L,
+    num_mel_bins = 128L, min_freq = 20, max_freq = NULL, power_to_db = TRUE,
+    top_db = 80, mag_exp = 2, min_power = 1e-10, ref_power = 1,
+    ...)
{
+    args <- capture_args(list(fft_length = as_integer, sequence_stride = as_integer,
+        sequence_length = as_integer, sampling_rate = as_integer,
+        num_mel_bins = as_integer, input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$MelSpectrogram, object, args)
+}
+
+
+
+
+# ---- adapt ----
+
#' Fits the state of the preprocessing layer to the data being passed
#'
@@ -1381,47 +2518,44 @@ set_vocabulary <- function(object, vocabulary, idf_weights=NULL, ...) {
#' when adapting each layer only once, but if you adapt a layer multiple times
#' you will need to take care to re-compile any compiled functions as follows:
#'
-#' * If you are adding a preprocessing layer to a `keras.Model`, you need to
+#' * If you are adding a preprocessing layer to a keras model, you need to
#' call `compile(model)` after each subsequent call to `adapt()`.
-#' * If you are calling a preprocessing layer inside `tfdatasets::dataset_map()`,
-#' you should call `dataset_map()` again on the input `tf.data.Dataset` after each
+#' * If you are calling a preprocessing layer inside [`tfdatasets::dataset_map()`],
+#' you should call `dataset_map()` again on the input `Dataset` after each
#' `adapt()`.
-#' * If you are using a `tensorflow::tf_function()` directly which calls a preprocessing
-#' layer, you need to call `tf_function` again on your callable after
+#' * If you are using a [`tensorflow::tf_function()`] directly which calls a preprocessing
+#' layer, you need to call `tf_function()` again on your callable after
#' each subsequent call to `adapt()`.
#'
-#' `keras_model` example with multiple adapts:
-#' ````r
-#' layer <- layer_normalization(axis=NULL)
+#' `keras_model()` example with multiple adapts:
+#' ````{r}
+#' layer <- layer_normalization(axis = NULL)
#' adapt(layer, c(0, 2))
-#' model <- keras_model_sequential(layer)
-#' predict(model, c(0, 1, 2)) # [1] -1 0 1
+#' model <- keras_model_sequential() |> layer()
+#' predict(model, c(0, 1, 2), verbose = FALSE) # [1] -1 0 1
#'
#' adapt(layer, c(-1, 1))
#' compile(model) # This is needed to re-compile model.predict!
-#' predict(model, c(0, 1, 2)) # [1] 0 1 2
+#' predict(model, c(0, 1, 2), verbose = FALSE) # [1] 0 1 2
#' ````
#'
-#' `tf.data.Dataset` example with multiple adapts:
-#' ````r
-#' layer <- layer_normalization(axis=NULL)
+#' `tfdatasets` example with multiple adapts:
+#' ````{r}
+#' layer <- layer_normalization(axis = NULL)
#' adapt(layer, c(0, 2))
#' input_ds <- tfdatasets::range_dataset(0, 3)
-#' normalized_ds <- input_ds %>%
+#' normalized_ds <- input_ds |>
#'   tfdatasets::dataset_map(layer)
-#' str(reticulate::iterate(normalized_ds))
-#' # List of 3
-#' # $ :tf.Tensor([-1.], shape=(1,), dtype=float32)
-#' # $ :tf.Tensor([0.], shape=(1,), dtype=float32)
-#' # $ :tf.Tensor([1.], shape=(1,), dtype=float32)
+#' str(tfdatasets::iterate(normalized_ds))
+#'
#' adapt(layer, c(-1, 1))
-#' normalized_ds <- input_ds %>%
+#' normalized_ds <- input_ds |>
#'   tfdatasets::dataset_map(layer) # Re-map over the input dataset.
-#' str(reticulate::iterate(normalized_ds$as_numpy_iterator()))
-#' # List of 3
-#' # $ : num [1(1d)] -1
-#' # $ : num [1(1d)] 0
-#' # $ : num [1(1d)] 1
+#'
+#' normalized_ds |>
+#'   tfdatasets::as_array_iterator() |>
+#'   tfdatasets::iterate(simplify = FALSE) |>
+#'   str()
#' ````
#'
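+#' [`tensorflow::tf_function()`] example with multiple adapts (a sketch, not
+#' run, mirroring the rule above; `tensorflow::as_tensor()` is assumed for
+#' constructing the input tensor):
+#' ````r
+#' layer <- layer_normalization(axis = NULL)
+#' adapt(layer, c(0, 2))
+#' fn <- tensorflow::tf_function(function(x) layer(x))
+#' fn(tensorflow::as_tensor(c(0, 1, 2))) # ~ -1 0 1
+#'
+#' adapt(layer, c(-1, 1))
+#' # re-wrap with tf_function() so the updated state is re-traced
+#' fn <- tensorflow::tf_function(function(x) layer(x))
+#' fn(tensorflow::as_tensor(c(0, 1, 2))) # ~ 0 1 2
+#' ````
+#'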
#' @param object Preprocessing layer object
#'
#' @param data The data to train on. It can be passed either as a
#'   `tf.data.Dataset` or as an R array.
#'
#' @param batch_size Integer or `NULL`. Number of samples per state update. If
#' unspecified, `batch_size` will default to `32`. Do not specify the
-#' batch_size if your data is in the form of datasets, generators, or
-#' `keras.utils.Sequence` instances (since they generate batches).
+#' batch_size if your data is in the form of a TF Dataset or a generator
+#' (since they generate batches).
#'
#' @param steps Integer or `NULL`. Total number of steps (batches of samples).
#' When training with input tensors such as TensorFlow data tensors, the
#' default `NULL` is equal to the number of samples in your dataset divided by
#' the batch size, or `1` if that cannot be determined. If `data` is a
#' `tf.data.Dataset`, and `steps` is `NULL`, the epoch will run until the
#' input dataset is exhausted. When passing an infinitely repeating dataset,
#' you must specify the steps argument. This argument is not supported with
#' array inputs.
#'
#' @family preprocessing layer methods
#'
-#' @seealso
-#'
-#' +
-#' +
+# @seealso
+# +
+# +
#'
+#' @returns Returns `object`, invisibly.
#' @export
adapt <- function(object, data, ..., batch_size=NULL, steps=NULL) {
-  if (!inherits(data, "python.builtin.object"))
+  if (!is_py_object(data))
    data <- keras_array(data) # TODO: use as_tensor() here
-  args <- capture_args(match.call(),
-                       list(batch_size = as_nullable_integer,
-                            step = as_nullable_integer),
-                       ignore = "object")
+  args <- capture_args(list(batch_size = as_nullable_integer,
+                            steps = as_nullable_integer),
+                       ignore = c("object", "data"))
+  # `data` named to `dataset` in keras3 keras.utils.FeatureSpace
+  # pass it as a positional arg
+  args <- c(list(data), args)
  do.call(object$adapt, args)
  invisible(object)
}
-
-
-
-
-
-## TODO: TextVectorization has a compile() method.
investigate if this is -## actually useful to export -#compile.keras.engine.base_preprocessing_layer.PreprocessingLayer <- -function(object, run_eagerly = NULL, steps_per_execution = NULL, ...) { - args <- capture_args(match.call(), ignore="object") - do.call(object$compile, args) -} - - - -# TODO: add an 'experimental' tag in the R docs where appropriate - -require_tf_version <- function(ver, msg = "this function.") { - if (tf_version() < ver) - stop("Tensorflow version >=", ver, " required to use ", msg) -} - - -# in python keras, sometimes strings are compared with `is`, which is too strict -# https://github.com/keras-team/keras/blob/db3fa5d40ed19cdf89fc295e8d0e317fb64480d4/keras/layers/preprocessing/text_vectorization.py#L524 -# # there was already 1 PR submitted to fix this: -# https://github.com/tensorflow/tensorflow/pull/34420 -# This is a hack around that: deparse and reparse the string in python. This -# gives the python interpreter a chance to recycle the address for the identical -# string that's already in it's string pool, which then passes the `is` -# comparison. -fix_string <- local({ - py_reparse <- NULL # R CMD check: no visible binding - delayedAssign("py_reparse", - py_eval("lambda x: eval(repr(x), {'__builtins__':{}}, {})", - convert = FALSE)) - # Note, the globals dict {'__builtins__':{}} is a guardrail, not a security - # door. A well crafted string can still break out to execute arbitrary code in - # the python session, but it can do no more than can be done from the R - # process already. - # Can't use ast.literal_eval() because it fails the 'is' test. E.g.: - # bool('foo' is ast.literal_eval("'foo'")) == False - function(x) { - if (is.character(x)) - py_call(py_reparse, as.character(x)) - else - x - } -}) diff --git a/R/layers-recurrent-cells.R b/R/layers-recurrent-cells.R deleted file mode 100644 index c4e71bb517..0000000000 --- a/R/layers-recurrent-cells.R +++ /dev/null @@ -1,436 +0,0 @@ -#' Base class for recurrent layers -#' -#' @details -#' See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) -#' for details about the usage of RNN API. -#' -#' @inheritParams layer_dense -#' -#' @param cell A RNN cell instance or a list of RNN cell instances. -#' A RNN cell is a class that has: -#' - A `call(input_at_t, states_at_t)` method, returning -#' `(output_at_t, states_at_t_plus_1)`. The call method of the -#' cell can also take the optional argument `constants`, see -#' section "Note on passing external constants" below. -#' - A `state_size` attribute. This can be a single integer -#' (single state) in which case it is the size of the recurrent -#' state. This can also be a list of integers (one size per state). -#' The `state_size` can also be TensorShape or list of -#' TensorShape, to represent high dimension state. -#' - A `output_size` attribute. This can be a single integer or a -#' TensorShape, which represent the shape of the output. For backward -#' compatible reason, if this attribute is not available for the -#' cell, the value will be inferred by the first element of the -#' `state_size`. -#' - A `get_initial_state(inputs=NULL, batch_size=NULL, dtype=NULL)` -#' method that creates a tensor meant to be fed to `call()` as the -#' initial state, if the user didn't specify any initial state via other -#' means. The returned initial state should have a shape of -#' `[batch_size, cell$state_size]`. The cell might choose to create a -#' tensor full of zeros, or full of other values based on the cell's -#' implementation. 
-#' `inputs` is the input tensor to the RNN layer, which should -#' contain the batch size as first dimension (`inputs$shape[1]`), -#' and also dtype (`inputs$dtype`). Note that -#' the `shape[1]` might be `NULL` during the graph construction. Either -#' the `inputs` or the pair of `batch_size` and `dtype` are provided. -#' `batch_size` is a scalar tensor that represents the batch size -#' of the inputs. `dtype` is `tf.DType` that represents the dtype of -#' the inputs. -#' For backward compatibility, if this method is not implemented -#' by the cell, the RNN layer will create a zero filled tensor with the -#' size of `[batch_size, cell$state_size]`. -#' In the case that `cell` is a list of RNN cell instances, the cells -#' will be stacked on top of each other in the RNN, resulting in an -#' efficient stacked RNN. -#' -#' @param return_sequences Boolean (default `FALSE`). Whether to return the last -#' output in the output sequence, or the full sequence. -#' -#' @param return_state Boolean (default `FALSE`). Whether to return the last state -#' in addition to the output. -#' -#' @param go_backwards Boolean (default `FALSE`). -#' If `TRUE`, process the input sequence backwards and return the -#' reversed sequence. -#' -#' @param stateful Boolean (default `FALSE`). If `TRUE`, the last state -#' for each sample at index `i` in a batch will be used as initial -#' state for the sample of index `i` in the following batch. -#' -#' @param unroll Boolean (default `FALSE`). -#' If TRUE, the network will be unrolled, else a symbolic loop will be used. -#' Unrolling can speed-up a RNN, although it tends to be more -#' memory-intensive. Unrolling is only suitable for short sequences. -#' -#' @param time_major The shape format of the `inputs` and `outputs` tensors. -#' If `TRUE`, the inputs and outputs will be in shape -#' `(timesteps, batch, ...)`, whereas in the FALSE case, it will be -#' `(batch, timesteps, ...)`. Using `time_major = TRUE` is a bit more -#' efficient because it avoids transposes at the beginning and end of the -#' RNN calculation. However, most TensorFlow data is batch-major, so by -#' default this function accepts input and emits output in batch-major -#' form. -#' -#' @param zero_output_for_mask Boolean (default `FALSE`). -#' Whether the output should use zeros for the masked timesteps. Note that -#' this field is only used when `return_sequences` is TRUE and mask is -#' provided. It can useful if you want to reuse the raw output sequence of -#' the RNN without interference from the masked timesteps, eg, merging -#' bidirectional RNNs. -#' -#' @param ... standard layer arguments. -#' -#' @section Call arguments: -#' - `inputs`: Input tensor. -#' - `mask`: Binary tensor of shape `[batch_size, timesteps]` indicating whether -#' a given timestep should be masked. An individual `TRUE` entry indicates -#' that the corresponding timestep should be utilized, while a `FALSE` -#' entry indicates that the corresponding timestep should be ignored. -#' - `training`: R or Python Boolean indicating whether the layer should behave in -#' training mode or in inference mode. This argument is passed to the cell -#' when calling it. This is for use with cells that use dropout. -#' - `initial_state`: List of initial state tensors to be passed to the first -#' call of the cell. -#' - `constants`: List of constant tensors to be passed to the cell at each -#' timestep. 
-#' -#' @template roxlate-recurrent-layer -#' -#' @seealso -#' + -#' + -#' + `reticulate::py_help(keras$layers$RNN)` -#' -#' @export -layer_rnn <- -function(object, cell, - return_sequences = FALSE, - return_state = FALSE, - go_backwards = FALSE, - stateful = FALSE, - unroll = FALSE, - time_major = FALSE, - ..., - zero_output_for_mask = FALSE) -{ - args <- capture_args(match.call(), ignore = "object") - create_layer(keras$layers$RNN, object, args) -} - - -#' Cell class for SimpleRNN -#' -#' @details -#' See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) -#' for details about the usage of RNN API. -#' -#' This class processes one step within the whole time sequence input, whereas -#' `tf.keras.layer.SimpleRNN` processes the whole sequence. -#' -#' @param units Positive integer, dimensionality of the output space. -#' -#' @param activation Activation function to use. -#' Default: hyperbolic tangent (`tanh`). -#' If you pass `NULL`, no activation is applied -#' (ie. "linear" activation: `a(x) = x`). -#' -#' @param use_bias Boolean, (default `TRUE`), whether the layer uses a bias vector. -#' -#' @param kernel_initializer Initializer for the `kernel` weights matrix, -#' used for the linear transformation of the inputs. Default: -#' `glorot_uniform`. -#' -#' @param recurrent_initializer Initializer for the `recurrent_kernel` -#' weights matrix, used for the linear transformation of the recurrent state. -#' Default: `orthogonal`. -#' -#' @param bias_initializer Initializer for the bias vector. Default: `zeros`. -#' -#' @param kernel_regularizer Regularizer function applied to the `kernel` weights -#' matrix. Default: `NULL`. -#' -#' @param recurrent_regularizer Regularizer function applied to the -#' `recurrent_kernel` weights matrix. Default: `NULL`. -#' -#' @param bias_regularizer Regularizer function applied to the bias vector. Default: -#' `NULL`. -#' -#' @param kernel_constraint Constraint function applied to the `kernel` weights -#' matrix. Default: `NULL`. -#' -#' @param recurrent_constraint Constraint function applied to the `recurrent_kernel` -#' weights matrix. Default: `NULL`. -#' -#' @param bias_constraint Constraint function applied to the bias vector. Default: -#' `NULL`. -#' -#' @param dropout Float between 0 and 1. Fraction of the units to drop for the linear -#' transformation of the inputs. Default: 0. -#' -#' @param recurrent_dropout Float between 0 and 1. Fraction of the units to drop for -#' the linear transformation of the recurrent state. Default: 0. -#' -#' @param ... standard layer arguments. -#' -#' @family RNN cell layers -#' -#' @seealso -#' + -#' + -#' @export -layer_simple_rnn_cell <- -function(units, - activation = "tanh", - use_bias = TRUE, - kernel_initializer = "glorot_uniform", - recurrent_initializer = "orthogonal", - bias_initializer = "zeros", - kernel_regularizer = NULL, - recurrent_regularizer = NULL, - bias_regularizer = NULL, - kernel_constraint = NULL, - recurrent_constraint = NULL, - bias_constraint = NULL, - dropout = 0, - recurrent_dropout = 0, - ...) -{ - args <- capture_args(match.call(), list(units = as.integer)) - do.call(keras$layers$SimpleRNNCell, args) -} - - - - -#' Cell class for the GRU layer -#' -#' @details -#' See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) -#' for details about the usage of RNN API. -#' -#' This class processes one step within the whole time sequence input, whereas -#' `tf.keras.layer.GRU` processes the whole sequence. 
-#' -#' For example: -#' ````r -#' inputs <- k_random_uniform(c(32, 10, 8)) -#' output <- inputs %>% layer_rnn(layer_gru_cell(4)) -#' output$shape # TensorShape([32, 4]) -#' -#' rnn <- layer_rnn(cell = layer_gru_cell(4), -#' return_sequence = TRUE, -#' return_state = TRUE) -#' c(whole_sequence_output, final_state) %<-% rnn(inputs) -#' whole_sequence_output$shape # TensorShape([32, 10, 4]) -#' final_state$shape # TensorShape([32, 4]) -#' ```` -#' -#' @param units Positive integer, dimensionality of the output space. -#' -#' @param activation Activation function to use. Default: hyperbolic tangent -#' (`tanh`). If you pass `NULL`, no activation is applied -#' (ie. "linear" activation: `a(x) = x`). -#' -#' @param recurrent_activation Activation function to use for the recurrent step. -#' Default: sigmoid (`sigmoid`). If you pass `NULL`, no activation is -#' applied (ie. "linear" activation: `a(x) = x`). -#' -#' @param use_bias Boolean, (default `TRUE`), whether the layer uses a bias vector. -#' -#' @param kernel_initializer Initializer for the `kernel` weights matrix, -#' used for the linear transformation of the inputs. Default: -#' `glorot_uniform`. -#' -#' @param recurrent_initializer Initializer for the `recurrent_kernel` -#' weights matrix, used for the linear transformation of the recurrent state. -#' Default: `orthogonal`. -#' -#' @param bias_initializer Initializer for the bias vector. Default: `zeros`. -#' -#' @param kernel_regularizer Regularizer function applied to the `kernel` weights -#' matrix. Default: `NULL`. -#' -#' @param recurrent_regularizer Regularizer function applied to the -#' `recurrent_kernel` weights matrix. Default: `NULL`. -#' -#' @param bias_regularizer Regularizer function applied to the bias vector. Default: -#' `NULL`. -#' -#' @param kernel_constraint Constraint function applied to the `kernel` weights -#' matrix. Default: `NULL`. -#' -#' @param recurrent_constraint Constraint function applied to the `recurrent_kernel` -#' weights matrix. Default: `NULL`. -#' -#' @param bias_constraint Constraint function applied to the bias vector. Default: -#' `NULL`. -#' -#' @param dropout Float between 0 and 1. Fraction of the units to drop for the -#' linear transformation of the inputs. Default: 0. -#' -#' @param recurrent_dropout Float between 0 and 1. Fraction of the units to drop for -#' the linear transformation of the recurrent state. Default: 0. -#' -#' @param reset_after GRU convention (whether to apply reset gate after or -#' before matrix multiplication). FALSE = "before", -#' TRUE = "after" (default and CuDNN compatible). -#' @param ... standard layer arguments. -#' -#' @family RNN cell layers -#' -#' @seealso -#' + -#' -#' @export -layer_gru_cell <- -function(units, - activation = "tanh", - recurrent_activation = "sigmoid", - use_bias = TRUE, - kernel_initializer = "glorot_uniform", - recurrent_initializer = "orthogonal", - bias_initializer = "zeros", - kernel_regularizer = NULL, - recurrent_regularizer = NULL, - bias_regularizer = NULL, - kernel_constraint = NULL, - recurrent_constraint = NULL, - bias_constraint = NULL, - dropout = 0, - recurrent_dropout = 0, - reset_after = TRUE, - ...) -{ - args <- capture_args(match.call(), list(units = as.integer)) - do.call(keras$layers$GRUCell, args) -} - - -#' Wrapper allowing a stack of RNN cells to behave as a single cell -#' -#' Used to implement efficient stacked RNNs. -#' -#' @param cells List of RNN cell instances. -#' @param ... standard layer arguments. 
-#' -#' @family RNN cell layers -#' -#' @seealso -#' + -#' -#' @export -layer_stacked_rnn_cells <- -function(cells, ...) -{ - args <- capture_args(match.call()) - do.call(keras$layers$StackedRNNCells, args) -} - - -#' Cell class for the LSTM layer -#' -#' @details -#' See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn) -#' for details about the usage of RNN API. -#' -#' This class processes one step within the whole time sequence input, whereas -#' `tf$keras$layer$LSTM` processes the whole sequence. -#' -#' For example: -#' ````r -#' inputs <- k_random_normal(c(32, 10, 8)) -#' rnn <- layer_rnn(cell = layer_lstm_cell(units = 4)) -#' output <- rnn(inputs) -#' dim(output) # (32, 4) -#' -#' rnn <- layer_rnn(cell = layer_lstm_cell(units = 4), -#' return_sequences = TRUE, -#' return_state = TRUE) -#' c(whole_seq_output, final_memory_state, final_carry_state) %<-% rnn(inputs) -#' -#' dim(whole_seq_output) # (32, 10, 4) -#' dim(final_memory_state) # (32, 4) -#' dim(final_carry_state) # (32, 4) -#' ```` -#' -#' @param units Positive integer, dimensionality of the output space. -#' -#' @param activation Activation function to use. Default: hyperbolic tangent -#' (`tanh`). If you pass `NULL`, no activation is applied (ie. "linear" -#' activation: `a(x) = x`). -#' -#' @param recurrent_activation Activation function to use for the recurrent step. -#' Default: sigmoid (`sigmoid`). If you pass `NULL`, no activation is applied -#' (ie. "linear" activation: `a(x) = x`). -#' -#' @param use_bias Boolean, (default `TRUE`), whether the layer uses a bias vector. -#' -#' @param kernel_initializer Initializer for the `kernel` weights matrix, used for -#' the linear transformation of the inputs. Default: `glorot_uniform`. -#' -#' @param recurrent_initializer Initializer for the `recurrent_kernel` weights -#' matrix, used for the linear transformation of the recurrent state. -#' Default: `orthogonal`. -#' -#' @param bias_initializer Initializer for the bias vector. Default: `zeros`. -#' -#' @param unit_forget_bias Boolean (default `TRUE`). If TRUE, add 1 to the bias of -#' the forget gate at initialization. Setting it to true will also force -#' `bias_initializer="zeros"`. This is recommended in [Jozefowicz et -#' al.](https://proceedings.mlr.press/v37/jozefowicz15.pdf) -#' -#' @param kernel_regularizer Regularizer function applied to the `kernel` weights -#' matrix. Default: `NULL`. -#' -#' @param recurrent_regularizer Regularizer function applied to -#' the `recurrent_kernel` weights matrix. Default: `NULL`. -#' -#' @param bias_regularizer Regularizer function applied to the bias vector. Default: -#' `NULL`. -#' -#' @param kernel_constraint Constraint function applied to the `kernel` weights -#' matrix. Default: `NULL`. -#' -#' @param recurrent_constraint Constraint function applied to the `recurrent_kernel` -#' weights matrix. Default: `NULL`. -#' -#' @param bias_constraint Constraint function applied to the bias vector. Default: -#' `NULL`. -#' -#' @param dropout Float between 0 and 1. Fraction of the units to drop for the linear -#' transformation of the inputs. Default: 0. -#' -#' @param recurrent_dropout Float between 0 and 1. Fraction of the units to drop for -#' the linear transformation of the recurrent state. Default: 0. -#' -#' @param ... standard layer arguments. 
-#' -#' @family RNN cell layers -#' -#' @seealso -#' + -#' -#' @export -layer_lstm_cell <- -function(units, - activation = "tanh", - recurrent_activation = "sigmoid", - use_bias = TRUE, - kernel_initializer = "glorot_uniform", - recurrent_initializer = "orthogonal", - bias_initializer = "zeros", - unit_forget_bias = TRUE, - kernel_regularizer = NULL, - recurrent_regularizer = NULL, - bias_regularizer = NULL, - kernel_constraint = NULL, - recurrent_constraint = NULL, - bias_constraint = NULL, - dropout = 0, - recurrent_dropout = 0, - ...) -{ - args <- capture_args(match.call(), list(units = as.integer)) - do.call(keras$layers$LSTMCell, args) -} diff --git a/R/layers-recurrent.R b/R/layers-recurrent.R deleted file mode 100644 index 6351f312c2..0000000000 --- a/R/layers-recurrent.R +++ /dev/null @@ -1,341 +0,0 @@ - -#' Fully-connected RNN where the output is to be fed back to input. -#' -#' @inheritParams layer_dense -#' -#' @param units Positive integer, dimensionality of the output space. -#' @param activation Activation function to use. Default: hyperbolic tangent -#' (`tanh`). If you pass `NULL`, no activation is applied -#' (ie. "linear" activation: `a(x) = x`). -#' @param use_bias Boolean, whether the layer uses a bias vector. -#' @param return_sequences Boolean. Whether to return the last output in the -#' output sequence, or the full sequence. -#' @param return_state Boolean (default FALSE). Whether to return the last state -#' in addition to the output. -#' @param go_backwards Boolean (default FALSE). If TRUE, process the input -#' sequence backwards and return the reversed sequence. -#' @param stateful Boolean (default FALSE). If TRUE, the last state for each -#' sample at index i in a batch will be used as initial state for the sample -#' of index i in the following batch. -#' @param unroll Boolean (default FALSE). If TRUE, the network will be unrolled, -#' else a symbolic loop will be used. Unrolling can speed-up a RNN, although -#' it tends to be more memory-intensive. Unrolling is only suitable for short -#' sequences. -#' @param kernel_initializer Initializer for the `kernel` weights matrix, used -#' for the linear transformation of the inputs. -#' @param recurrent_initializer Initializer for the `recurrent_kernel` weights -#' matrix, used for the linear transformation of the recurrent state. -#' @param bias_initializer Initializer for the bias vector. -#' @param kernel_regularizer Regularizer function applied to the `kernel` -#' weights matrix. -#' @param recurrent_regularizer Regularizer function applied to the -#' `recurrent_kernel` weights matrix. -#' @param bias_regularizer Regularizer function applied to the bias vector. -#' @param activity_regularizer Regularizer function applied to the output of the -#' layer (its "activation").. -#' @param kernel_constraint Constraint function applied to the `kernel` weights -#' matrix. -#' @param recurrent_constraint Constraint function applied to the -#' `recurrent_kernel` weights matrix. -#' @param bias_constraint Constraint function applied to the bias vector. -#' @param dropout Float between 0 and 1. Fraction of the units to drop for the -#' linear transformation of the inputs. -#' @param recurrent_dropout Float between 0 and 1. Fraction of the units to drop -#' for the linear transformation of the recurrent state. -#' @param ... Standard Layer args. 
-#' -#' @template roxlate-recurrent-layer -#' -#' @section References: -#' - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](https://arxiv.org/abs/1512.05287) -#' -#' -#' @export -layer_simple_rnn <- -function(object, - units, - activation = "tanh", - use_bias = TRUE, - return_sequences = FALSE, - return_state = FALSE, - go_backwards = FALSE, - stateful = FALSE, - unroll = FALSE, - kernel_initializer = "glorot_uniform", - recurrent_initializer = "orthogonal", - bias_initializer = "zeros", - kernel_regularizer = NULL, - recurrent_regularizer = NULL, - bias_regularizer = NULL, - activity_regularizer = NULL, - kernel_constraint = NULL, - recurrent_constraint = NULL, - bias_constraint = NULL, - dropout = 0.0, - recurrent_dropout = 0.0, - ...) -{ - args <- capture_args(match.call(), list( - units = as.integer, - input_shape = normalize_shape, - batch_input_shape = normalize_shape, - batch_size = as_nullable_integer - ), ignore = "object") - create_layer(keras$layers$SimpleRNN, object, args) -} - - -#' Gated Recurrent Unit - Cho et al. -#' -#' There are two variants. The default one is based on 1406.1078v3 and -#' has reset gate applied to hidden state before matrix multiplication. The -#' other one is based on original 1406.1078v1 and has the order reversed. -#' -#' The second variant is compatible with CuDNNGRU (GPU-only) and allows -#' inference on CPU. Thus it has separate biases for `kernel` and -#' `recurrent_kernel`. Use `reset_after = TRUE` and -#' `recurrent_activation = "sigmoid"`. -#' -#' @inheritParams layer_simple_rnn -#' -#' @param recurrent_activation Activation function to use for the recurrent -#' step. -#' @param time_major If True, the inputs and outputs will be in shape -#' `[timesteps, batch, feature]`, whereas in the False case, it will be -#' `[batch, timesteps, feature]`. Using `time_major = TRUE` is a bit more -#' efficient because it avoids transposes at the beginning and end of the RNN -#' calculation. However, most TensorFlow data is batch-major, so by default -#' this function accepts input and emits output in batch-major form. -#' @param reset_after GRU convention (whether to apply reset gate after or -#' before matrix multiplication). FALSE = "before" (default), -#' TRUE = "after" (CuDNN compatible). -#' -#' -#' @template roxlate-recurrent-layer -#' -#' @section References: -#' - [Learning Phrase Representations using RNN Encoder-Decoder for Statistical -#' Machine Translation](https://arxiv.org/abs/1406.1078) -#' - [On the Properties of Neural Machine Translation: -#' Encoder-Decoder Approaches](https://arxiv.org/abs/1409.1259) -#' - [Empirical -#' Evaluation of Gated Recurrent Neural Networks on Sequence -#' Modeling](https://arxiv.org/abs/1412.3555v1) -#' - [A Theoretically Grounded -#' Application of Dropout in Recurrent Neural -#' Networks](https://arxiv.org/abs/1512.05287) -#' -#' @export -layer_gru <- -function(object, - units, - activation = "tanh", - recurrent_activation = "sigmoid", - use_bias = TRUE, - return_sequences = FALSE, - return_state = FALSE, - go_backwards = FALSE, - stateful = FALSE, - unroll = FALSE, - time_major = FALSE, - reset_after = TRUE, - kernel_initializer = "glorot_uniform", - recurrent_initializer = "orthogonal", - bias_initializer = "zeros", - kernel_regularizer = NULL, - recurrent_regularizer = NULL, - bias_regularizer = NULL, - activity_regularizer = NULL, - kernel_constraint = NULL, - recurrent_constraint = NULL, - bias_constraint = NULL, - dropout = 0.0, - recurrent_dropout = 0.0, - ...) 
-{ - args <- capture_args(match.call(), list( - units = as.integer, - input_shape = normalize_shape, - batch_input_shape = normalize_shape, - batch_size = as_nullable_integer - ), ignore = "object") - create_layer(keras$layers$GRU, object, args) -} - - - - -#' (Deprecated) Fast GRU implementation backed by [CuDNN](https://developer.nvidia.com/cudnn). -#' -#' Can only be run on GPU, with the TensorFlow backend. -#' -#' @inheritParams layer_simple_rnn -#' @inheritParams layer_dense -#' -#' @family recurrent layers -#' -#' @section References: -#' - [On the Properties of Neural Machine Translation: -#' Encoder-Decoder Approaches](https://arxiv.org/abs/1409.1259) -#' - [Empirical -#' Evaluation of Gated Recurrent Neural Networks on Sequence -#' Modeling](https://arxiv.org/abs/1412.3555v1) -#' - [A Theoretically Grounded -#' Application of Dropout in Recurrent Neural -#' Networks](https://arxiv.org/abs/1512.05287) -#' -#' @keywords internal -#' @export -layer_cudnn_gru <- function(object, units, - kernel_initializer = "glorot_uniform", recurrent_initializer = "orthogonal", bias_initializer = "zeros", - kernel_regularizer = NULL, recurrent_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL, - kernel_constraint = NULL, recurrent_constraint = NULL, bias_constraint = NULL, - return_sequences = FALSE, return_state = FALSE, stateful = FALSE, - input_shape = NULL, batch_input_shape = NULL, batch_size = NULL, - dtype = NULL, name = NULL, trainable = NULL, weights = NULL) { - - warning("layer_cudnn_gru() is deprecated since Tensorflow v2.0. Please use layer_gru() directly. ", - "layer_gru() will leverage CuDNN kernels by default if a GPU is available and certain constraints are met. ", - "See vignette 'Working with RNN's' for details.") - args <- list( - units = as.integer(units), - kernel_initializer = kernel_initializer, - recurrent_initializer = recurrent_initializer, - bias_initializer = bias_initializer, - kernel_regularizer = kernel_regularizer, - recurrent_regularizer = recurrent_regularizer, - bias_regularizer = bias_regularizer, - activity_regularizer = activity_regularizer, - kernel_constraint = kernel_constraint, - recurrent_constraint = recurrent_constraint, - bias_constraint = bias_constraint, - return_sequences = return_sequences, - return_state = return_state, - stateful = stateful, - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - dtype = dtype, - name = name, - trainable = trainable, - weights = weights - ) - - create_layer(tensorflow::tf$compat$v1$keras$layers$CuDNNGRU, object, args) -} - - -#' Long Short-Term Memory unit - Hochreiter 1997. -#' -#' For a step-by-step description of the algorithm, see [this tutorial](https://colah.github.io/posts/2015-08-Understanding-LSTMs/). -#' -#' @inheritParams layer_gru -#' -#' @param unit_forget_bias Boolean. If TRUE, add 1 to the bias of the forget -#' gate at initialization. Setting it to true will also force -#' `bias_initializer="zeros"`. 
This is recommended in [Jozefowicz et -#' al.](https://proceedings.mlr.press/v37/jozefowicz15.pdf) -#' -#' @template roxlate-recurrent-layer -#' -#' @section References: -#' - [Long short-term memory](http://www.bioinf.jku.at/publications/older/2604.pdf) (original 1997 paper) -#' - [Supervised sequence labeling with recurrent neural networks](https://www.cs.toronto.edu/~graves/preprint.pdf) -#' - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](https://arxiv.org/abs/1512.05287) -#' -#' @family recurrent layers -#' -#' @export -layer_lstm <- -function(object, - units, - activation = "tanh", - recurrent_activation = "sigmoid", - use_bias = TRUE, - return_sequences = FALSE, - return_state = FALSE, - go_backwards = FALSE, - stateful = FALSE, - time_major = FALSE, - unroll = FALSE, - kernel_initializer = "glorot_uniform", - recurrent_initializer = "orthogonal", - bias_initializer = "zeros", - unit_forget_bias = TRUE, - kernel_regularizer = NULL, - recurrent_regularizer = NULL, - bias_regularizer = NULL, - activity_regularizer = NULL, - kernel_constraint = NULL, - recurrent_constraint = NULL, - bias_constraint = NULL, - dropout = 0.0, - recurrent_dropout = 0.0, - ... -) -{ - args <- capture_args(match.call(), list( - units = as.integer, - input_shape = normalize_shape, - batch_input_shape = normalize_shape, - batch_size = as_nullable_integer - ), ignore = "object") - create_layer(keras$layers$LSTM, object, args) -} - -#' (Deprecated) Fast LSTM implementation backed by [CuDNN](https://developer.nvidia.com/cudnn). -#' -#' Can only be run on GPU, with the TensorFlow backend. -#' -#' @inheritParams layer_lstm -#' @inheritParams layer_dense -#' -#' @section References: -#' - [Long short-term memory](http://www.bioinf.jku.at/publications/older/2604.pdf) (original 1997 paper) -#' - [Supervised sequence labeling with recurrent neural networks](https://www.cs.toronto.edu/~graves/preprint.pdf) -#' - [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](https://arxiv.org/abs/1512.05287) -#' -#' @family recurrent layers -#' -#' @keywords internal -#' @export -layer_cudnn_lstm <- function(object, units, - kernel_initializer = "glorot_uniform", recurrent_initializer = "orthogonal", - bias_initializer = "zeros", unit_forget_bias = TRUE, - kernel_regularizer = NULL, recurrent_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL, - kernel_constraint = NULL, recurrent_constraint = NULL, bias_constraint = NULL, - return_sequences = FALSE, return_state = FALSE, stateful = FALSE, - input_shape = NULL, batch_input_shape = NULL, batch_size = NULL, - dtype = NULL, name = NULL, trainable = NULL, weights = NULL) { - - warning("layer_cudnn_lstm() is deprecated since Tensorflow v2.0. Please use layer_lstm() directly. ", - "layer_lstm() will leverage CuDNN kernels by default if a GPU is available and certain constraints are met. 
", - "See vignette 'Working with RNN's' for details.") - - args <- list( - units = as.integer(units), - kernel_initializer = kernel_initializer, - recurrent_initializer = recurrent_initializer, - bias_initializer = bias_initializer, - unit_forget_bias = unit_forget_bias, - kernel_regularizer = kernel_regularizer, - recurrent_regularizer = recurrent_regularizer, - bias_regularizer = bias_regularizer, - activity_regularizer = activity_regularizer, - kernel_constraint = kernel_constraint, - recurrent_constraint = recurrent_constraint, - bias_constraint = bias_constraint, - return_sequences = return_sequences, - return_state = return_state, - stateful = stateful, - input_shape = normalize_shape(input_shape), - batch_input_shape = normalize_shape(batch_input_shape), - batch_size = as_nullable_integer(batch_size), - dtype = dtype, - name = name, - trainable = trainable, - weights = weights - ) - - create_layer(tensorflow::tf$compat$v1$keras$layers$CuDNNLSTM, object, args) -} diff --git a/R/layers-regularization.R b/R/layers-regularization.R new file mode 100644 index 0000000000..f9a77cc179 --- /dev/null +++ b/R/layers-regularization.R @@ -0,0 +1,447 @@ + + +#' Layer that applies an update to the cost function based input activity. +#' +#' @description +#' +#' # Input Shape +#' Arbitrary. Use the keyword argument `input_shape` +#' (tuple of integers, does not include the samples axis) +#' when using this layer as the first layer in a model. +#' +#' # Output Shape +#' Same shape as input. +#' +#' @param l1 +#' L1 regularization factor (positive float). +#' +#' @param l2 +#' L2 regularization factor (positive float). +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit layer_dense return +#' @export +#' @family regularization layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.ActivityRegularization +layer_activity_regularization <- +function (object, l1 = 0, l2 = 0, ...) +{ + args <- capture_args(list(input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape), + ignore = "object") + create_layer(keras$layers$ActivityRegularization, object, + args) +} + + +#' Applies dropout to the input. +#' +#' @description +#' The `Dropout` layer randomly sets input units to 0 with a frequency of +#' `rate` at each step during training time, which helps prevent overfitting. +#' Inputs not set to 0 are scaled up by `1 / (1 - rate)` such that the sum over +#' all inputs is unchanged. +#' +#' Note that the `Dropout` layer only applies when `training` is set to `TRUE` +#' in `call()`, such that no values are dropped during inference. +#' When using `model.fit`, `training` will be appropriately set to `TRUE` +#' automatically. In other contexts, you can set the argument explicitly +#' to `TRUE` when calling the layer. +#' +#' (This is in contrast to setting `trainable=FALSE` for a `Dropout` layer. +#' `trainable` does not affect the layer's behavior, as `Dropout` does +#' not have any variables/weights that can be frozen during training.) +#' +#' # Call Arguments +#' - `inputs`: Input tensor (of any rank). +#' - `training`: Python boolean indicating whether the layer should behave in +#' training mode (adding dropout) or in inference mode (doing nothing). +#' +#' @param rate +#' Float between 0 and 1. Fraction of the input units to drop. 
+#'
+#' @param noise_shape
+#' 1D integer tensor representing the shape of the
+#' binary dropout mask that will be multiplied with the input.
+#' For instance, if your inputs have shape
+#' `(batch_size, timesteps, features)` and
+#' you want the dropout mask to be the same for all timesteps,
+#' you can use `noise_shape = c(batch_size, 1, features)`.
+#'
+#' @param seed
+#' An R integer to use as random seed.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family regularization layers
+#' @family layers
+#' @seealso
+#' + 
+# + 
+#' @tether keras.layers.Dropout
+layer_dropout <-
+function (object, rate, noise_shape = NULL, seed = NULL, ...)
+{
+    args <- capture_args(list(noise_shape = as_integer, seed = as_integer,
+        input_shape = normalize_shape, batch_size = as_integer,
+        batch_input_shape = normalize_shape), ignore = "object")
+    create_layer(keras$layers$Dropout, object, args)
+}
+
+
+#' Applies Alpha Dropout to the input.
+#'
+#' @description
+#' Alpha Dropout is a `Dropout` that keeps mean and variance of inputs
+#' to their original values, in order to ensure the self-normalizing property
+#' even after this dropout.
+#' Alpha Dropout fits well to Scaled Exponential Linear Units (SELU) by
+#' randomly setting activations to the negative saturation value.
+#'
+#' # Call Arguments
+#' - `inputs`: Input tensor (of any rank).
+#' - `training`: R boolean indicating whether the layer should behave in
+#'     training mode (adding alpha dropout) or in inference mode
+#'     (doing nothing).
+#'
+#' @param rate
+#' Float between 0 and 1. The multiplicative noise will have
+#' standard deviation `sqrt(rate / (1 - rate))`.
+#'
+#' @param noise_shape
+#' 1D integer tensor representing the shape of the
+#' binary alpha dropout mask that will be multiplied with the input.
+#' For instance, if your inputs have shape
+#' `(batch_size, timesteps, features)` and
+#' you want the alpha dropout mask to be the same for all timesteps,
+#' you can use `noise_shape = c(batch_size, 1, features)`.
+#'
+#' @param seed
+#' An integer to use as random seed.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family regularization layers
+#' @family layers
+#' @tether keras.layers.AlphaDropout
+#' @seealso
+#' + 
+layer_alpha_dropout <-
+function (object, rate, noise_shape = NULL, seed = NULL, ...)
+{
+    args <- capture_args(list(noise_shape = as_integer_array, seed = as_integer,
+        input_shape = normalize_shape, batch_size = as_integer,
+        batch_input_shape = normalize_shape), ignore = "object")
+    create_layer(keras$layers$AlphaDropout, object, args)
+}
+
+#' Apply multiplicative 1-centered Gaussian noise.
+#'
+#' @description
+#' As it is a regularization layer, it is only active at training time.
+#'
+#' # Call Arguments
+#' - `inputs`: Input tensor (of any rank).
+#' - `training`: R boolean indicating whether the layer should behave in
+#'     training mode (adding dropout) or in inference mode (doing nothing).
+#'
+#' @param rate
+#' Float, drop probability (as with `Dropout`).
+#' The multiplicative noise will have
+#' standard deviation `sqrt(rate / (1 - rate))`.
+#'
+#' @param seed
+#' Integer, optional random seed to enable deterministic behavior.
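+#' For example (an illustrative sketch, not from the upstream docs; the
+#' noise is multiplicative and 1-centered, so training outputs hover around
+#' the inputs):
+#' ```{r}
+#' layer <- layer_gaussian_dropout(rate = 0.2, seed = 1)
+#' x <- op_ones(c(2, 3))
+#' layer(x, training = TRUE)  # entries jittered around 1
+#' layer(x)                   # inference mode: unchanged
+#' ```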
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family regularization layers
+#' @family layers
+#' @seealso
+#' + 
+# + 
+#' @tether keras.layers.GaussianDropout
+layer_gaussian_dropout <-
+function (object, rate, seed = NULL, ...)
+{
+    args <- capture_args(list(seed = as_integer, input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$GaussianDropout, object, args)
+}
+
+
+#' Apply additive zero-centered Gaussian noise.
+#'
+#' @description
+#' This is useful to mitigate overfitting
+#' (you could see it as a form of random data augmentation).
+#' Gaussian Noise (GN) is a natural choice as corruption process
+#' for real valued inputs.
+#'
+#' As it is a regularization layer, it is only active at training time.
+#'
+#' # Call Arguments
+#' - `inputs`: Input tensor (of any rank).
+#' - `training`: R boolean indicating whether the layer should behave in
+#'     training mode (adding noise) or in inference mode (doing nothing).
+#'
+#' @param stddev
+#' Float, standard deviation of the noise distribution.
+#'
+#' @param seed
+#' Integer, optional random seed to enable deterministic behavior.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family regularization layers
+#' @family layers
+#' @seealso
+#' + 
+# + 
+#' @tether keras.layers.GaussianNoise
+layer_gaussian_noise <-
+function (object, stddev, seed = NULL, ...)
+{
+    args <- capture_args(list(seed = as_integer, input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$GaussianNoise, object, args)
+}
+
+
+#' Spatial 1D version of Dropout.
+#'
+#' @description
+#' This layer performs the same function as Dropout, however, it drops
+#' entire 1D feature maps instead of individual elements. If adjacent frames
+#' within feature maps are strongly correlated (as is normally the case in
+#' early convolution layers) then regular dropout will not regularize the
+#' activations and will otherwise just result in an effective learning rate
+#' decrease. In this case, `SpatialDropout1D` will help promote independence
+#' between feature maps and should be used instead.
+#'
+#' # Call Arguments
+#' - `inputs`: A 3D tensor.
+#' - `training`: R boolean indicating whether the layer
+#'     should behave in training mode (applying dropout)
+#'     or in inference mode (pass-through).
+#'
+#' # Input Shape
+#' 3D tensor with shape: `(samples, timesteps, channels)`
+#'
+#' # Output Shape
+#' Same as input.
+#'
+#' # Reference
+#' - [Tompson et al., 2014](https://arxiv.org/abs/1411.4280)
+#'
+#' @param rate
+#' Float between 0 and 1. Fraction of the input units to drop.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param name
+#' String, name for the object
+#'
+#' @param seed
+#' Initial seed for the random number generator
+#'
+#' @param dtype
+#' datatype (e.g., `"float32"`).
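+#'
+#' For example (an illustrative sketch, not from the upstream docs): entire
+#' feature maps along the channel axis are kept or dropped together:
+#' ```{r}
+#' layer <- layer_spatial_dropout_1d(rate = 0.5)
+#' x <- op_ones(c(1, 4, 3))   # (samples, timesteps, channels)
+#' layer(x, training = TRUE)  # a dropped channel is zero at every timestep
+#' ```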
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family spatial dropout regularization layers
+#' @family regularization layers
+#' @family layers
+#' @seealso
+#' + 
+# + 
+#' @tether keras.layers.SpatialDropout1D
+layer_spatial_dropout_1d <-
+function (object, rate, seed = NULL, name = NULL, dtype = NULL)
+{
+    args <- capture_args(list(seed = as_integer, input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$SpatialDropout1D, object, args)
+}
+
+
+#' Spatial 2D version of Dropout.
+#'
+#' @description
+#' This version performs the same function as Dropout, however, it drops
+#' entire 2D feature maps instead of individual elements. If adjacent pixels
+#' within feature maps are strongly correlated (as is normally the case in
+#' early convolution layers) then regular dropout will not regularize the
+#' activations and will otherwise just result in an effective learning rate
+#' decrease. In this case, `SpatialDropout2D` will help promote independence
+#' between feature maps and should be used instead.
+#'
+#' # Call Arguments
+#' - `inputs`: A 4D tensor.
+#' - `training`: R boolean indicating whether the layer
+#'     should behave in training mode (applying dropout)
+#'     or in inference mode (pass-through).
+#'
+#' # Input Shape
+#' 4D tensor with shape: `(samples, channels, rows, cols)` if
+#'     data_format='channels_first'
+#' or 4D tensor with shape: `(samples, rows, cols, channels)` if
+#'     data_format='channels_last'.
+#'
+#' # Output Shape
+#' Same as input.
+#'
+#' # Reference
+#' - [Tompson et al., 2014](https://arxiv.org/abs/1411.4280)
+#'
+#' @param rate
+#' Float between 0 and 1. Fraction of the input units to drop.
+#'
+#' @param data_format
+#' `"channels_first"` or `"channels_last"`.
+#' In `"channels_first"` mode, the channels dimension (the depth)
+#' is at index 1; in `"channels_last"` mode it is at index 3.
+#' It defaults to the `image_data_format` value found in your
+#' Keras config file at `~/.keras/keras.json`.
+#' If you never set it, then it will be `"channels_last"`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param name
+#' String, name for the object
+#'
+#' @param seed
+#' Initial seed for the random number generator
+#'
+#' @param dtype
+#' datatype (e.g., `"float32"`).
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family spatial dropout regularization layers
+#' @family regularization layers
+#' @family layers
+#' @seealso
+#' + 
+# + 
+#' @tether keras.layers.SpatialDropout2D
+layer_spatial_dropout_2d <-
+function (object, rate, data_format = NULL, seed = NULL, name = NULL,
+    dtype = NULL)
+{
+    args <- capture_args(list(seed = as_integer, input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$SpatialDropout2D, object, args)
+}
+
+
+#' Spatial 3D version of Dropout.
+#'
+#' @description
+#' This version performs the same function as Dropout, however, it drops
+#' entire 3D feature maps instead of individual elements. If adjacent voxels
+#' within feature maps are strongly correlated (as is normally the case in
+#' early convolution layers) then regular dropout will not regularize the
+#' activations and will otherwise just result in an effective learning rate
+#' decrease. In this case, SpatialDropout3D will help promote independence
+#' between feature maps and should be used instead.
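+#'
+#' # Example
+#' (An illustrative sketch, not from the upstream docs; shapes chosen for
+#' brevity.)
+#' ```{r}
+#' layer <- layer_spatial_dropout_3d(rate = 0.5)
+#' x <- op_ones(c(1, 2, 2, 2, 3))  # (samples, dim1, dim2, dim3, channels)
+#' layer(x, training = TRUE)       # dropped channels are zero at every voxel
+#' ```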
+#'
+#' # Call Arguments
+#' - `inputs`: A 5D tensor.
+#' - `training`: R boolean indicating whether the layer
+#'     should behave in training mode (applying dropout)
+#'     or in inference mode (pass-through).
+#'
+#' # Input Shape
+#' 5D tensor with shape: `(samples, channels, dim1, dim2, dim3)` if
+#'     data_format='channels_first'
+#' or 5D tensor with shape: `(samples, dim1, dim2, dim3, channels)` if
+#'     data_format='channels_last'.
+#'
+#' # Output Shape
+#' Same as input.
+#'
+#' # Reference
+#' - [Tompson et al., 2014](https://arxiv.org/abs/1411.4280)
+#'
+#' @param rate
+#' Float between 0 and 1. Fraction of the input units to drop.
+#'
+#' @param data_format
+#' `"channels_first"` or `"channels_last"`.
+#' In `"channels_first"` mode, the channels dimension (the depth)
+#' is at index 1; in `"channels_last"` mode it is at index 4.
+#' It defaults to the `image_data_format` value found in your
+#' Keras config file at `~/.keras/keras.json`.
+#' If you never set it, then it will be `"channels_last"`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param name
+#' String, name for the object
+#'
+#' @param seed
+#' Initial seed for the random number generator
+#'
+#' @param dtype
+#' datatype (e.g., `"float32"`).
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family spatial dropout regularization layers
+#' @family regularization layers
+#' @family layers
+#' @seealso
+#' + 
+# + 
+#' @tether keras.layers.SpatialDropout3D
+layer_spatial_dropout_3d <-
+function (object, rate, data_format = NULL, seed = NULL, name = NULL,
+    dtype = NULL)
+{
+    args <- capture_args(list(seed = as_integer, input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$SpatialDropout3D, object, args)
+}
diff --git a/R/layers-reshaping.R b/R/layers-reshaping.R
new file mode 100644
index 0000000000..8115e9d329
--- /dev/null
+++ b/R/layers-reshaping.R
@@ -0,0 +1,810 @@
+
+
+#' Cropping layer for 1D input (e.g. temporal sequence).
+#'
+#' @description
+#' It crops along the time dimension (axis 2).
+#'
+#' # Example
+#' ```{r}
+#' input_shape <- c(2, 3, 2)
+#' x <- op_arange(prod(input_shape)) |> op_reshape(input_shape)
+#' x
+#'
+#' y <- x |> layer_cropping_1d(cropping = 1)
+#' y
+#' ```
+#'
+#' # Input Shape
+#' 3D tensor with shape `(batch_size, axis_to_crop, features)`
+#'
+#' # Output Shape
+#' 3D tensor with shape `(batch_size, cropped_axis, features)`
+#'
+#' @param cropping
+#' Int, or list of int (length 2).
+#' - If int: how many units should be trimmed off at the beginning and
+#'   end of the cropping dimension (axis 2).
+#' - If list of 2 ints: how many units should be trimmed off at the
+#'   beginning and end of the cropping dimension
+#'   (`(left_crop, right_crop)`).
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family reshaping layers
+#' @family layers
+#' @seealso
+#' + 
+# + 
+#' @tether keras.layers.Cropping1D
+layer_cropping_1d <-
+function (object, cropping = list(1L, 1L), ...)
+{
+    args <- capture_args(list(cropping = as_integer, input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$Cropping1D, object, args)
+}
+
+
+#' Cropping layer for 2D input (e.g. picture).
+#'
+#' @description
+#' It crops along spatial dimensions, i.e. height and width.
+#'
+#' # Example
+#' ```{r}
+#' input_shape <- c(2, 28, 28, 3)
+#' x <- op_arange(prod(input_shape), dtype = 'int32') |> op_reshape(input_shape)
+#' y <- x |> layer_cropping_2d(cropping = list(c(2, 2), c(4, 4)))
+#' shape(y)
+#' ```
+#'
+#' # Input Shape
+#' 4D tensor with shape:
+#' - If `data_format` is `"channels_last"`:
+#'     `(batch_size, height, width, channels)`
+#' - If `data_format` is `"channels_first"`:
+#'     `(batch_size, channels, height, width)`
+#'
+#' # Output Shape
+#' 4D tensor with shape:
+#' - If `data_format` is `"channels_last"`:
+#'     `(batch_size, cropped_height, cropped_width, channels)`
+#' - If `data_format` is `"channels_first"`:
+#'     `(batch_size, channels, cropped_height, cropped_width)`
+#'
+#' @param cropping
+#' Int, or list of 2 ints, or list of 2 lists of 2 ints.
+#' - If int: the same symmetric cropping is applied to height and
+#'   width.
+#' - If list of 2 ints: interpreted as two different symmetric
+#'   cropping values for height and width:
+#'   `(symmetric_height_crop, symmetric_width_crop)`.
+#' - If list of 2 lists of 2 ints: interpreted as
+#'   `((top_crop, bottom_crop), (left_crop, right_crop))`.
+#'
+#' @param data_format
+#' A string, one of `"channels_last"` (default) or
+#' `"channels_first"`. The ordering of the dimensions in the inputs.
+#' `"channels_last"` corresponds to inputs with shape
+#' `(batch_size, height, width, channels)` while `"channels_first"`
+#' corresponds to inputs with shape
+#' `(batch_size, channels, height, width)`.
+#' When unspecified, uses `image_data_format` value found in your Keras
+#' config file at `~/.keras/keras.json` (if exists). Defaults to
+#' `"channels_last"`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family reshaping layers
+#' @family layers
+#' @seealso
+#' + 
+# + 
+#' @tether keras.layers.Cropping2D
+layer_cropping_2d <-
+function (object, cropping = list(list(0L, 0L), list(0L, 0L)),
+    data_format = NULL, ...)
+{
+    args <- capture_args(list(cropping = function (x)
+    normalize_cropping(x, 2L), input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$Cropping2D, object, args)
+}
+
+
+#' Cropping layer for 3D data (e.g. spatial or spatio-temporal).
+#'
+#' @description
+#'
+#' # Example
+#' ```{r}
+#' input_shape <- c(2, 28, 28, 10, 3)
+#' x <- input_shape %>% { op_reshape(seq(prod(.)), .) }
+#' y <- x |> layer_cropping_3d(cropping = c(2, 4, 2))
+#' shape(y)
+#' ```
+#'
+#' # Input Shape
+#' 5D tensor with shape:
+#' - If `data_format` is `"channels_last"`:
+#'     `(batch_size, first_axis_to_crop, second_axis_to_crop,
+#'     third_axis_to_crop, channels)`
+#' - If `data_format` is `"channels_first"`:
+#'     `(batch_size, channels, first_axis_to_crop, second_axis_to_crop,
+#'     third_axis_to_crop)`
+#'
+#' # Output Shape
+#' 5D tensor with shape:
+#' - If `data_format` is `"channels_last"`:
+#'     `(batch_size, first_cropped_axis, second_cropped_axis,
+#'     third_cropped_axis, channels)`
+#' - If `data_format` is `"channels_first"`:
+#'     `(batch_size, channels, first_cropped_axis, second_cropped_axis,
+#'     third_cropped_axis)`
+#'
+#' @param cropping
+#' Int, or list of 3 ints, or list of 3 lists of 2 ints.
+#' - If int: the same symmetric cropping is applied to depth, height,
+#'   and width.
+#' - If list of 3 ints: interpreted as three different symmetric
+#'   cropping values for depth, height, and width:
+#'   `(symmetric_dim1_crop, symmetric_dim2_crop, symmetric_dim3_crop)`.
+#' - If list of 3 lists of 2 ints: interpreted as
+#'   `((left_dim1_crop, right_dim1_crop), (left_dim2_crop,
+#'   right_dim2_crop), (left_dim3_crop, right_dim3_crop))`.
+#'
+#' @param data_format
+#' A string, one of `"channels_last"` (default) or
+#' `"channels_first"`. The ordering of the dimensions in the inputs.
+#' `"channels_last"` corresponds to inputs with shape
+#' `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.
+#' When unspecified, uses `image_data_format` value found in your Keras
+#' config file at `~/.keras/keras.json` (if exists). Defaults to
+#' `"channels_last"`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family reshaping layers
+#' @family layers
+#' @seealso
+#' + 
+# + 
+#' @tether keras.layers.Cropping3D
+layer_cropping_3d <-
+function (object, cropping = list(list(1L, 1L), list(1L, 1L),
+    list(1L, 1L)), data_format = NULL, ...)
+{
+    args <- capture_args(list(cropping = function (x)
+    normalize_cropping(x, 3L), input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$Cropping3D, object, args)
+}
+
+
+#' Flattens the input. Does not affect the batch size.
+#'
+#' @description
+#'
+#' # Note
+#' If inputs are shaped `(batch)` without a feature axis, then
+#' flattening adds an extra channel dimension and output shape is `(batch, 1)`.
+#'
+#' # Example
+#' ```{r}
+#' x <- layer_input(shape = c(10, 64))
+#' y <- x |> layer_flatten()
+#' shape(y)
+#' ```
+#'
+#' @param data_format
+#' A string, one of `"channels_last"` (default) or
+#' `"channels_first"`. The ordering of the dimensions in the inputs.
+#' `"channels_last"` corresponds to inputs with shape
+#' `(batch, ..., channels)` while `"channels_first"` corresponds to
+#' inputs with shape `(batch, channels, ...)`.
+#' When unspecified, uses `image_data_format` value found in your Keras
+#' config file at `~/.keras/keras.json` (if exists). Defaults to
+#' `"channels_last"`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family reshaping layers
+#' @family layers
+#' @seealso
+#' + 
+# + 
+#' @tether keras.layers.Flatten
+layer_flatten <-
+function (object, data_format = NULL, ...)
+{
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$Flatten, object, args)
+}
+
+
+#' Permutes the dimensions of the input according to a given pattern.
+#'
+#' @description
+#' Useful e.g. for connecting RNNs and convnets.
+#'
+#' # Input Shape
+#' Arbitrary.
+#'
+#' # Output Shape
+#' Same as the input shape, but with the dimensions re-ordered according
+#' to the specified pattern.
+#'
+#' # Example
+#' ```{r}
+#' x <- layer_input(shape = c(10, 64))
+#' y <- layer_permute(x, c(2, 1))
+#' shape(y)
+#' ```
+#'
+#' @param dims
+#' List of integers.
Permutation pattern does not include the +#' batch dimension. Indexing starts at 1. +#' For instance, `c(2, 1)` permutes the first and second dimensions +#' of the input. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit layer_dense return +#' @export +#' @family reshaping layers +#' @family layers +#' @seealso +#' + +# + +#' +#' @tether keras.layers.Permute +layer_permute <- +function (object, dims, ...) +{ + args <- capture_args(list(input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape, + dims = function (x) + tuple(lapply(x, as_integer))), ignore = "object") + create_layer(keras$layers$Permute, object, args) +} + + +#' Repeats the input n times. +#' +#' @description +#' +#' # Example +#' ```{r} +#' x <- layer_input(shape = 32) +#' y <- layer_repeat_vector(x, n = 3) +#' shape(y) +#' ``` +#' +#' # Input Shape +#' 2D tensor with shape `(batch_size, features)`. +#' +#' # Output Shape +#' 3D tensor with shape `(batch_size, n, features)`. +#' +#' @param n +#' Integer, repetition factor. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit layer_dense return +#' @export +#' @family reshaping layers +#' @family layers +#' @seealso +#' + +# + +#' +#' @tether keras.layers.RepeatVector +layer_repeat_vector <- +function (object, n, ...) +{ + args <- capture_args(list(n = as_integer, input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape), + ignore = "object") + create_layer(keras$layers$RepeatVector, object, args) +} + + +#' Layer that reshapes inputs into the given shape. +#' +#' @description +#' +#' # Input Shape +#' Arbitrary, although all dimensions in the input shape must be +#' known/fixed. Use the keyword argument `input_shape` (list of integers, +#' does not include the samples/batch size axis) when using this layer as +#' the first layer in a model. +#' +#' # Output Shape +#' `(batch_size, *target_shape)` +#' +#' # Examples +#' ```{r} +#' x <- layer_input(shape = 12) +#' y <- layer_reshape(x, c(3, 4)) +#' shape(y) +#' ``` +#' +#' ```{r} +#' # also supports shape inference using `-1` as dimension +#' y <- layer_reshape(x, c(-1, 2, 2)) +#' shape(y) +#' ``` +#' +#' @param target_shape +#' Target shape. List of integers, does not include the +#' samples dimension (batch size). +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit layer_dense return +#' @export +#' @family reshaping layers +#' @family layers +#' @seealso +#' + +# + +#' +#' @tether keras.layers.Reshape +layer_reshape <- +function (object, target_shape, ...) +{ + args <- capture_args(list(input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape, + target_shape = as_integer), ignore = "object") + create_layer(keras$layers$Reshape, object, args) +} + + +#' Upsampling layer for 1D inputs. +#' +#' @description +#' Repeats each temporal step `size` times along the time axis. +#' +#' # Example +#' ```{r} +#' input_shape <- c(2, 2, 3) +#' x <- seq_len(prod(input_shape)) %>% op_reshape(input_shape) +#' x +#' y <- layer_upsampling_1d(x, size = 2) +#' y +#' ``` +#' +#' # Input Shape +#' 3D tensor with shape: `(batch_size, steps, features)`. 
+#'
+#' # Output Shape
+#' 3D tensor with shape: `(batch_size, upsampled_steps, features)`.
+#'
+#' @param size
+#' Integer. Upsampling factor.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family reshaping layers
+#' @family layers
+#' @seealso
+#' + 
+# + 
+#'
+#' @tether keras.layers.UpSampling1D
+layer_upsampling_1d <-
+function (object, size = 2L, ...)
+{
+    args <- capture_args(list(size = as_integer, input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$UpSampling1D, object, args)
+}
+
+
+#' Upsampling layer for 2D inputs.
+#'
+#' @description
+#' The implementation uses interpolative resizing, given the resize method
+#' (specified by the `interpolation` argument). Use `interpolation = "nearest"`
+#' to repeat the rows and columns of the data.
+#'
+#' # Example
+#' ```{r}
+#' input_shape <- c(2, 2, 1, 3)
+#' x <- op_reshape(seq_len(prod(input_shape)), input_shape)
+#' print(x)
+#' y <- layer_upsampling_2d(x, size = c(1, 2))
+#' print(y)
+#' ```
+#'
+#' # Input Shape
+#' 4D tensor with shape:
+#' - If `data_format` is `"channels_last"`:
+#'     `(batch_size, rows, cols, channels)`
+#' - If `data_format` is `"channels_first"`:
+#'     `(batch_size, channels, rows, cols)`
+#'
+#' # Output Shape
+#' 4D tensor with shape:
+#' - If `data_format` is `"channels_last"`:
+#'     `(batch_size, upsampled_rows, upsampled_cols, channels)`
+#' - If `data_format` is `"channels_first"`:
+#'     `(batch_size, channels, upsampled_rows, upsampled_cols)`
+#'
+#' @param size
+#' Int, or list of 2 integers.
+#' The upsampling factors for rows and columns.
+#'
+#' @param data_format
+#' A string,
+#' one of `"channels_last"` (default) or `"channels_first"`.
+#' The ordering of the dimensions in the inputs.
+#' `"channels_last"` corresponds to inputs with shape
+#' `(batch_size, height, width, channels)` while `"channels_first"`
+#' corresponds to inputs with shape
+#' `(batch_size, channels, height, width)`.
+#' When unspecified, uses
+#' `image_data_format` value found in your Keras config file at
+#' `~/.keras/keras.json` (if exists) else `"channels_last"`.
+#' Defaults to `"channels_last"`.
+#'
+#' @param interpolation
+#' A string, one of `"bicubic"`, `"bilinear"`, `"lanczos3"`,
+#' `"lanczos5"`, `"nearest"`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family reshaping layers
+#' @family layers
+#' @seealso
+#' + 
+# + 
+#'
+#' @tether keras.layers.UpSampling2D
+layer_upsampling_2d <-
+function (object, size = list(2L, 2L), data_format = NULL, interpolation = "nearest",
+    ...)
+{
+    args <- capture_args(list(size = as_integer, input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$UpSampling2D, object, args)
+}
+
+
+#' Upsampling layer for 3D inputs.
+#'
+#' @description
+#' Repeats the 1st, 2nd and 3rd dimensions
+#' of the data by `size[[1]]`, `size[[2]]` and `size[[3]]` respectively.
+#'
+#' # Example
+#' ```{r}
+#' input_shape <- c(2, 1, 2, 1, 3)
+#' x <- array(1, dim = input_shape)
+#' y <- layer_upsampling_3d(x, size = c(2, 2, 2))
+#' shape(y)
+#' ```
+#'
+#' # Input Shape
+#' 5D tensor with shape:
+#' - If `data_format` is `"channels_last"`:
+#'     `(batch_size, dim1, dim2, dim3, channels)`
+#' - If `data_format` is `"channels_first"`:
+#'     `(batch_size, channels, dim1, dim2, dim3)`
+#'
+#' # Output Shape
+#' 5D tensor with shape:
+#' - If `data_format` is `"channels_last"`:
+#'     `(batch_size, upsampled_dim1, upsampled_dim2, upsampled_dim3,
+#'     channels)`
+#' - If `data_format` is `"channels_first"`:
+#'     `(batch_size, channels, upsampled_dim1, upsampled_dim2,
+#'     upsampled_dim3)`
+#'
+#' @param size
+#' Int, or list of 3 integers.
+#' The upsampling factors for dim1, dim2 and dim3.
+#'
+#' @param data_format
+#' A string,
+#' one of `"channels_last"` (default) or `"channels_first"`.
+#' The ordering of the dimensions in the inputs.
+#' `"channels_last"` corresponds to inputs with shape
+#' `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.
+#' When unspecified, uses
+#' `image_data_format` value found in your Keras config file at
+#' `~/.keras/keras.json` (if exists) else `"channels_last"`.
+#' Defaults to `"channels_last"`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family reshaping layers
+#' @family layers
+#' @seealso
+#' + 
+# + 
+#'
+#' @tether keras.layers.UpSampling3D
+layer_upsampling_3d <-
+function (object, size = list(2L, 2L, 2L), data_format = NULL,
+    ...)
+{
+    args <- capture_args(list(size = as_integer, input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$UpSampling3D, object, args)
+}
+
+
+#' Zero-padding layer for 1D input (e.g. temporal sequence).
+#'
+#' @description
+#'
+#' # Example
+#' ```{r}
+#' input_shape <- c(2, 2, 3)
+#' x <- op_reshape(seq_len(prod(input_shape)), input_shape)
+#' x
+#' y <- layer_zero_padding_1d(x, padding = 2)
+#' y
+#' ```
+#'
+#' # Input Shape
+#' 3D tensor with shape `(batch_size, axis_to_pad, features)`
+#'
+#' # Output Shape
+#' 3D tensor with shape `(batch_size, padded_axis, features)`
+#'
+#' @param padding
+#' Int, or list of int (length 2), or named list.
+#' - If int: how many zeros to add at the beginning and end of
+#'   the padding dimension (axis 2).
+#' - If list of 2 ints: how many zeros to add at the beginning and the
+#'   end of the padding dimension (`(left_pad, right_pad)`).
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family reshaping layers
+#' @family layers
+#' @seealso
+#' + 
+# + 
+#'
+#' @tether keras.layers.ZeroPadding1D
+layer_zero_padding_1d <-
+function (object, padding = 1L, ...)
+{
+    args <- capture_args(list(padding = as_integer, input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$ZeroPadding1D, object, args)
+}
+
+
+#' Zero-padding layer for 2D input (e.g. picture).
+#'
+#' @description
+#' This layer can add rows and columns of zeros at the top, bottom, left and
+#' right side of an image tensor.
+#'
+#' # Example
+#' ```{r}
+#' input_shape <- c(1, 1, 2, 2)
+#' x <- op_reshape(seq_len(prod(input_shape)), input_shape)
+#' x
+#' y <- layer_zero_padding_2d(x, padding = 1)
+#' y
+#' ```
+#'
+#' # Input Shape
+#' 4D tensor with shape:
+#' - If `data_format` is `"channels_last"`:
+#'     `(batch_size, height, width, channels)`
+#' - If `data_format` is `"channels_first"`:
+#'     `(batch_size, channels, height, width)`
+#'
+#' # Output Shape
+#' 4D tensor with shape:
+#' - If `data_format` is `"channels_last"`:
+#'     `(batch_size, padded_height, padded_width, channels)`
+#' - If `data_format` is `"channels_first"`:
+#'     `(batch_size, channels, padded_height, padded_width)`
+#'
+#' @param padding
+#' Int, or list of 2 ints, or list of 2 lists of 2 ints.
+#' - If int: the same symmetric padding is applied to height and width.
+#' - If list of 2 ints: interpreted as two different symmetric padding
+#'   values for height and width:
+#'   `(symmetric_height_pad, symmetric_width_pad)`.
+#' - If list of 2 lists of 2 ints: interpreted as
+#'   `((top_pad, bottom_pad), (left_pad, right_pad))`.
+#'
+#' @param data_format
+#' A string, one of `"channels_last"` (default) or
+#' `"channels_first"`. The ordering of the dimensions in the inputs.
+#' `"channels_last"` corresponds to inputs with shape
+#' `(batch_size, height, width, channels)` while `"channels_first"`
+#' corresponds to inputs with shape
+#' `(batch_size, channels, height, width)`.
+#' When unspecified, uses `image_data_format` value found in your Keras
+#' config file at `~/.keras/keras.json` (if exists). Defaults to
+#' `"channels_last"`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family reshaping layers
+#' @family layers
+#' @seealso
+#' + 
+# + 
+#'
+#' @tether keras.layers.ZeroPadding2D
+layer_zero_padding_2d <-
+function (object, padding = list(1L, 1L), data_format = NULL,
+    ...)
+{
+    args <- capture_args(list(padding = function (x)
+    normalize_padding(x, 2L), input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$ZeroPadding2D, object, args)
+}
+
+
+#' Zero-padding layer for 3D data (spatial or spatio-temporal).
+#'
+#' @description
+#'
+#' # Example
+#' ```{r}
+#' input_shape <- c(1, 1, 2, 2, 3)
+#' x <- op_reshape(seq_len(prod(input_shape)), input_shape)
+#' x
+#' y <- layer_zero_padding_3d(x, padding = 2)
+#' shape(y)
+#' ```
+#'
+#' # Input Shape
+#' 5D tensor with shape:
+#' - If `data_format` is `"channels_last"`:
+#'     `(batch_size, first_axis_to_pad, second_axis_to_pad,
+#'     third_axis_to_pad, depth)`
+#' - If `data_format` is `"channels_first"`:
+#'     `(batch_size, depth, first_axis_to_pad, second_axis_to_pad,
+#'     third_axis_to_pad)`
+#'
+#' # Output Shape
+#' 5D tensor with shape:
+#' - If `data_format` is `"channels_last"`:
+#'     `(batch_size, first_padded_axis, second_padded_axis,
+#'     third_padded_axis, depth)`
+#' - If `data_format` is `"channels_first"`:
+#'     `(batch_size, depth, first_padded_axis, second_padded_axis,
+#'     third_padded_axis)`
+#'
+#' @param padding
+#' Int, or list of 3 ints, or list of 3 lists of 2 ints.
+#' - If int: the same symmetric padding is applied to depth, height,
+#'   and width.
+#' - If list of 3 ints: interpreted as three different symmetric
+#'   padding values for depth, height, and width:
+#'   `(symmetric_dim1_pad, symmetric_dim2_pad, symmetric_dim3_pad)`.
+#' - If list of 3 lists of 2 ints: interpreted as
+#'   `((left_dim1_pad, right_dim1_pad), (left_dim2_pad,
+#'   right_dim2_pad), (left_dim3_pad, right_dim3_pad))`.
+#'
+#' @param data_format
+#' A string, one of `"channels_last"` (default) or
+#' `"channels_first"`. The ordering of the dimensions in the inputs.
+#' `"channels_last"` corresponds to inputs with shape
+#' `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.
+#' When unspecified, uses the `image_data_format` value found in your Keras
+#' config file at `~/.keras/keras.json` (if it exists). Defaults to
+#' `"channels_last"`.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family reshaping layers
+#' @family layers
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.layers.ZeroPadding3D
+layer_zero_padding_3d <-
+function (object, padding = list(list(1L, 1L), list(1L, 1L),
+    list(1L, 1L)), data_format = NULL, ...)
+{
+    args <- capture_args(list(padding = function (x)
+        normalize_padding(x, 3L), input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$ZeroPadding3D, object, args)
+}
diff --git a/R/layers-rnn.R b/R/layers-rnn.R
new file mode 100644
index 0000000000..f5d398a9bf
--- /dev/null
+++ b/R/layers-rnn.R
@@ -0,0 +1,1952 @@
+
+
+#' Bidirectional wrapper for RNNs.
+#'
+#' @description
+#'
+#' # Call Arguments
+#' The call arguments for this layer are the same as those of the
+#' wrapped RNN layer. Beware that when passing the `initial_state`
+#' argument during the call of this layer, the first half in the
+#' list of elements in the `initial_state` list will be passed to
+#' the forward RNN call and the last half in the list of elements
+#' will be passed to the backward RNN call.
+#'
+#' # Note
+#' Instantiating a `Bidirectional` layer from an existing RNN layer
+#' instance will not reuse the weights state of the RNN layer instance -- the
+#' `Bidirectional` layer will have freshly initialized weights.
+#'
+#' # Examples
+#' ```{r}
+#' model <- keras_model_sequential(input_shape = c(5, 10)) %>%
+#'   layer_bidirectional(layer_lstm(units = 10, return_sequences = TRUE)) %>%
+#'   layer_bidirectional(layer_lstm(units = 10)) %>%
+#'   layer_dense(5, activation = "softmax")
+#'
+#' model %>% compile(loss = "categorical_crossentropy",
+#'                   optimizer = "rmsprop")
+#'
+#' # With custom backward layer
+#' forward_layer <- layer_lstm(units = 10, return_sequences = TRUE)
+#' backward_layer <- layer_lstm(units = 10, activation = "relu",
+#'                              return_sequences = TRUE, go_backwards = TRUE)
+#'
+#' model <- keras_model_sequential(input_shape = c(5, 10)) %>%
+#'   layer_bidirectional(forward_layer, backward_layer = backward_layer) %>%
+#'   layer_dense(5, activation = "softmax")
+#'
+#' model %>% compile(loss = "categorical_crossentropy",
+#'                   optimizer = "rmsprop")
+#' ```
+#'
+#' # States
+#'
+#' A `Bidirectional` layer instance has property `states`, which you can access
+#' with `layer$states`.
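+#' For example (a minimal sketch; it assumes a stateful wrapped RNN, since
+#' the state variables are only created once the layer has been built and
+#' called):
+#' ```{r}
+#' layer <- layer_bidirectional(layer = layer_lstm(units = 2, stateful = TRUE))
+#' inputs <- random_uniform(c(4, 3, 2))
+#' outputs <- layer(inputs)
+#' # forward and backward states of the wrapped LSTM
+#' str(layer$states)
+#' ```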
You can also reset states using [`reset_state()`] +#' +#' +#' @param layer +#' `RNN` instance, such as +#' [`layer_lstm()`] or [`layer_gru()`]. +#' It could also be a [`Layer()`] instance +#' that meets the following criteria: +#' 1. Be a sequence-processing layer (accepts 3D+ inputs). +#' 2. Have a `go_backwards`, `return_sequences` and `return_state` +#' attribute (with the same semantics as for the `RNN` class). +#' 3. Have an `input_spec` attribute. +#' 4. Implement serialization via `get_config()` and `from_config()`. +#' Note that the recommended way to create new RNN layers is to write a +#' custom RNN cell and use it with [`layer_rnn()`], instead of +#' subclassing with [`Layer()`] directly. +#' When `return_sequences` is `TRUE`, the output of the masked +#' timestep will be zero regardless of the layer's original +#' `zero_output_for_mask` value. +#' +#' @param merge_mode +#' Mode by which outputs of the forward and backward RNNs +#' will be combined. One of `{"sum", "mul", "concat", "ave", NULL}`. +#' If `NULL`, the outputs will not be combined, +#' they will be returned as a list. Defaults to `"concat"`. +#' +#' @param backward_layer +#' Optional `RNN`, +#' or `Layer()` instance to be used to handle +#' backwards input processing. +#' If `backward_layer` is not provided, the layer instance passed +#' as the `layer` argument will be used to generate the backward layer +#' automatically. +#' Note that the provided `backward_layer` layer should have properties +#' matching those of the `layer` argument, in particular +#' it should have the same values for `stateful`, `return_states`, +#' `return_sequences`, etc. In addition, `backward_layer` +#' and `layer` should have different `go_backwards` argument values. +#' A `ValueError` will be raised if these requirements are not met. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @param weights +#' see description +#' +#' @inherit layer_dense return +#' @export +#' @family rnn layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.Bidirectional +layer_bidirectional <- +function (object, layer, merge_mode = "concat", weights = NULL, + backward_layer = NULL, ...) +{ + args <- capture_args(list(input_shape = normalize_shape, + batch_size = as_integer, batch_input_shape = normalize_shape), + ignore = "object") + create_layer(keras$layers$Bidirectional, object, args) +} + + +#' 1D Convolutional LSTM. +#' +#' @description +#' Similar to an LSTM layer, but the input transformations +#' and recurrent transformations are both convolutional. +#' +#' # Call Arguments +#' - `inputs`: A 4D tensor. +#' - `initial_state`: List of initial state tensors to be passed to the first +#' call of the cell. +#' - `mask`: Binary tensor of shape `(samples, timesteps)` indicating whether a +#' given timestep should be masked. +#' - `training`: Python boolean indicating whether the layer should behave in +#' training mode or in inference mode. +#' This is only relevant if `dropout` or `recurrent_dropout` are set. +#' +#' # Input Shape +#' - If `data_format="channels_first"`: +#' 4D tensor with shape: `(samples, time, channels, rows)` +#' - If `data_format="channels_last"`: +#' 4D tensor with shape: `(samples, time, rows, channels)` +#' +#' # Output Shape +#' - If `return_state`: a list of tensors. The first tensor is the output. 
+#' The remaining tensors are the last states,
+#' each 3D tensor with shape: `(samples, filters, new_rows)` if
+#' `data_format='channels_first'`
+#' or shape: `(samples, new_rows, filters)` if
+#' `data_format='channels_last'`.
+#' `rows` values might have changed due to padding.
+#' - If `return_sequences`: 4D tensor with shape: `(samples, timesteps,
+#'   filters, new_rows)` if `data_format='channels_first'`
+#'   or shape: `(samples, timesteps, new_rows, filters)` if
+#'   `data_format='channels_last'`.
+#' - Else, 3D tensor with shape: `(samples, filters, new_rows)` if
+#'   `data_format='channels_first'`
+#'   or shape: `(samples, new_rows, filters)` if
+#'   `data_format='channels_last'`.
+#'
+#' # References
+#' - [Shi et al., 2015](https://arxiv.org/abs/1506.04214v1)
+#'   (the current implementation does not include the feedback loop on the
+#'   cell's output).
+#'
+#' @param filters
+#' int, the dimension of the output space (the number of filters
+#' in the convolution).
+#'
+#' @param kernel_size
+#' int or tuple/list of 1 integer, specifying the size of
+#' the convolution window.
+#'
+#' @param strides
+#' int or tuple/list of 1 integer, specifying the stride length
+#' of the convolution. `strides > 1` is incompatible with
+#' `dilation_rate > 1`.
+#'
+#' @param padding
+#' string, `"valid"` or `"same"` (case-insensitive).
+#' `"valid"` means no padding. `"same"` results in padding evenly to
+#' the left/right or up/down of the input such that output has the
+#' same height/width dimension as the input.
+#'
+#' @param data_format
+#' string, either `"channels_last"` or `"channels_first"`.
+#' The ordering of the dimensions in the inputs. `"channels_last"`
+#' corresponds to inputs with shape `(batch, steps, features)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch, features, steps)`. It defaults to the `image_data_format`
+#' value found in your Keras config file at `~/.keras/keras.json`.
+#' If you never set it, then it will be `"channels_last"`.
+#'
+#' @param dilation_rate
+#' int or tuple/list of 1 integer, specifying the dilation
+#' rate to use for dilated convolution.
+#'
+#' @param activation
+#' Activation function to use. By default hyperbolic tangent
+#' activation function is applied (`tanh(x)`).
+#'
+#' @param recurrent_activation
+#' Activation function to use for the recurrent step.
+#'
+#' @param use_bias
+#' Boolean, whether the layer uses a bias vector.
+#'
+#' @param kernel_initializer
+#' Initializer for the `kernel` weights matrix,
+#' used for the linear transformation of the inputs.
+#'
+#' @param recurrent_initializer
+#' Initializer for the `recurrent_kernel` weights
+#' matrix, used for the linear transformation of the recurrent state.
+#'
+#' @param bias_initializer
+#' Initializer for the bias vector.
+#'
+#' @param unit_forget_bias
+#' Boolean. If `TRUE`, add 1 to the bias of
+#' the forget gate at initialization.
+#' Use in combination with `bias_initializer="zeros"`.
+#' This is recommended in [Jozefowicz et al., 2015](
+#' https://proceedings.mlr.press/v37/jozefowicz15.pdf)
+#'
+#' @param kernel_regularizer
+#' Regularizer function applied to the `kernel` weights
+#' matrix.
+#'
+#' @param recurrent_regularizer
+#' Regularizer function applied to the
+#' `recurrent_kernel` weights matrix.
+#'
+#' @param bias_regularizer
+#' Regularizer function applied to the bias vector.
+#'
+#' @param activity_regularizer
+#' Regularizer function applied to the output of the layer
+#' (its "activation").
+#' +#' @param kernel_constraint +#' Constraint function applied to the `kernel` weights +#' matrix. +#' +#' @param recurrent_constraint +#' Constraint function applied to the +#' `recurrent_kernel` weights matrix. +#' +#' @param bias_constraint +#' Constraint function applied to the bias vector. +#' +#' @param dropout +#' Float between 0 and 1. Fraction of the units to drop for the +#' linear transformation of the inputs. +#' +#' @param recurrent_dropout +#' Float between 0 and 1. Fraction of the units to drop +#' for the linear transformation of the recurrent state. +#' +#' @param seed +#' Random seed for dropout. +#' +#' @param return_sequences +#' Boolean. Whether to return the last output +#' in the output sequence, or the full sequence. Default: `FALSE`. +#' +#' @param return_state +#' Boolean. Whether to return the last state in addition +#' to the output. Default: `FALSE`. +#' +#' @param go_backwards +#' Boolean (default: `FALSE`). +#' If `TRUE`, process the input sequence backwards and return the +#' reversed sequence. +#' +#' @param stateful +#' Boolean (default `FALSE`). If `TRUE`, the last state +#' for each sample at index i in a batch will be used as initial +#' state for the sample of index i in the following batch. +#' +#' @param unroll +#' Boolean (default: `FALSE`). +#' If `TRUE`, the network will be unrolled, +#' else a symbolic loop will be used. +#' Unrolling can speed-up a RNN, +#' although it tends to be more memory-intensive. +#' Unrolling is only suitable for short sequences. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit layer_dense return +#' @export +#' @family rnn layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.ConvLSTM1D +layer_conv_lstm_1d <- +function (object, filters, kernel_size, strides = 1L, padding = "valid", + data_format = NULL, dilation_rate = 1L, activation = "tanh", + recurrent_activation = "sigmoid", use_bias = TRUE, kernel_initializer = "glorot_uniform", + recurrent_initializer = "orthogonal", bias_initializer = "zeros", + unit_forget_bias = TRUE, kernel_regularizer = NULL, recurrent_regularizer = NULL, + bias_regularizer = NULL, activity_regularizer = NULL, kernel_constraint = NULL, + recurrent_constraint = NULL, bias_constraint = NULL, dropout = 0, + recurrent_dropout = 0, seed = NULL, return_sequences = FALSE, + return_state = FALSE, go_backwards = FALSE, stateful = FALSE, + ..., unroll = NULL) +{ + args <- capture_args(list(filters = as_integer, kernel_size = as_integer_tuple, + strides = as_integer_tuple, dilation_rate = as_integer_tuple, + seed = as_integer, input_shape = normalize_shape, batch_size = as_integer, + batch_input_shape = normalize_shape), ignore = "object") + create_layer(keras$layers$ConvLSTM1D, object, args) +} + + +#' 2D Convolutional LSTM. +#' +#' @description +#' Similar to an LSTM layer, but the input transformations +#' and recurrent transformations are both convolutional. +#' +#' # Call Arguments +#' - `inputs`: A 5D tensor. +#' - `mask`: Binary tensor of shape `(samples, timesteps)` indicating whether a +#' given timestep should be masked. +#' - `training`: Python boolean indicating whether the layer should behave in +#' training mode or in inference mode. +#' This is only relevant if `dropout` or `recurrent_dropout` are set. +#' - `initial_state`: List of initial state tensors to be passed to the first +#' call of the cell. 
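+#'
+#' # Example
+#' A minimal usage sketch (the shapes below assume the default
+#' `"channels_last"` format and `padding = "valid"`):
+#' ```{r}
+#' # inputs: (samples, time, rows, cols, channels)
+#' inputs <- random_uniform(c(4, 10, 16, 16, 3))
+#' outputs <- inputs |> layer_conv_lstm_2d(filters = 8, kernel_size = 3)
+#' shape(outputs)
+#' # (4, 14, 14, 8)
+#' ```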
+#' +#' # Input Shape +#' - If `data_format='channels_first'`: +#' 5D tensor with shape: `(samples, time, channels, rows, cols)` +#' - If `data_format='channels_last'`: +#' 5D tensor with shape: `(samples, time, rows, cols, channels)` +#' +#' # Output Shape +#' - If `return_state`: a list of tensors. The first tensor is the output. +#' The remaining tensors are the last states, +#' each 4D tensor with shape: `(samples, filters, new_rows, new_cols)` if +#' `data_format='channels_first'` +#' or shape: `(samples, new_rows, new_cols, filters)` if +#' `data_format='channels_last'`. `rows` and `cols` values might have +#' changed due to padding. +#' - If `return_sequences`: 5D tensor with shape: `(samples, timesteps, +#' filters, new_rows, new_cols)` if data_format='channels_first' +#' or shape: `(samples, timesteps, new_rows, new_cols, filters)` if +#' `data_format='channels_last'`. +#' - Else, 4D tensor with shape: `(samples, filters, new_rows, new_cols)` if +#' `data_format='channels_first'` +#' or shape: `(samples, new_rows, new_cols, filters)` if +#' `data_format='channels_last'`. +#' +#' # References +#' - [Shi et al., 2015](https://arxiv.org/abs/1506.04214v1) +#' (the current implementation does not include the feedback loop on the +#' cells output). +#' +#' @param filters +#' int, the dimension of the output space (the number of filters +#' in the convolution). +#' +#' @param kernel_size +#' int or tuple/list of 2 integers, specifying the size of the +#' convolution window. +#' +#' @param strides +#' int or tuple/list of 2 integers, specifying the stride length +#' of the convolution. `strides > 1` is incompatible with +#' `dilation_rate > 1`. +#' +#' @param padding +#' string, `"valid"` or `"same"` (case-insensitive). +#' `"valid"` means no padding. `"same"` results in padding evenly to +#' the left/right or up/down of the input such that output has the same +#' height/width dimension as the input. +#' +#' @param data_format +#' string, either `"channels_last"` or `"channels_first"`. +#' The ordering of the dimensions in the inputs. `"channels_last"` +#' corresponds to inputs with shape `(batch, steps, features)` +#' while `"channels_first"` corresponds to inputs with shape +#' `(batch, features, steps)`. It defaults to the `image_data_format` +#' value found in your Keras config file at `~/.keras/keras.json`. +#' If you never set it, then it will be `"channels_last"`. +#' +#' @param dilation_rate +#' int or tuple/list of 2 integers, specifying the dilation +#' rate to use for dilated convolution. +#' +#' @param activation +#' Activation function to use. By default hyperbolic tangent +#' activation function is applied (`tanh(x)`). +#' +#' @param recurrent_activation +#' Activation function to use for the recurrent step. +#' +#' @param use_bias +#' Boolean, whether the layer uses a bias vector. +#' +#' @param kernel_initializer +#' Initializer for the `kernel` weights matrix, +#' used for the linear transformation of the inputs. +#' +#' @param recurrent_initializer +#' Initializer for the `recurrent_kernel` weights +#' matrix, used for the linear transformation of the recurrent state. +#' +#' @param bias_initializer +#' Initializer for the bias vector. +#' +#' @param unit_forget_bias +#' Boolean. If `TRUE`, add 1 to the bias of the forget +#' gate at initialization. +#' Use in combination with `bias_initializer="zeros"`. 
+#' This is recommended in [Jozefowicz et al., 2015]( +#' https://proceedings.mlr.press/v37/jozefowicz15.pdf) +#' +#' @param kernel_regularizer +#' Regularizer function applied to the `kernel` weights +#' matrix. +#' +#' @param recurrent_regularizer +#' Regularizer function applied to the +#' `recurrent_kernel` weights matrix. +#' +#' @param bias_regularizer +#' Regularizer function applied to the bias vector. +#' +#' @param activity_regularizer +#' Regularizer function applied to. +#' +#' @param kernel_constraint +#' Constraint function applied to the `kernel` weights +#' matrix. +#' +#' @param recurrent_constraint +#' Constraint function applied to the +#' `recurrent_kernel` weights matrix. +#' +#' @param bias_constraint +#' Constraint function applied to the bias vector. +#' +#' @param dropout +#' Float between 0 and 1. Fraction of the units to drop for the +#' linear transformation of the inputs. +#' +#' @param recurrent_dropout +#' Float between 0 and 1. Fraction of the units to drop +#' for the linear transformation of the recurrent state. +#' +#' @param seed +#' Random seed for dropout. +#' +#' @param return_sequences +#' Boolean. Whether to return the last output +#' in the output sequence, or the full sequence. Default: `FALSE`. +#' +#' @param return_state +#' Boolean. Whether to return the last state in addition +#' to the output. Default: `FALSE`. +#' +#' @param go_backwards +#' Boolean (default: `FALSE`). +#' If `TRUE`, process the input sequence backwards and return the +#' reversed sequence. +#' +#' @param stateful +#' Boolean (default FALSE). If `TRUE`, the last state +#' for each sample at index i in a batch will be used as initial +#' state for the sample of index i in the following batch. +#' +#' @param unroll +#' Boolean (default: `FALSE`). +#' If `TRUE`, the network will be unrolled, +#' else a symbolic loop will be used. +#' Unrolling can speed-up a RNN, +#' although it tends to be more memory-intensive. +#' Unrolling is only suitable for short sequences. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit layer_dense return +#' @export +#' @family rnn layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.ConvLSTM2D +layer_conv_lstm_2d <- +function (object, filters, kernel_size, strides = 1L, padding = "valid", + data_format = NULL, dilation_rate = 1L, activation = "tanh", + recurrent_activation = "sigmoid", use_bias = TRUE, kernel_initializer = "glorot_uniform", + recurrent_initializer = "orthogonal", bias_initializer = "zeros", + unit_forget_bias = TRUE, kernel_regularizer = NULL, recurrent_regularizer = NULL, + bias_regularizer = NULL, activity_regularizer = NULL, kernel_constraint = NULL, + recurrent_constraint = NULL, bias_constraint = NULL, dropout = 0, + recurrent_dropout = 0, seed = NULL, return_sequences = FALSE, + return_state = FALSE, go_backwards = FALSE, stateful = FALSE, + ..., unroll = NULL) +{ + args <- capture_args(list(filters = as_integer, kernel_size = as_integer_tuple, + strides = as_integer_tuple, dilation_rate = as_integer_tuple, + seed = as_integer, input_shape = normalize_shape, batch_size = as_integer, + batch_input_shape = normalize_shape), ignore = "object") + create_layer(keras$layers$ConvLSTM2D, object, args) +} + + +#' 3D Convolutional LSTM. +#' +#' @description +#' Similar to an LSTM layer, but the input transformations +#' and recurrent transformations are both convolutional. 
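+#'
+#' # Example
+#' A minimal usage sketch (assuming the default `"channels_last"` format,
+#' so the input is `(samples, time, *spatial_dims, channels)`):
+#' ```{r}
+#' inputs <- random_uniform(c(4, 10, 8, 8, 8, 3))
+#' outputs <- inputs |> layer_conv_lstm_3d(filters = 8, kernel_size = 3)
+#' shape(outputs)
+#' # (4, 6, 6, 6, 8)
+#' ```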
+#'
+#' # Call Arguments
+#' - `inputs`: A 6D tensor.
+#' - `mask`: Binary tensor of shape `(samples, timesteps)` indicating whether a
+#'   given timestep should be masked.
+#' - `training`: Python boolean indicating whether the layer should behave in
+#'   training mode or in inference mode.
+#'   This is only relevant if `dropout` or `recurrent_dropout` are set.
+#' - `initial_state`: List of initial state tensors to be passed to the first
+#'   call of the cell.
+#'
+#' # Input Shape
+#' - If `data_format='channels_first'`:
+#'   6D tensor with shape: `(samples, time, channels, *spatial_dims)`
+#' - If `data_format='channels_last'`:
+#'   6D tensor with shape: `(samples, time, *spatial_dims, channels)`
+#'
+#' # Output Shape
+#' - If `return_state`: a list of tensors. The first tensor is the output.
+#'   The remaining tensors are the last states,
+#'   each 5D tensor with shape: `(samples, filters, *spatial_dims)` if
+#'   `data_format='channels_first'`
+#'   or shape: `(samples, *spatial_dims, filters)` if
+#'   `data_format='channels_last'`.
+#' - If `return_sequences`: 6D tensor with shape: `(samples, timesteps,
+#'   filters, *spatial_dims)` if `data_format='channels_first'`
+#'   or shape: `(samples, timesteps, *spatial_dims, filters)` if
+#'   `data_format='channels_last'`.
+#' - Else, 5D tensor with shape: `(samples, filters, *spatial_dims)` if
+#'   `data_format='channels_first'`
+#'   or shape: `(samples, *spatial_dims, filters)` if
+#'   `data_format='channels_last'`.
+#'
+#' # References
+#' - [Shi et al., 2015](https://arxiv.org/abs/1506.04214v1)
+#'   (the current implementation does not include the feedback loop on the
+#'   cell's output).
+#'
+#' @param filters
+#' int, the dimension of the output space (the number of filters
+#' in the convolution).
+#'
+#' @param kernel_size
+#' int or tuple/list of 3 integers, specifying the size of the
+#' convolution window.
+#'
+#' @param strides
+#' int or tuple/list of 3 integers, specifying the stride length
+#' of the convolution. `strides > 1` is incompatible with
+#' `dilation_rate > 1`.
+#'
+#' @param padding
+#' string, `"valid"` or `"same"` (case-insensitive).
+#' `"valid"` means no padding. `"same"` results in padding evenly to
+#' the left/right or up/down of the input such that output has the same
+#' height/width dimension as the input.
+#'
+#' @param data_format
+#' string, either `"channels_last"` or `"channels_first"`.
+#' The ordering of the dimensions in the inputs. `"channels_last"`
+#' corresponds to inputs with shape `(batch, steps, features)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch, features, steps)`. It defaults to the `image_data_format`
+#' value found in your Keras config file at `~/.keras/keras.json`.
+#' If you never set it, then it will be `"channels_last"`.
+#'
+#' @param dilation_rate
+#' int or tuple/list of 3 integers, specifying the dilation
+#' rate to use for dilated convolution.
+#'
+#' @param activation
+#' Activation function to use. By default hyperbolic tangent
+#' activation function is applied (`tanh(x)`).
+#'
+#' @param recurrent_activation
+#' Activation function to use for the recurrent step.
+#'
+#' @param use_bias
+#' Boolean, whether the layer uses a bias vector.
+#'
+#' @param kernel_initializer
+#' Initializer for the `kernel` weights matrix,
+#' used for the linear transformation of the inputs.
+#'
+#' @param recurrent_initializer
+#' Initializer for the `recurrent_kernel` weights
+#' matrix, used for the linear transformation of the recurrent state.
+#' +#' @param bias_initializer +#' Initializer for the bias vector. +#' +#' @param unit_forget_bias +#' Boolean. If `TRUE`, add 1 to the bias of the forget +#' gate at initialization. +#' Use in combination with `bias_initializer="zeros"`. +#' This is recommended in [Jozefowicz et al., 2015]( +#' https://proceedings.mlr.press/v37/jozefowicz15.pdf) +#' +#' @param kernel_regularizer +#' Regularizer function applied to the `kernel` weights +#' matrix. +#' +#' @param recurrent_regularizer +#' Regularizer function applied to the +#' `recurrent_kernel` weights matrix. +#' +#' @param bias_regularizer +#' Regularizer function applied to the bias vector. +#' +#' @param activity_regularizer +#' Regularizer function applied to. +#' +#' @param kernel_constraint +#' Constraint function applied to the `kernel` weights +#' matrix. +#' +#' @param recurrent_constraint +#' Constraint function applied to the +#' `recurrent_kernel` weights matrix. +#' +#' @param bias_constraint +#' Constraint function applied to the bias vector. +#' +#' @param dropout +#' Float between 0 and 1. Fraction of the units to drop for the +#' linear transformation of the inputs. +#' +#' @param recurrent_dropout +#' Float between 0 and 1. Fraction of the units to drop +#' for the linear transformation of the recurrent state. +#' +#' @param seed +#' Random seed for dropout. +#' +#' @param return_sequences +#' Boolean. Whether to return the last output +#' in the output sequence, or the full sequence. Default: `FALSE`. +#' +#' @param return_state +#' Boolean. Whether to return the last state in addition +#' to the output. Default: `FALSE`. +#' +#' @param go_backwards +#' Boolean (default: `FALSE`). +#' If `TRUE`, process the input sequence backwards and return the +#' reversed sequence. +#' +#' @param stateful +#' Boolean (default `FALSE`). If `TRUE`, the last state +#' for each sample at index i in a batch will be used as initial +#' state for the sample of index i in the following batch. +#' +#' @param unroll +#' Boolean (default: `FALSE`). +#' If `TRUE`, the network will be unrolled, +#' else a symbolic loop will be used. +#' Unrolling can speed-up a RNN, +#' although it tends to be more memory-intensive. +#' Unrolling is only suitable for short sequences. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @param ... +#' For forward/backward compatability. 
+#' +#' @inherit layer_dense return +#' @export +#' @family rnn layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.ConvLSTM3D +layer_conv_lstm_3d <- +function (object, filters, kernel_size, strides = 1L, padding = "valid", + data_format = NULL, dilation_rate = 1L, activation = "tanh", + recurrent_activation = "sigmoid", use_bias = TRUE, kernel_initializer = "glorot_uniform", + recurrent_initializer = "orthogonal", bias_initializer = "zeros", + unit_forget_bias = TRUE, kernel_regularizer = NULL, recurrent_regularizer = NULL, + bias_regularizer = NULL, activity_regularizer = NULL, kernel_constraint = NULL, + recurrent_constraint = NULL, bias_constraint = NULL, dropout = 0, + recurrent_dropout = 0, seed = NULL, return_sequences = FALSE, + return_state = FALSE, go_backwards = FALSE, stateful = FALSE, + ..., unroll = NULL) +{ + args <- capture_args(list(filters = as_integer, kernel_size = as_integer_tuple, + strides = as_integer_tuple, dilation_rate = as_integer_tuple, + seed = as_integer, input_shape = normalize_shape, batch_size = as_integer, + batch_input_shape = normalize_shape), ignore = "object") + create_layer(keras$layers$ConvLSTM3D, object, args) +} + + +#' Gated Recurrent Unit - Cho et al. 2014. +#' +#' @description +#' Based on available runtime hardware and constraints, this layer +#' will choose different implementations (cuDNN-based or backend-native) +#' to maximize the performance. If a GPU is available and all +#' the arguments to the layer meet the requirement of the cuDNN kernel +#' (see below for details), the layer will use a fast cuDNN implementation +#' when using the TensorFlow backend. +#' +#' The requirements to use the cuDNN implementation are: +#' +#' 1. `activation` == `tanh` +#' 2. `recurrent_activation` == `sigmoid` +#' 3. `dropout` == 0 and `recurrent_dropout` == 0 +#' 4. `unroll` is `FALSE` +#' 5. `use_bias` is `TRUE` +#' 6. `reset_after` is `TRUE` +#' 7. Inputs, if use masking, are strictly right-padded. +#' 8. Eager execution is enabled in the outermost context. +#' +#' There are two variants of the GRU implementation. The default one is based +#' on [v3](https://arxiv.org/abs/1406.1078v3) and has reset gate applied to +#' hidden state before matrix multiplication. The other one is based on +#' [original](https://arxiv.org/abs/1406.1078v1) and has the order reversed. +#' +#' The second variant is compatible with CuDNNGRU (GPU-only) and allows +#' inference on CPU. Thus it has separate biases for `kernel` and +#' `recurrent_kernel`. To use this variant, set `reset_after=TRUE` and +#' `recurrent_activation='sigmoid'`. +#' +#' For example: +#' +#' ```{r} +#' inputs <- random_uniform(c(32, 10, 8)) +#' outputs <- inputs |> layer_gru(4) +#' shape(outputs) +#' # (32, 4) +#' gru <- layer_gru(, 4, return_sequences = TRUE, return_state = TRUE) +#' c(whole_sequence_output, final_state) %<-% gru(inputs) +#' shape(whole_sequence_output) +#' shape(final_state) +#' ``` +#' +#' # Call Arguments +#' - `inputs`: A 3D tensor, with shape `(batch, timesteps, feature)`. +#' - `mask`: Binary tensor of shape `(samples, timesteps)` indicating whether +#' a given timestep should be masked (optional). +#' An individual `TRUE` entry indicates that the corresponding timestep +#' should be utilized, while a `FALSE` entry indicates that the +#' corresponding timestep should be ignored. Defaults to `NULL`. +#' - `training`: Python boolean indicating whether the layer should behave in +#' training mode or in inference mode. 
This argument is passed to the +#' cell when calling it. This is only relevant if `dropout` or +#' `recurrent_dropout` is used (optional). Defaults to `NULL`. +#' - `initial_state`: List of initial state tensors to be passed to the first +#' call of the cell (optional, `NULL` causes creation +#' of zero-filled initial state tensors). Defaults to `NULL`. +#' +#' @param units +#' Positive integer, dimensionality of the output space. +#' +#' @param activation +#' Activation function to use. +#' Default: hyperbolic tangent (`tanh`). +#' If you pass `NULL`, no activation is applied +#' (ie. "linear" activation: `a(x) = x`). +#' +#' @param recurrent_activation +#' Activation function to use +#' for the recurrent step. +#' Default: sigmoid (`sigmoid`). +#' If you pass `NULL`, no activation is applied +#' (ie. "linear" activation: `a(x) = x`). +#' +#' @param use_bias +#' Boolean, (default `TRUE`), whether the layer +#' should use a bias vector. +#' +#' @param kernel_initializer +#' Initializer for the `kernel` weights matrix, +#' used for the linear transformation of the inputs. Default: +#' `"glorot_uniform"`. +#' +#' @param recurrent_initializer +#' Initializer for the `recurrent_kernel` +#' weights matrix, used for the linear transformation of the recurrent +#' state. Default: `"orthogonal"`. +#' +#' @param bias_initializer +#' Initializer for the bias vector. Default: `"zeros"`. +#' +#' @param kernel_regularizer +#' Regularizer function applied to the `kernel` weights +#' matrix. Default: `NULL`. +#' +#' @param recurrent_regularizer +#' Regularizer function applied to the +#' `recurrent_kernel` weights matrix. Default: `NULL`. +#' +#' @param bias_regularizer +#' Regularizer function applied to the bias vector. +#' Default: `NULL`. +#' +#' @param activity_regularizer +#' Regularizer function applied to the output of the +#' layer (its "activation"). Default: `NULL`. +#' +#' @param kernel_constraint +#' Constraint function applied to the `kernel` weights +#' matrix. Default: `NULL`. +#' +#' @param recurrent_constraint +#' Constraint function applied to the +#' `recurrent_kernel` weights matrix. Default: `NULL`. +#' +#' @param bias_constraint +#' Constraint function applied to the bias vector. +#' Default: `NULL`. +#' +#' @param dropout +#' Float between 0 and 1. Fraction of the units to drop for the +#' linear transformation of the inputs. Default: 0. +#' +#' @param recurrent_dropout +#' Float between 0 and 1. Fraction of the units to drop +#' for the linear transformation of the recurrent state. Default: 0. +#' +#' @param seed +#' Random seed for dropout. +#' +#' @param return_sequences +#' Boolean. Whether to return the last output +#' in the output sequence, or the full sequence. Default: `FALSE`. +#' +#' @param return_state +#' Boolean. Whether to return the last state in addition +#' to the output. Default: `FALSE`. +#' +#' @param go_backwards +#' Boolean (default `FALSE`). +#' If `TRUE`, process the input sequence backwards and return the +#' reversed sequence. +#' +#' @param stateful +#' Boolean (default: `FALSE`). If `TRUE`, the last state +#' for each sample at index i in a batch will be used as initial +#' state for the sample of index i in the following batch. +#' +#' @param unroll +#' Boolean (default: `FALSE`). +#' If `TRUE`, the network will be unrolled, +#' else a symbolic loop will be used. +#' Unrolling can speed-up a RNN, +#' although it tends to be more memory-intensive. +#' Unrolling is only suitable for short sequences. 
+#' +#' @param reset_after +#' GRU convention (whether to apply reset gate after or +#' before matrix multiplication). `FALSE` is `"before"`, +#' `TRUE` is `"after"` (default and cuDNN compatible). +#' +#' @param use_cudnn +#' Whether to use a cuDNN-backed implementation. `"auto"` will +#' attempt to use cuDNN when feasible, and will fallback to the +#' default implementation if not. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit layer_dense return +#' @export +#' @family gru rnn layers +#' @family rnn layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.GRU +layer_gru <- +function (object, units, activation = "tanh", recurrent_activation = "sigmoid", + use_bias = TRUE, kernel_initializer = "glorot_uniform", recurrent_initializer = "orthogonal", + bias_initializer = "zeros", kernel_regularizer = NULL, recurrent_regularizer = NULL, + bias_regularizer = NULL, activity_regularizer = NULL, kernel_constraint = NULL, + recurrent_constraint = NULL, bias_constraint = NULL, dropout = 0, + recurrent_dropout = 0, seed = NULL, return_sequences = FALSE, + return_state = FALSE, go_backwards = FALSE, stateful = FALSE, + unroll = FALSE, reset_after = TRUE, use_cudnn = "auto", ...) +{ + args <- capture_args(list(units = as_integer, seed = as_integer, + input_shape = normalize_shape, batch_size = as_integer, + batch_input_shape = normalize_shape), ignore = "object") + create_layer(keras$layers$GRU, object, args) +} + + +#' Cell class for the GRU layer. +#' +#' @description +#' This class processes one step within the whole time sequence input, whereas +#' [`layer_gru()`] processes the whole sequence. +#' +#' # Call Arguments +#' - `inputs`: A 2D tensor, with shape `(batch, features)`. +#' - `states`: A 2D tensor with shape `(batch, units)`, which is the state +#' from the previous time step. +#' - `training`: Python boolean indicating whether the layer should behave in +#' training mode or in inference mode. Only relevant when `dropout` or +#' `recurrent_dropout` is used. +#' +#' # Examples +#' ```{r} +#' inputs <- random_uniform(c(32, 10, 8)) +#' outputs <- inputs |> layer_rnn(rnn_cell_gru(4)) +#' shape(outputs) +#' rnn <- layer_rnn( +#' cell = rnn_cell_gru(4), +#' return_sequences=TRUE, +#' return_state=TRUE) +#' c(whole_sequence_output, final_state) %<-% rnn(inputs) +#' shape(whole_sequence_output) +#' shape(final_state) +#' ``` +#' +#' @param units +#' Positive integer, dimensionality of the output space. +#' +#' @param activation +#' Activation function to use. Default: hyperbolic tangent +#' (`tanh`). If you pass `NULL`, no activation is applied +#' (ie. "linear" activation: `a(x) = x`). +#' +#' @param recurrent_activation +#' Activation function to use for the recurrent step. +#' Default: sigmoid (`sigmoid`). If you pass `NULL`, no activation is +#' applied (ie. "linear" activation: `a(x) = x`). +#' +#' @param use_bias +#' Boolean, (default `TRUE`), whether the layer +#' should use a bias vector. +#' +#' @param kernel_initializer +#' Initializer for the `kernel` weights matrix, +#' used for the linear transformation of the inputs. Default: +#' `"glorot_uniform"`. +#' +#' @param recurrent_initializer +#' Initializer for the `recurrent_kernel` +#' weights matrix, used for the linear transformation +#' of the recurrent state. Default: `"orthogonal"`. +#' +#' @param bias_initializer +#' Initializer for the bias vector. Default: `"zeros"`. 
+#' +#' @param kernel_regularizer +#' Regularizer function applied to the `kernel` weights +#' matrix. Default: `NULL`. +#' +#' @param recurrent_regularizer +#' Regularizer function applied to the +#' `recurrent_kernel` weights matrix. Default: `NULL`. +#' +#' @param bias_regularizer +#' Regularizer function applied to the bias vector. +#' Default: `NULL`. +#' +#' @param kernel_constraint +#' Constraint function applied to the `kernel` weights +#' matrix. Default: `NULL`. +#' +#' @param recurrent_constraint +#' Constraint function applied to the +#' `recurrent_kernel` weights matrix. Default: `NULL`. +#' +#' @param bias_constraint +#' Constraint function applied to the bias vector. +#' Default: `NULL`. +#' +#' @param dropout +#' Float between 0 and 1. Fraction of the units to drop for the +#' linear transformation of the inputs. Default: 0. +#' +#' @param recurrent_dropout +#' Float between 0 and 1. Fraction of the units to drop +#' for the linear transformation of the recurrent state. Default: 0. +#' +#' @param reset_after +#' GRU convention (whether to apply reset gate after or +#' before matrix multiplication). `FALSE` = `"before"`, +#' `TRUE` = `"after"` (default and cuDNN compatible). +#' +#' @param seed +#' Random seed for dropout. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @returns A `Layer` instance, which is intended to be used with `layer_rnn()`. +#' @export +#' @family rnn cells +#' @family gru rnn layers +#' @family rnn layers +#' @family layers +# @seealso +# + +#' @tether keras.layers.GRUCell +rnn_cell_gru <- +function (units, activation = "tanh", recurrent_activation = "sigmoid", + use_bias = TRUE, kernel_initializer = "glorot_uniform", recurrent_initializer = "orthogonal", + bias_initializer = "zeros", kernel_regularizer = NULL, recurrent_regularizer = NULL, + bias_regularizer = NULL, kernel_constraint = NULL, recurrent_constraint = NULL, + bias_constraint = NULL, dropout = 0, recurrent_dropout = 0, + reset_after = TRUE, seed = NULL, ...) +{ + args <- capture_args(list(units = as_integer, seed = as_integer, + input_shape = normalize_shape, batch_size = as_integer, + batch_input_shape = normalize_shape)) + create_layer(keras$layers$GRUCell, NULL, args) +} + + +#' Long Short-Term Memory layer - Hochreiter 1997. +#' +#' @description +#' Based on available runtime hardware and constraints, this layer +#' will choose different implementations (cuDNN-based or backend-native) +#' to maximize the performance. If a GPU is available and all +#' the arguments to the layer meet the requirement of the cuDNN kernel +#' (see below for details), the layer will use a fast cuDNN implementation +#' when using the TensorFlow backend. +#' The requirements to use the cuDNN implementation are: +#' +#' 1. `activation` == `tanh` +#' 2. `recurrent_activation` == `sigmoid` +#' 3. `dropout` == 0 and `recurrent_dropout` == 0 +#' 4. `unroll` is `FALSE` +#' 5. `use_bias` is `TRUE` +#' 6. Inputs, if use masking, are strictly right-padded. +#' 7. Eager execution is enabled in the outermost context. +#' +#' For example: +#' +#' ```{r} +#' input <- random_uniform(c(32, 10, 8)) +#' output <- input |> layer_lstm(4) +#' shape(output) +#' +#' lstm <- layer_lstm(units = 4, return_sequences = TRUE, return_state = TRUE) +#' c(whole_seq_output, final_memory_state, final_carry_state) %<-% lstm(input) +#' shape(whole_seq_output) +#' shape(final_memory_state) +#' shape(final_carry_state) +#' ``` +#' +#' # Call Arguments +#' - `inputs`: A 3D tensor, with shape `(batch, timesteps, feature)`. 
+#' - `mask`: Binary tensor of shape `(samples, timesteps)` indicating whether +#' a given timestep should be masked (optional). +#' An individual `TRUE` entry indicates that the corresponding timestep +#' should be utilized, while a `FALSE` entry indicates that the +#' corresponding timestep should be ignored. Defaults to `NULL`. +#' - `training`: Boolean indicating whether the layer should behave in +#' training mode or in inference mode. This argument is passed to the +#' cell when calling it. This is only relevant if `dropout` or +#' `recurrent_dropout` is used (optional). Defaults to `NULL`. +#' - `initial_state`: List of initial state tensors to be passed to the first +#' call of the cell (optional, `NULL` causes creation +#' of zero-filled initial state tensors). Defaults to `NULL`. +#' +#' @param units +#' Positive integer, dimensionality of the output space. +#' +#' @param activation +#' Activation function to use. +#' Default: hyperbolic tangent (`tanh`). +#' If you pass `NULL`, no activation is applied +#' (ie. "linear" activation: `a(x) = x`). +#' +#' @param recurrent_activation +#' Activation function to use +#' for the recurrent step. +#' Default: sigmoid (`sigmoid`). +#' If you pass `NULL`, no activation is applied +#' (ie. "linear" activation: `a(x) = x`). +#' +#' @param use_bias +#' Boolean, (default `TRUE`), whether the layer +#' should use a bias vector. +#' +#' @param kernel_initializer +#' Initializer for the `kernel` weights matrix, +#' used for the linear transformation of the inputs. Default: +#' `"glorot_uniform"`. +#' +#' @param recurrent_initializer +#' Initializer for the `recurrent_kernel` +#' weights matrix, used for the linear transformation of the recurrent +#' state. Default: `"orthogonal"`. +#' +#' @param bias_initializer +#' Initializer for the bias vector. Default: `"zeros"`. +#' +#' @param unit_forget_bias +#' Boolean (default `TRUE`). If `TRUE`, +#' add 1 to the bias of the forget gate at initialization. +#' Setting it to `TRUE` will also force `bias_initializer="zeros"`. +#' This is recommended in [Jozefowicz et al.]( +#' https://github.com/mlresearch/v37/blob/gh-pages/jozefowicz15.pdf) +#' +#' @param kernel_regularizer +#' Regularizer function applied to the `kernel` weights +#' matrix. Default: `NULL`. +#' +#' @param recurrent_regularizer +#' Regularizer function applied to the +#' `recurrent_kernel` weights matrix. Default: `NULL`. +#' +#' @param bias_regularizer +#' Regularizer function applied to the bias vector. +#' Default: `NULL`. +#' +#' @param activity_regularizer +#' Regularizer function applied to the output of the +#' layer (its "activation"). Default: `NULL`. +#' +#' @param kernel_constraint +#' Constraint function applied to the `kernel` weights +#' matrix. Default: `NULL`. +#' +#' @param recurrent_constraint +#' Constraint function applied to the +#' `recurrent_kernel` weights matrix. Default: `NULL`. +#' +#' @param bias_constraint +#' Constraint function applied to the bias vector. +#' Default: `NULL`. +#' +#' @param dropout +#' Float between 0 and 1. Fraction of the units to drop for the +#' linear transformation of the inputs. Default: 0. +#' +#' @param recurrent_dropout +#' Float between 0 and 1. Fraction of the units to drop +#' for the linear transformation of the recurrent state. Default: 0. +#' +#' @param seed +#' Random seed for dropout. +#' +#' @param return_sequences +#' Boolean. Whether to return the last output +#' in the output sequence, or the full sequence. Default: `FALSE`. +#' +#' @param return_state +#' Boolean. 
Whether to return the last state in addition +#' to the output. Default: `FALSE`. +#' +#' @param go_backwards +#' Boolean (default: `FALSE`). +#' If `TRUE`, process the input sequence backwards and return the +#' reversed sequence. +#' +#' @param stateful +#' Boolean (default: `FALSE`). If `TRUE`, the last state +#' for each sample at index i in a batch will be used as initial +#' state for the sample of index i in the following batch. +#' +#' @param unroll +#' Boolean (default `FALSE`). +#' If `TRUE`, the network will be unrolled, +#' else a symbolic loop will be used. +#' Unrolling can speed-up a RNN, +#' although it tends to be more memory-intensive. +#' Unrolling is only suitable for short sequences. +#' +#' @param use_cudnn +#' Whether to use a cuDNN-backed implementation. `"auto"` will +#' attempt to use cuDNN when feasible, and will fallback to the +#' default implementation if not. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit layer_dense return +#' @export +#' @family lstm rnn layers +#' @family rnn layers +#' @family layers +#' @seealso +#' + +# + +#' @tether keras.layers.LSTM +layer_lstm <- +function (object, units, activation = "tanh", recurrent_activation = "sigmoid", + use_bias = TRUE, kernel_initializer = "glorot_uniform", recurrent_initializer = "orthogonal", + bias_initializer = "zeros", unit_forget_bias = TRUE, kernel_regularizer = NULL, + recurrent_regularizer = NULL, bias_regularizer = NULL, activity_regularizer = NULL, + kernel_constraint = NULL, recurrent_constraint = NULL, bias_constraint = NULL, + dropout = 0, recurrent_dropout = 0, seed = NULL, return_sequences = FALSE, + return_state = FALSE, go_backwards = FALSE, stateful = FALSE, + unroll = FALSE, use_cudnn = "auto", ...) +{ + args <- capture_args(list(units = as_integer, seed = as_integer, + input_shape = normalize_shape, batch_size = as_integer, + batch_input_shape = normalize_shape), ignore = "object") + create_layer(keras$layers$LSTM, object, args) +} + + +#' Cell class for the LSTM layer. +#' +#' @description +#' This class processes one step within the whole time sequence input, whereas +#' [`layer_lstm()`] processes the whole sequence. +#' +#' # Call Arguments +#' - `inputs`: A 2D tensor, with shape `(batch, features)`. +#' - `states`: A 2D tensor with shape `(batch, units)`, which is the state +#' from the previous time step. +#' - `training`: Boolean indicating whether the layer should behave in +#' training mode or in inference mode. Only relevant when `dropout` or +#' `recurrent_dropout` is used. +#' +#' # Examples +#' ```{r} +#' inputs <- random_uniform(c(32, 10, 8)) +#' output <- inputs |> +#' layer_rnn(cell = rnn_cell_lstm(4)) +#' shape(output) +#' +#' rnn <- layer_rnn(cell = rnn_cell_lstm(4), +#' return_sequences = T, +#' return_state = T) +#' c(whole_sequence_output, ...final_state) %<-% rnn(inputs) +#' str(whole_sequence_output) +#' str(final_state) +#' ``` +#' +#' @param units +#' Positive integer, dimensionality of the output space. +#' +#' @param activation +#' Activation function to use. Default: hyperbolic tangent +#' (`tanh`). If you pass `NULL`, no activation is applied +#' (ie. "linear" activation: `a(x) = x`). +#' +#' @param recurrent_activation +#' Activation function to use for the recurrent step. +#' Default: sigmoid (`sigmoid`). If you pass `NULL`, no activation is +#' applied (ie. "linear" activation: `a(x) = x`). 
+#' +#' @param use_bias +#' Boolean, (default `TRUE`), whether the layer +#' should use a bias vector. +#' +#' @param kernel_initializer +#' Initializer for the `kernel` weights matrix, +#' used for the linear transformation of the inputs. Default: +#' `"glorot_uniform"`. +#' +#' @param recurrent_initializer +#' Initializer for the `recurrent_kernel` +#' weights matrix, used for the linear transformation +#' of the recurrent state. Default: `"orthogonal"`. +#' +#' @param bias_initializer +#' Initializer for the bias vector. Default: `"zeros"`. +#' +#' @param unit_forget_bias +#' Boolean (default `TRUE`). If `TRUE`, +#' add 1 to the bias of the forget gate at initialization. +#' Setting it to `TRUE` will also force `bias_initializer="zeros"`. +#' This is recommended in [Jozefowicz et al.]( +#' https://github.com/mlresearch/v37/blob/gh-pages/jozefowicz15.pdf) +#' +#' @param kernel_regularizer +#' Regularizer function applied to the `kernel` weights +#' matrix. Default: `NULL`. +#' +#' @param recurrent_regularizer +#' Regularizer function applied to the +#' `recurrent_kernel` weights matrix. Default: `NULL`. +#' +#' @param bias_regularizer +#' Regularizer function applied to the bias vector. +#' Default: `NULL`. +#' +#' @param kernel_constraint +#' Constraint function applied to the `kernel` weights +#' matrix. Default: `NULL`. +#' +#' @param recurrent_constraint +#' Constraint function applied to the +#' `recurrent_kernel` weights matrix. Default: `NULL`. +#' +#' @param bias_constraint +#' Constraint function applied to the bias vector. +#' Default: `NULL`. +#' +#' @param dropout +#' Float between 0 and 1. Fraction of the units to drop for the +#' linear transformation of the inputs. Default: 0. +#' +#' @param recurrent_dropout +#' Float between 0 and 1. Fraction of the units to drop +#' for the linear transformation of the recurrent state. Default: 0. +#' +#' @param seed +#' Random seed for dropout. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit rnn_cell_gru return +#' @export +#' @family rnn cells +#' @family lstm rnn layers +#' @family rnn layers +#' @family layers +# @seealso +# + +#' @tether keras.layers.LSTMCell +rnn_cell_lstm <- +function (units, activation = "tanh", recurrent_activation = "sigmoid", + use_bias = TRUE, kernel_initializer = "glorot_uniform", recurrent_initializer = "orthogonal", + bias_initializer = "zeros", unit_forget_bias = TRUE, kernel_regularizer = NULL, + recurrent_regularizer = NULL, bias_regularizer = NULL, kernel_constraint = NULL, + recurrent_constraint = NULL, bias_constraint = NULL, dropout = 0, + recurrent_dropout = 0, seed = NULL, ...) +{ + args <- capture_args(list(units = as_integer, seed = as_integer, + input_shape = normalize_shape, batch_size = as_integer, + batch_input_shape = normalize_shape)) + create_layer(keras$layers$LSTMCell, NULL, args) +} + + +#' Base class for recurrent layers +#' +#' @description +#' +#' # Call Arguments +#' - `inputs`: Input tensor. +#' - `initial_state`: List of initial state tensors to be passed to the first +#' call of the cell. +#' - `mask`: Binary tensor of shape `[batch_size, timesteps]` +#' indicating whether a given timestep should be masked. +#' An individual `TRUE` entry indicates that the corresponding +#' timestep should be utilized, while a `FALSE` entry indicates +#' that the corresponding timestep should be ignored. +#' - `training`: Python boolean indicating whether the layer should behave in +#' training mode or in inference mode. 
This argument is passed
+#'   to the cell when calling it.
+#'   This is for use with cells that use dropout.
+#'
+#' # Input Shape
+#' 3-D tensor with shape `(batch_size, timesteps, features)`.
+#'
+#' # Output Shape
+#' - If `return_state`: a list of tensors. The first tensor is
+#'   the output. The remaining tensors are the last states,
+#'   each with shape `(batch_size, state_size)`, where `state_size` could
+#'   be a high-dimensional tensor shape.
+#' - If `return_sequences`: 3D tensor with shape
+#'   `(batch_size, timesteps, output_size)`.
+#'
+#' # Masking
+#'
+#' This layer supports masking for input data with a variable number
+#' of timesteps. To introduce masks to your data,
+#' use a [`layer_embedding()`] layer with the `mask_zero` parameter
+#' set to `TRUE`.
+#'
+#' Note on using statefulness in RNNs:
+#'
+#' You can set RNN layers to be 'stateful', which means that the states
+#' computed for the samples in one batch will be reused as initial states
+#' for the samples in the next batch. This assumes a one-to-one mapping
+#' between samples in different successive batches.
+#'
+#' To enable statefulness:
+#'
+#' - Specify `stateful = TRUE` in the layer constructor.
+#' - Specify a fixed batch size for your model, by passing
+#'     - If sequential model:
+#'       `input_batch_shape = c(...)` to the `keras_model_sequential()` call.
+#'     - Else for functional model with 1 or more input layers:
+#'       `batch_shape = c(...)` to the `layer_input()` call(s).
+#'
+#'     This is the expected shape of your inputs
+#'     *including the batch size*.
+#'     It should be a list of integers, e.g. `c(32, 10, 100)`.
+#' - Specify `shuffle = FALSE` when calling `fit()`.
+#'
+#' To reset the states of your model, call [`reset_state()`] on either
+#' a specific layer, or on your entire model.
+#'
+#' Note on specifying the initial state of RNNs:
+#'
+#' You can specify the initial state of RNN layers symbolically by
+#' calling them with the keyword argument `initial_state`. The value of
+#' `initial_state` should be a tensor or list of tensors representing
+#' the initial state of the RNN layer.
+#'
+#' # Examples
+#'
+#' First, let's define a RNN cell, as a layer subclass.
+#' ```{r}
+#' rnn_cell_minimal <- Layer(
+#'   "MinimalRNNCell",
+#'
+#'   initialize = function(units, ...) {
+#'     super$initialize(...)
+#'     self$units <- as.integer(units)
+#'     self$state_size <- as.integer(units)
+#'   },
+#'
+#'   build = function(input_shape) {
+#'     self$kernel <- self$add_weight(
+#'       shape = shape(tail(input_shape, 1), self$units),
+#'       initializer = 'uniform',
+#'       name = 'kernel'
+#'     )
+#'     self$recurrent_kernel <- self$add_weight(
+#'       shape = shape(self$units, self$units),
+#'       initializer = 'uniform',
+#'       name = 'recurrent_kernel'
+#'     )
+#'     self$built <- TRUE
+#'   },
+#'
+#'   call = function(inputs, states) {
+#'     prev_output <- states[[1]]
+#'     h <- op_matmul(inputs, self$kernel)
+#'     output <- h + op_matmul(prev_output, self$recurrent_kernel)
+#'     list(output, list(output))
+#'   }
+#' )
+#' ```
+#'
+#' Let's use this cell in a RNN layer:
+#' ```{r}
+#' cell <- rnn_cell_minimal(units = 32)
+#' x <- layer_input(shape = shape(NULL, 5))
+#' layer <- layer_rnn(cell = cell)
+#' y <- layer(x)
+#' ```
+#'
+#' Here's how to use the cell to build a stacked RNN:
+#' ```{r}
+#' cells <- list(rnn_cell_minimal(units = 32), rnn_cell_minimal(units = 4))
+#' x <- layer_input(shape = shape(NULL, 5))
+#' layer <- layer_rnn(cell = cells)
+#' y <- layer(x)
+#' ```
+#'
+#' @param cell
+#' A RNN cell instance or a list of RNN cell instances.
+#' A RNN cell is a class that has:
+#' - A `call(input_at_t, states_at_t)` method, returning
+#'   `(output_at_t, states_at_t_plus_1)`. The call method of the
+#'   cell can also take the optional argument `constants`, see
+#'   section "Note on passing external constants" below.
+#' - A `state_size` attribute. This can be a single integer
+#'   (single state) in which case it is the size of the recurrent
+#'   state. This can also be a list of integers
+#'   (one size per state).
+#' - An `output_size` attribute, a single integer.
+#' - A `get_initial_state(batch_size=NULL)`
+#'   method that creates a tensor meant to be fed to `call()` as the
+#'   initial state, if the user didn't specify any initial state
+#'   via other means. The returned initial state should have
+#'   shape `(batch_size, cell$state_size)`.
+#'   The cell might choose to create a tensor full of zeros,
+#'   or other values based on the cell's implementation.
+#'   `inputs` is the input tensor to the RNN layer, with shape
+#'   `(batch_size, timesteps, features)`.
+#'   If this method is not implemented
+#'   by the cell, the RNN layer will create a zero filled tensor
+#'   with shape `(batch_size, cell$state_size)`.
+#' In the case that `cell` is a list of RNN cell instances, the cells
+#' will be stacked on top of each other in the RNN, resulting in an
+#' efficient stacked RNN.
+#'
+#' @param return_sequences
+#' Boolean (default `FALSE`). Whether to return the last
+#' output in the output sequence, or the full sequence.
+#'
+#' @param return_state
+#' Boolean (default `FALSE`).
+#' Whether to return the last state in addition to the output.
+#'
+#' @param go_backwards
+#' Boolean (default `FALSE`).
+#' If `TRUE`, process the input sequence backwards and return the
+#' reversed sequence.
+#'
+#' @param stateful
+#' Boolean (default `FALSE`). If `TRUE`, the last state
+#' for each sample at index `i` in a batch will be used as initial
+#' state for the sample of index `i` in the following batch.
+#'
+#' @param unroll
+#' Boolean (default `FALSE`).
+#' If `TRUE`, the network will be unrolled, else a symbolic loop will be
+#' used. Unrolling can speed-up a RNN, although it tends to be more
+#' memory-intensive. Unrolling is only suitable for short sequences.
+#'
+#' @param zero_output_for_mask
+#' Boolean (default `FALSE`).
+#' Whether the output should use zeros for the masked timesteps.
+#' Note that this field is only used when `return_sequences`
+#' is `TRUE` and `mask` is provided.
+#' It can be useful if you want to reuse the raw output sequence of
+#' the RNN without interference from the masked timesteps, e.g.,
+#' merging bidirectional RNNs.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family rnn cells
+#' @family rnn layers
+#' @family layers
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.layers.RNN
+layer_rnn <-
+function (object, cell, return_sequences = FALSE, return_state = FALSE,
+    go_backwards = FALSE, stateful = FALSE, unroll = FALSE, zero_output_for_mask = FALSE,
+    ...)
+{
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$RNN, object, args)
+}
+
+
+#' Fully-connected RNN where the output is to be fed back as the new input.
+#' +#' @description +#' +#' # Call Arguments +#' - `sequence`: A 3D tensor, with shape `[batch, timesteps, feature]`. +#' - `mask`: Binary tensor of shape `[batch, timesteps]` indicating whether +#' a given timestep should be masked. An individual `TRUE` entry +#' indicates that the corresponding timestep should be utilized, +#' while a `FALSE` entry indicates that the corresponding timestep +#' should be ignored. +#' - `training`: Python boolean indicating whether the layer should behave in +#' training mode or in inference mode. +#' This argument is passed to the cell when calling it. +#' This is only relevant if `dropout` or `recurrent_dropout` is used. +#' - `initial_state`: List of initial state tensors to be passed to the first +#' call of the cell. +#' +#' # Examples +#' ```{r} +#' inputs <- random_uniform(c(32, 10, 8)) +#' simple_rnn <- layer_simple_rnn(units = 4) +#' output <- simple_rnn(inputs) # The output has shape `(32, 4)`. +#' simple_rnn <- layer_simple_rnn( +#' units = 4, return_sequences=TRUE, return_state=TRUE +#' ) +#' # whole_sequence_output has shape `(32, 10, 4)`. +#' # final_state has shape `(32, 4)`. +#' c(whole_sequence_output, final_state) %<-% simple_rnn(inputs) +#' ``` +#' +#' @param units +#' Positive integer, dimensionality of the output space. +#' +#' @param activation +#' Activation function to use. +#' Default: hyperbolic tangent (`tanh`). +#' If you pass NULL, no activation is applied +#' (ie. "linear" activation: `a(x) = x`). +#' +#' @param use_bias +#' Boolean, (default `TRUE`), whether the layer uses +#' a bias vector. +#' +#' @param kernel_initializer +#' Initializer for the `kernel` weights matrix, +#' used for the linear transformation of the inputs. Default: +#' `"glorot_uniform"`. +#' +#' @param recurrent_initializer +#' Initializer for the `recurrent_kernel` +#' weights matrix, used for the linear transformation of the recurrent +#' state. Default: `"orthogonal"`. +#' +#' @param bias_initializer +#' Initializer for the bias vector. Default: `"zeros"`. +#' +#' @param kernel_regularizer +#' Regularizer function applied to the `kernel` weights +#' matrix. Default: `NULL`. +#' +#' @param recurrent_regularizer +#' Regularizer function applied to the +#' `recurrent_kernel` weights matrix. Default: `NULL`. +#' +#' @param bias_regularizer +#' Regularizer function applied to the bias vector. +#' Default: `NULL`. +#' +#' @param activity_regularizer +#' Regularizer function applied to the output of the +#' layer (its "activation"). Default: `NULL`. +#' +#' @param kernel_constraint +#' Constraint function applied to the `kernel` weights +#' matrix. Default: `NULL`. +#' +#' @param recurrent_constraint +#' Constraint function applied to the +#' `recurrent_kernel` weights matrix. Default: `NULL`. +#' +#' @param bias_constraint +#' Constraint function applied to the bias vector. +#' Default: `NULL`. +#' +#' @param dropout +#' Float between 0 and 1. +#' Fraction of the units to drop for the linear transformation +#' of the inputs. Default: 0. +#' +#' @param recurrent_dropout +#' Float between 0 and 1. +#' Fraction of the units to drop for the linear transformation of the +#' recurrent state. Default: 0. +#' +#' @param return_sequences +#' Boolean. Whether to return the last output +#' in the output sequence, or the full sequence. Default: `FALSE`. +#' +#' @param return_state +#' Boolean. Whether to return the last state +#' in addition to the output. Default: `FALSE`. +#' +#' @param go_backwards +#' Boolean (default: `FALSE`). 
+#' If `TRUE`, process the input sequence backwards and return the +#' reversed sequence. +#' +#' @param stateful +#' Boolean (default: `FALSE`). If `TRUE`, the last state +#' for each sample at index i in a batch will be used as initial +#' state for the sample of index i in the following batch. +#' +#' @param unroll +#' Boolean (default: `FALSE`). +#' If `TRUE`, the network will be unrolled, +#' else a symbolic loop will be used. +#' Unrolling can speed-up a RNN, +#' although it tends to be more memory-intensive. +#' Unrolling is only suitable for short sequences. +#' +#' @param object +#' Object to compose the layer with. A tensor, array, or sequential model. +#' +#' @param seed +#' Initial seed for the random number generator +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit layer_dense return +#' @export +#' @family simple rnn layers +#' @family rnn layers +#' @family layers +#' @seealso +#' + +# + +#' +#' @tether keras.layers.SimpleRNN +layer_simple_rnn <- +function (object, units, activation = "tanh", use_bias = TRUE, + kernel_initializer = "glorot_uniform", recurrent_initializer = "orthogonal", + bias_initializer = "zeros", kernel_regularizer = NULL, recurrent_regularizer = NULL, + bias_regularizer = NULL, activity_regularizer = NULL, kernel_constraint = NULL, + recurrent_constraint = NULL, bias_constraint = NULL, dropout = 0, + recurrent_dropout = 0, return_sequences = FALSE, return_state = FALSE, + go_backwards = FALSE, stateful = FALSE, unroll = FALSE, seed = NULL, + ...) +{ + args <- capture_args(list(units = as_integer, seed = as_integer, + input_shape = normalize_shape, batch_size = as_integer, + batch_input_shape = normalize_shape), ignore = "object") + create_layer(keras$layers$SimpleRNN, object, args) +} + + +#' Cell class for SimpleRNN. +#' +#' @description +#' This class processes one step within the whole time sequence input, whereas +#' [`layer_simple_rnn()`] processes the whole sequence. +#' +#' # Call Arguments +#' - `sequence`: A 2D tensor, with shape `(batch, features)`. +#' - `states`: A 2D tensor with shape `(batch, units)`, which is the state +#' from the previous time step. +#' - `training`: Python boolean indicating whether the layer should behave in +#' training mode or in inference mode. Only relevant when `dropout` or +#' `recurrent_dropout` is used. +#' +#' # Examples +#' ```{r} +#' inputs <- random_uniform(c(32, 10, 8)) +#' rnn <- layer_rnn(cell = rnn_cell_simple(units = 4)) +#' output <- rnn(inputs) # The output has shape `(32, 4)`. +#' rnn <- layer_rnn( +#' cell = rnn_cell_simple(units = 4), +#' return_sequences=TRUE, +#' return_state=TRUE +#' ) +#' # whole_sequence_output has shape `(32, 10, 4)`. +#' # final_state has shape `(32, 4)`. +#' c(whole_sequence_output, final_state) %<-% rnn(inputs) +#' ``` +#' +#' @param units +#' Positive integer, dimensionality of the output space. +#' +#' @param activation +#' Activation function to use. +#' Default: hyperbolic tangent (`tanh`). +#' If you pass `NULL`, no activation is applied +#' (ie. "linear" activation: `a(x) = x`). +#' +#' @param use_bias +#' Boolean, (default `TRUE`), whether the layer +#' should use a bias vector. +#' +#' @param kernel_initializer +#' Initializer for the `kernel` weights matrix, +#' used for the linear transformation of the inputs. Default: +#' `"glorot_uniform"`. +#' +#' @param recurrent_initializer +#' Initializer for the `recurrent_kernel` +#' weights matrix, used for the linear transformation +#' of the recurrent state. Default: `"orthogonal"`. 
+#' +#' @param bias_initializer +#' Initializer for the bias vector. Default: `"zeros"`. +#' +#' @param kernel_regularizer +#' Regularizer function applied to the `kernel` weights +#' matrix. Default: `NULL`. +#' +#' @param recurrent_regularizer +#' Regularizer function applied to the +#' `recurrent_kernel` weights matrix. Default: `NULL`. +#' +#' @param bias_regularizer +#' Regularizer function applied to the bias vector. +#' Default: `NULL`. +#' +#' @param kernel_constraint +#' Constraint function applied to the `kernel` weights +#' matrix. Default: `NULL`. +#' +#' @param recurrent_constraint +#' Constraint function applied to the +#' `recurrent_kernel` weights matrix. Default: `NULL`. +#' +#' @param bias_constraint +#' Constraint function applied to the bias vector. +#' Default: `NULL`. +#' +#' @param dropout +#' Float between 0 and 1. Fraction of the units to drop for the +#' linear transformation of the inputs. Default: 0. +#' +#' @param recurrent_dropout +#' Float between 0 and 1. Fraction of the units to drop +#' for the linear transformation of the recurrent state. Default: 0. +#' +#' @param seed +#' Random seed for dropout. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit rnn_cell_gru return +#' @export +#' @family rnn cells +#' @family simple rnn layers +#' @family rnn layers +#' @family layers +# @seealso +# + +#' +#' @tether keras.layers.SimpleRNNCell +rnn_cell_simple <- +function (units, activation = "tanh", use_bias = TRUE, kernel_initializer = "glorot_uniform", + recurrent_initializer = "orthogonal", bias_initializer = "zeros", + kernel_regularizer = NULL, recurrent_regularizer = NULL, + bias_regularizer = NULL, kernel_constraint = NULL, recurrent_constraint = NULL, + bias_constraint = NULL, dropout = 0, recurrent_dropout = 0, + seed = NULL, ...) +{ + args <- capture_args(list(units = as_integer, seed = as_integer, + input_shape = normalize_shape, batch_size = as_integer, + batch_input_shape = normalize_shape)) + create_layer(keras$layers$SimpleRNNCell, NULL, args) +} + + +#' Wrapper allowing a stack of RNN cells to behave as a single cell. +#' +#' @description +#' Used to implement efficient stacked RNNs. +#' +#' # Example +#' ```{r} +#' batch_size <- 3 +#' sentence_length <- 5 +#' num_features <- 2 +#' new_shape <- c(batch_size, sentence_length, num_features) +#' x <- array(1:30, dim = new_shape) +#' +#' rnn_cells <- lapply(1:2, function(x) rnn_cell_lstm(units = 128)) +#' stacked_lstm <- rnn_cells_stack(rnn_cells) +#' lstm_layer <- layer_rnn(cell = stacked_lstm) +#' +#' result <- lstm_layer(x) +#' str(result) +#' ``` +#' +#' @param cells +#' List of RNN cell instances. +#' +#' @param ... +#' Unnamed arguments are treated as additional `cells`. +#' Named arguments are passed on to the underlying layer. +#' +#' @inherit rnn_cell_gru return +#' @export +#' @family rnn layers +#' @family layers +# @seealso +# + +#' +#' @tether keras.layers.StackedRNNCells +rnn_cells_stack <- +function (cells, ...) +{ + args <- capture_args( + list( + input_shape = normalize_shape, + batch_size = as_integer, + batch_input_shape = normalize_shape + ), + ignore = c("...", "cells") + ) + dots <- split_dots_named_unnamed(list2(...)) + if (missing(cells)) + cells <- NULL + args$cells <- c(cells, dots$unnamed) + args <- c(args, dots$named) + create_layer(keras$layers$StackedRNNCells, NULL, args) +} + + +#' This wrapper allows to apply a layer to every temporal slice of an input. 
+#'
+#' @description
+#' Every input should be at least 3D, and the second dimension of the
+#' first input will be considered the temporal dimension.
+#'
+#' Consider a batch of 32 video samples, where each sample is a 128x128 RGB
+#' image with `channels_last` data format, across 10 timesteps.
+#' The batch input shape is `(32, 10, 128, 128, 3)`.
+#'
+#' You can then use `TimeDistributed` to apply the same `Conv2D` layer to each
+#' of the 10 timesteps, independently:
+#'
+#' ```{r}
+#' inputs <- keras_input(shape = c(10, 128, 128, 3), batch_size = 32)
+#' conv_2d_layer <- layer_conv_2d(filters = 64, kernel_size = c(3, 3))
+#' outputs <- layer_time_distributed(inputs, layer = conv_2d_layer)
+#' shape(outputs)
+#' ```
+#'
+#' Because `layer_time_distributed` applies the same instance of
+#' `layer_conv_2d` to each of the 10 timesteps, the same set of weights is
+#' used at each timestep.
+#'
+#' # Call Arguments
+#' - `inputs`: Input tensor of shape `(batch, time, ...)`, or nested
+#' tensors, each of which has shape `(batch, time, ...)`.
+#' - `training`: Boolean indicating whether the layer should behave in
+#' training mode or in inference mode. This argument is passed to the
+#' wrapped layer (only if the layer supports this argument).
+#' - `mask`: Binary tensor of shape `(samples, timesteps)` indicating whether
+#' a given timestep should be masked. This argument is passed to the
+#' wrapped layer (only if the layer supports this argument).
+#'
+#' @param layer
+#' A `Layer` instance.
+#'
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit layer_dense return
+#' @export
+#' @family rnn layers
+#' @family layers
+#' @seealso
+#' +
# +
+#'
+#' @tether keras.layers.TimeDistributed
+layer_time_distributed <-
+function (object, layer, ...)
+{
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$TimeDistributed, object, args)
+}
diff --git a/R/learning_rate_schedules.R b/R/learning_rate_schedules.R
deleted file mode 100644
index ee14347fa6..0000000000
--- a/R/learning_rate_schedules.R
+++ /dev/null
@@ -1,437 +0,0 @@
-
-
-#' A LearningRateSchedule that uses an exponential decay schedule
-#'
-#' @details
-#' When training a model, it is often useful to lower the learning rate as
-#' the training progresses. This schedule applies an exponential decay function
-#' to an optimizer step, given a provided initial learning rate.
-#'
-#' The schedule is a 1-arg callable that produces a decayed learning
-#' rate when passed the current optimizer step. This can be useful for changing
-#' the learning rate value across different invocations of optimizer functions.
-#' It is computed as:
-#'
-#' ````r
-#' decayed_learning_rate <- function(step)
-#'   initial_learning_rate * decay_rate ^ (step / decay_steps)
-#' ````
-#'
-#' If the argument `staircase` is `TRUE`, then `step / decay_steps` is
-#' an integer division (`%/%`) and the decayed learning rate follows a
-#' staircase function.
-#' -#' You can pass this schedule directly into a optimizer -#' as the learning rate (see example) -#' Example: When fitting a Keras model, decay every 100000 steps with a base -#' of 0.96: -#' -#' ```R -#' initial_learning_rate <- 0.1 -#' lr_schedule <- learning_rate_schedule_exponential_decay( -#' initial_learning_rate, -#' decay_steps = 100000, -#' decay_rate = 0.96, -#' staircase = TRUE) -#' -#' model %>% compile( -#' optimizer= optimizer_sgd(learning_rate = lr_schedule), -#' loss = 'sparse_categorical_crossentropy', -#' metrics = 'accuracy') -#' -#' model %>% fit(data, labels, epochs = 5) -#' ``` -#' -#' @param ... For backwards and forwards compatibility -#' -#' @seealso -#' + -#' -#' @param initial_learning_rate A scalar `float32` or `float64` `Tensor` or a R -#' number. The initial learning rate. -#' @param decay_steps A scalar `int32` or `int64` `Tensor` or an R number. Must -#' be positive. See the decay computation above. -#' @param decay_rate A scalar `float32` or `float64` `Tensor` or an R number. -#' The decay rate. -#' @param staircase Boolean. If `TRUE` decay the learning rate at discrete -#' intervals. -#' @param name String. Optional name of the operation. Defaults to -#' 'ExponentialDecay'. -#' @export -learning_rate_schedule_exponential_decay <- -function(initial_learning_rate, decay_steps, decay_rate, staircase = FALSE, - ..., name = NULL) -{ - args <- capture_args(match.call(), list( - decay_steps = as_integer)) - do.call(keras$optimizers$schedules$ExponentialDecay, args) -} - -#' A LearningRateSchedule that uses a cosine decay schedule -#' -#' @details -#' See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983), -#' SGDR: Stochastic Gradient Descent with Warm Restarts. -#' -#' When training a model, it is often useful to lower the learning rate as -#' the training progresses. This schedule applies a cosine decay function -#' to an optimizer step, given a provided initial learning rate. -#' It requires a `step` value to compute the decayed learning rate. You can -#' just pass a TensorFlow variable that you increment at each training step. -#' -#' The schedule is a 1-arg callable that produces a decayed learning -#' rate when passed the current optimizer step. This can be useful for changing -#' the learning rate value across different invocations of optimizer functions. -#' It is computed as: -#' -#' ```r -#' decayed_learning_rate <- function(step) { -#' step <- min(step, decay_steps) -#' cosine_decay = <- 0.5 * (1 + cos(pi * step / decay_steps)) -#' decayed <- (1 - alpha) * cosine_decay + alpha -#' initial_learning_rate * decayed -#' } -#' ``` -#' -#' Example usage: -#' ```R -#' decay_steps <- 1000 -#' lr_decayed_fn <- -#' learning_rate_schedule_cosine_decay(initial_learning_rate, decay_steps) -#' ``` -#' -#' You can pass this schedule directly into a keras Optimizer -#' as the `learning_rate`. -#' -#' @param initial_learning_rate A scalar `float32` or `float64` Tensor or a -#' R number. The initial learning rate. -#' @param decay_steps A scalar `int32` or `int64` `Tensor` or an R number. -#' Number of steps to decay over. -#' @param alpha A scalar `float32` or `float64` Tensor or an R number. -#' Minimum learning rate value as a fraction of initial_learning_rate. -#' @param name String. Optional name of the operation. Defaults to -#' 'CosineDecay'. -#' -#' @param ... 
For backwards and forwards compatibility -#' -#' @seealso -#' + -#' @export -learning_rate_schedule_cosine_decay <- - function(initial_learning_rate, decay_steps, alpha = 0, ..., name = NULL) - { - args <- capture_args(match.call(), list( - decay_steps = as_integer)) - do.call(keras$optimizers$schedules$CosineDecay, args) - } - - -#' A LearningRateSchedule that uses a cosine decay schedule with restarts -#' -#' @details -#' See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983), -#' SGDR: Stochastic Gradient Descent with Warm Restarts. -#' -#' When training a model, it is often useful to lower the learning rate as -#' the training progresses. This schedule applies a cosine decay function with -#' restarts to an optimizer step, given a provided initial learning rate. -#' It requires a `step` value to compute the decayed learning rate. You can -#' just pass a TensorFlow variable that you increment at each training step. -#' -#' The schedule is a 1-arg callable that produces a decayed learning -#' rate when passed the current optimizer step. This can be useful for changing -#' the learning rate value across different invocations of optimizer functions. -#' -#' The learning rate multiplier first decays -#' from 1 to `alpha` for `first_decay_steps` steps. Then, a warm -#' restart is performed. Each new warm restart runs for `t_mul` times more -#' steps and with `m_mul` times initial learning rate as the new learning rate. -#' -#' -#' You can pass this schedule directly into a keras Optimizer -#' as the `learning_rate`. -#' -#' @param initial_learning_rate A scalar `float32` or `float64` Tensor or an R -#' number. The initial learning rate. -#' @param first_decay_steps A scalar `int32` or `int64` `Tensor` or an R -#' number. Number of steps to decay over. -#' @param t_mul A scalar `float32` or `float64` `Tensor` or an R number. Used -#' to derive the number of iterations in the i-th period. -#' @param m_mul A scalar `float32` or `float64` `Tensor` or an R number. Used -#' to derive the initial learning rate of the i-th period. -#' @param alpha A scalar `float32` or `float64` Tensor or an R number. Minimum -#' learning rate value as a fraction of the initial_learning_rate. -#' @param name String. Optional name of the operation. Defaults to -#' 'SGDRDecay'. -#' -#' @param ... For backwards and forwards compatibility -#' -#' @seealso -#' + -#' @export -learning_rate_schedule_cosine_decay_restarts <- -function(initial_learning_rate, first_decay_steps, t_mul = 2, - m_mul = 1, alpha = 0, ..., name = NULL) -{ - args <- capture_args(match.call(), list(first_decay_steps = as_integer)) - do.call(keras$optimizers$schedules$CosineDecayRestarts, args) -} - - -#' A LearningRateSchedule that uses an inverse time decay schedule -#' -#' @details -#' When training a model, it is often useful to lower the learning rate as -#' the training progresses. This schedule applies the inverse decay function -#' to an optimizer step, given a provided initial learning rate. -#' It requires a `step` value to compute the decayed learning rate. You can -#' just pass a TensorFlow variable that you increment at each training step. -#' -#' The schedule is a 1-arg callable that produces a decayed learning -#' rate when passed the current optimizer step. This can be useful for changing -#' the learning rate value across different invocations of optimizer functions. 
-#' It is computed as: -#' -#' ```R -#' decayed_learning_rate <- function(step) { -#' initial_learning_rate / (1 + decay_rate * step / decay_step) -#' } -#' ``` -#' -#' or, if `staircase` is `TRUE`, as: -#' -#' ```R -#' decayed_learning_rate function(step) { -#' initial_learning_rate / (1 + decay_rate * floor(step / decay_step)) -#' } -#' ``` -#' -#' You can pass this schedule directly into a keras Optimizer -#' as the `learning_rate`. -#' -#' Example: Fit a Keras model when decaying `1/t` with a rate of `0.5`: -#' -#' ```R -#' ... -#' initial_learning_rate <- 0.1 -#' decay_steps <- 1.0 -#' decay_rate <- 0.5 -#' learning_rate_fn <- learning_rate_schedule_inverse_time_decay( -#' initial_learning_rate, decay_steps, decay_rate) -#' -#' model %>% -#' compile(optimizer = optimizer_sgd(learning_rate = learning_rate_fn), -#' loss = 'sparse_categorical_crossentropy', -#' metrics = 'accuracy') -#' -#' model %>% fit(data, labels, epochs = 5) -#' ``` -#' -#' @param initial_learning_rate A scalar `float32` or `float64` `Tensor` or an -#' R number. The initial learning rate. -#' @param decay_steps A scalar `int32` or `int64` `Tensor` or an R number. How -#' often to apply decay. -#' @param decay_rate An R number. The decay rate. -#' @param staircase Boolean. Whether to apply decay in a discrete staircase, as -#' opposed to continuous, fashion. -#' @param name String. Optional name of the operation. Defaults to -#' 'InverseTimeDecay'. -#' @param ... For backwards and forwards compatibility -#' -#' @seealso -#' + -#' @export -learning_rate_schedule_inverse_time_decay <- -function(initial_learning_rate, decay_steps, decay_rate, staircase = FALSE, ..., - name = NULL) { - args <- capture_args(match.call(), list(decay_steps = as_integer)) - do.call(keras$optimizers$schedules$InverseTimeDecay, args) -} - - -#' A LearningRateSchedule that uses a piecewise constant decay schedule -#' -#' @details -#' The function returns a 1-arg callable to compute the piecewise constant -#' when passed the current optimizer step. This can be useful for changing the -#' learning rate value across different invocations of optimizer functions. -#' -#' Example: use a learning rate that's 1.0 for the first 100001 steps, 0.5 -#' for the next 10000 steps, and 0.1 for any additional steps. -#' -#' ```R -#' step <- tf$Variable(0, trainable=FALSE) -#' boundaries <- as.integer(c(100000, 110000)) -#' values <- c(1.0, 0.5, 0.1) -#' learning_rate_fn <- learning_rate_schedule_piecewise_constant_decay( -#' boundaries, values) -#' -#' # Later, whenever we perform an optimization step, we pass in the step. -#' learning_rate <- learning_rate_fn(step) -#' ``` -#' -#' You can pass this schedule directly into a keras Optimizer -#' as the `learning_rate`. -#' -#' @param boundaries A list of `Tensor`s or R numerics with strictly increasing -#' entries, and with all elements having the same type as the optimizer step. -#' @param values A list of `Tensor`s or R numerics that specifies the -#' values for the intervals defined by `boundaries`. It should have one more -#' element than `boundaries`, and all elements should have the same type. -#' @param name A string. Optional name of the operation. Defaults to -#' 'PiecewiseConstant'. -#' @param ... 
For backwards and forwards compatibility -#' -#' @seealso -#' + -#' @export -learning_rate_schedule_piecewise_constant_decay <- -function(boundaries, values, ..., name = NULL) { - args <- capture_args(match.call()) - do.call(keras$optimizers$schedules$PiecewiseConstantDecay, - args) -} - - -#' A LearningRateSchedule that uses a polynomial decay schedule -#' -#' @details -#' It is commonly observed that a monotonically decreasing learning rate, whose -#' degree of change is carefully chosen, results in a better performing model. -#' This schedule applies a polynomial decay function to an optimizer step, -#' given a provided `initial_learning_rate`, to reach an `end_learning_rate` -#' in the given `decay_steps`. -#' -#' It requires a `step` value to compute the decayed learning rate. You -#' can just pass a TensorFlow variable that you increment at each training -#' step. -#' -#' The schedule is a 1-arg callable that produces a decayed learning rate -#' when passed the current optimizer step. This can be useful for changing the -#' learning rate value across different invocations of optimizer functions. -#' It is computed as: -#' -#' ```R -#' decayed_learning_rate <- function(step) { -#' step <- min(step, decay_steps) -#' ((initial_learning_rate - end_learning_rate) * -#' (1 - step / decay_steps) ^ (power) -#' ) + end_learning_rate -#' } -#' ``` -#' -#' If `cycle` is `TRUE` then a multiple of `decay_steps` is used, the first one -#' that is bigger than `step`. -#' -#' ```python -#' decayed_learning_rate <- function(step) { -#' decay_steps <- decay_steps * ceiling(step / decay_steps) -#' ((initial_learning_rate - end_learning_rate) * -#' (1 - step / decay_steps) ^ (power) -#' ) + end_learning_rate -#' } -#' ``` -#' -#' You can pass this schedule directly into a keras Optimizer -#' as the `learning_rate`. -#' -#' Example: Fit a model while decaying from 0.1 to 0.01 in 10000 steps using -#' sqrt (i.e. power=0.5): -#' -#' ```R -#' ... -#' starter_learning_rate <- 0.1 -#' end_learning_rate <- 0.01 -#' decay_steps <- 10000 -#' learning_rate_fn <- learning_rate_schedule_polynomial_decay( -#' starter_learning_rate, decay_steps, end_learning_rate, power = 0.5) -#' -#' model %>% -#' compile(optimizer = optimizer_sgd(learning_rate = learning_rate_fn), -#' loss = 'sparse_categorical_crossentropy', -#' metrics = 'accuracy') -#' -#' model %>% fit(data, labels, epochs = 5) -#' ``` -#' -#' @param initial_learning_rate A scalar `float32` or `float64` `Tensor` or an -#' R number. The initial learning rate. -#' @param decay_steps A scalar `int32` or `int64` `Tensor` or an R number. -#' Must be positive. See the decay computation above. -#' @param end_learning_rate A scalar `float32` or `float64` `Tensor` or an -#' R number. The minimal end learning rate. -#' @param power A scalar `float32` or `float64` `Tensor` or an R number. -#' The power of the polynomial. Defaults to linear, 1.0. -#' @param cycle A boolean, -#' whether or not it should cycle beyond decay_steps. -#' @param name String. Optional name of the operation. Defaults to -#' 'PolynomialDecay'. -#' -#' @param ... For backwards and forwards compatibility -#' -#' @seealso -#' + -#' @export -learning_rate_schedule_polynomial_decay <- -function(initial_learning_rate, decay_steps, end_learning_rate = 1e-04, - power = 1, cycle = FALSE, ..., name = NULL) -{ - args <- capture_args(match.call(), list(decay_steps = as_integer)) - do.call(keras$optimizers$schedules$PolynomialDecay, args) -} - - - -# TODO: still need to add tests for all these. 
-# TODO: should all optimizer accept a plain R function to `learning_rate`? - - -#' Create a new learning rate schedule type -#' -#' @param classname string -#' @param ... methods and properties of the schedule class -#' @param call function which takes a step argument (scalar integer tensor, the -#' current training step count, and returns the new learning rate). For -#' tracking additional state, objects `self` and `private` are automatically -#' injected into the scope of the function. -#' @param initialize,get_config Additional recommended methods to implement. -#' -#' + -#' @return A `LearningRateSchedule` class generator. -#' @export -new_learning_rate_schedule_class <- -function(classname, ..., initialize = NULL, call, get_config = NULL) { - members <- capture_args(match.call(), ignore = "classname") - members <- drop_nulls(members) - members <- rename_to_dunder(members, "call") - if (!is.null(members[["call"]])) { - if ("__call__" %in% names(members)) - warning("`call()` method is ignored, superceded by `__call__`() method.") - else - names(members)[match("call", names(members))] <- "__call__" - } - - new_py_class( - classname, - members = members, - inherit = keras$optimizers$schedules$LearningRateSchedule, - parent_env = parent.frame(), - convert = TRUE - ) -} - - -rename_to_dunder <- function(members, nms) { - if(anyDuplicated(names(members))) - stop("All names must be unique") - for (nm in nms) { - .__nm__ <- paste0("__", nm, "__") - if (nm %in% names(members)) { - if (.__nm__ %in% names(members)) - warning("`", nm, "` method is ignored, superceded by `", .__nm__, "` method.") - else - names(members)[match(nm, names(members))] <- .__nm__ - } - } - members -} diff --git a/R/losses.R b/R/losses.R index bf9b0769a6..8bc3acb07b 100644 --- a/R/losses.R +++ b/R/losses.R @@ -1,376 +1,1674 @@ -#' @title Loss functions -#' @rdname loss-functions -#' @name loss-functions +#' Computes the cross-entropy loss between true labels and predicted labels. #' -#' @param y_true Ground truth values. shape = `[batch_size, d1, .. dN]`. -#' @param y_pred The predicted values. shape = `[batch_size, d1, .. dN]`. -#' (Tensor of the same shape as `y_true`) +#' @description +#' Use this cross-entropy loss for binary (0 or 1) classification applications. +#' The loss function requires the following inputs: #' -#' @param axis The axis along which to compute crossentropy (the features axis). -#' Axis is 1-based (e.g, first axis is `axis=1`). Defaults to `-1` (the last axis). +#' - `y_true` (true label): This is either 0 or 1. +#' - `y_pred` (predicted value): This is the model's prediction, i.e, a single +#' floating-point value which either represents a +#' [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in `[-inf, inf]` +#' when `from_logits=TRUE`) or a probability (i.e, value in `[0., 1.]` when +#' `from_logits=FALSE`). #' -#' @param ... Additional arguments passed on to the Python callable (for forward -#' and backwards compatibility). +#' # Examples +#' ```{r} +#' y_true <- rbind(c(0, 1), c(0, 0)) +#' y_pred <- rbind(c(0.6, 0.4), c(0.4, 0.6)) +#' loss <- loss_binary_crossentropy(y_true, y_pred) +#' loss +#' ``` +#' **Recommended Usage:** (set `from_logits=TRUE`) #' -#' @param reduction Only applicable if `y_true` and `y_pred` are missing. Type -#' of `keras$losses$Reduction` to apply to loss. Default value is `AUTO`. -#' `AUTO` indicates that the reduction option will be determined by the usage -#' context. For almost all cases this defaults to `SUM_OVER_BATCH_SIZE`. 
When
-#'   used with `tf$distribute$Strategy`, outside of built-in training loops such
-#'   as `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` will raise an
-#'   error. Please see this custom training [tutorial](
-#'   https://www.tensorflow.org/tutorials/distribute/custom_training) for more
-#'   details.
+#' With the `compile()` API:
#'
-#' @param name Only applicable if `y_true` and `y_pred` are missing. Optional
-#'   name for the Loss instance.
+#' ```{r, eval = FALSE}
+#' model %>% compile(
+#'   loss = loss_binary_crossentropy(from_logits=TRUE),
+#'   ...
+#' )
+#' ```
#'
-#' @details Loss functions for model training. These are typically supplied in
-#'   the `loss` parameter of the [compile.keras.engine.training.Model()]
-#'   function.
+#' As a standalone function:
#'
-#' @returns If called with `y_true` and `y_pred`, then the corresponding loss is
-#'   evaluated and the result returned (as a tensor). Alternatively, if `y_true`
-#'   and `y_pred` are missing, then a callable is returned that will compute the
-#'   loss function and, by default, reduce the loss to a scalar tensor; see the
-#'   `reduction` parameter for details. (The callable is a typically a class
-#'   instance that inherits from `keras$losses$Loss`).
+#' ```{r}
+#' # Example 1: (batch_size = 1, number of samples = 4)
+#' y_true <- op_array(c(0, 1, 0, 0))
+#' y_pred <- op_array(c(-18.6, 0.51, 2.94, -12.8))
+#' bce <- loss_binary_crossentropy(from_logits = TRUE)
+#' bce(y_true, y_pred)
+#' ```
#'
-#' @seealso [compile.keras.engine.training.Model()],
-#'   [loss_binary_crossentropy()]
+#' ```{r}
+#' # Example 2: (batch_size = 2, number of samples = 4)
+#' y_true <- rbind(c(0, 1), c(0, 0))
+#' y_pred <- rbind(c(-18.6, 0.51), c(2.94, -12.8))
+#' # Using default 'auto'/'sum_over_batch_size' reduction type.
+#' bce <- loss_binary_crossentropy(from_logits = TRUE)
+#' bce(y_true, y_pred)
#'
-NULL
-
-
-
-#' @section binary_crossentropy:
+#' # Using 'sample_weight' attribute
+#' bce(y_true, y_pred, sample_weight = c(0.8, 0.2))
+#' # 0.243
+#' # Using 'sum' reduction type.
+#' bce <- loss_binary_crossentropy(from_logits = TRUE, reduction = "sum")
+#' bce(y_true, y_pred)
+#'
+#' # Using 'none' reduction type.
+#' bce <- loss_binary_crossentropy(from_logits = TRUE, reduction = NULL)
+#' bce(y_true, y_pred)
+#' ```
+#'
+#' **Default Usage:** (set `from_logits=FALSE`)
+#'
+#' ```{r}
+#' # Make the following updates to the above "Recommended Usage" section
+#' # 1. Set `from_logits=FALSE`
+#' loss_binary_crossentropy() # OR loss_binary_crossentropy(from_logits = FALSE)
+#' # 2. Update `y_pred` to use probabilities instead of logits
+#' y_pred <- c(0.6, 0.3, 0.2, 0.8) # OR rbind(c(0.6, 0.3), c(0.2, 0.8))
+#' ```
+#'
+#' @returns
+#' Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.
#'
-#' Computes the binary crossentropy loss.
+#' @param from_logits
+#' Whether to interpret `y_pred` as a tensor of
+#' [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we
+#' assume that `y_pred` is probabilities (i.e., values in `[0, 1]`).
#'
-#' `label_smoothing` details: Float in `[0, 1]`. If `> 0` then smooth the labels
-#'   by squeezing them towards 0.5 That is, using `1. - 0.5 * label_smoothing`
-#'   for the target class and `0.5 * label_smoothing` for the non-target class.
+#' @param label_smoothing
+#' Float in range `[0, 1]`. When 0, no smoothing occurs.
+#' When > 0, we compute the loss between the predicted labels
+#' and a smoothed version of the true labels, where the smoothing
+#' squeezes the labels towards 0.5.
Larger values of +#' `label_smoothing` correspond to heavier smoothing. #' -#' @param from_logits Whether `y_pred` is expected to be a logits tensor. By -#' default, we assume that `y_pred` encodes a probability distribution. +#' @param axis +#' The axis along which to compute crossentropy (the features axis). +#' Defaults to `-1`. +#' +#' @param reduction +#' Type of reduction to apply to the loss. In almost all cases +#' this should be `"sum_over_batch_size"`. +#' Supported options are `"sum"`, `"sum_over_batch_size"` or `NULL`. +#' +#' @param name +#' Optional name for the loss instance. +#' +#' @param y_true +#' Ground truth values. shape = `[batch_size, d0, .. dN]`. +#' +#' @param y_pred +#' The predicted values. shape = `[batch_size, d0, .. dN]`. +#' +#' @param ... +#' For forward/backward compatability. #' -#' @rdname loss-functions -#' @aliases "binary_crossentropy", "BinaryCrossentropy" #' @export +#' @family losses +#' @seealso +#' + +# + +#' +#' @tether keras.losses.BinaryCrossentropy loss_binary_crossentropy <- - function(y_true, y_pred, - from_logits = FALSE, label_smoothing = 0, axis = -1L, - ..., reduction = "auto", name = "binary_crossentropy") { - args <- capture_args(match.call(), list(axis = as_axis)) - py_callable <- if (missing(y_true) && missing(y_pred)) - keras$losses$BinaryCrossentropy - else - keras$losses$binary_crossentropy - do.call(py_callable, args) - } -attr(loss_binary_crossentropy, "py_function_name") <- "binary_crossentropy" +function (y_true, y_pred, from_logits = FALSE, label_smoothing = 0, + axis = -1L, ..., reduction = "sum_over_batch_size", name = "binary_crossentropy") +{ + args <- capture_args(list(axis = as_axis, y_true = function (x) + if (is_py_object(x)) + x + else np_array(x), y_pred = function (x) + if (is_py_object(x)) + x + else np_array(x))) + callable <- if (missing(y_true) && missing(y_pred)) + keras$losses$BinaryCrossentropy + else keras$losses$binary_crossentropy + do.call(callable, args) +} +#' Computes focal cross-entropy loss between true labels and predictions. +#' +#' @description +#' According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it +#' helps to apply a focal factor to down-weight easy examples and focus more on +#' hard examples. By default, the focal tensor is computed as follows: +#' +#' `focal_factor = (1 - output)^gamma` for class 1 +#' `focal_factor = output^gamma` for class 0 +#' where `gamma` is a focusing parameter. When `gamma` = 0, there is no focal +#' effect on the binary crossentropy loss. +#' +#' If `apply_class_balancing == TRUE`, this function also takes into account a +#' weight balancing factor for the binary classes 0 and 1 as follows: +#' +#' `weight = alpha` for class 1 (`target == 1`) +#' `weight = 1 - alpha` for class 0 +#' where `alpha` is a float in the range of `[0, 1]`. +#' +#' Binary cross-entropy loss is often used for binary (0 or 1) classification +#' tasks. The loss function requires the following inputs: +#' +#' - `y_true` (true label): This is either 0 or 1. +#' - `y_pred` (predicted value): This is the model's prediction, i.e, a single +#' floating-point value which either represents a +#' [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in `[-inf, inf]` +#' when `from_logits=TRUE`) or a probability (i.e, value in `[0., 1.]` when +#' `from_logits=FALSE`). +#' +#' According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it +#' helps to apply a "focal factor" to down-weight easy examples and focus more +#' on hard examples. 
By default, the focal tensor is computed as follows: +#' +#' `focal_factor = (1 - output) ** gamma` for class 1 +#' `focal_factor = output ** gamma` for class 0 +#' where `gamma` is a focusing parameter. When `gamma=0`, this function is +#' equivalent to the binary crossentropy loss. +#' +#' # Examples +#' ```{r} +#' y_true <- rbind(c(0, 1), c(0, 0)) +#' y_pred <- rbind(c(0.6, 0.4), c(0.4, 0.6)) +#' loss <- loss_binary_focal_crossentropy(y_true, y_pred, gamma = 2) +#' loss +#' ``` +#' With the `compile()` API: +#' +#' ```{r, eval = FALSE} +#' model %>% compile( +#' loss = loss_binary_focal_crossentropy( +#' gamma = 2.0, from_logits = TRUE), +#' ... +#' ) +#' ``` +#' +#' As a standalone function: +#' +#' ```{r} +#' # Example 1: (batch_size = 1, number of samples = 4) +#' y_true <- op_array(c(0, 1, 0, 0)) +#' y_pred <- op_array(c(-18.6, 0.51, 2.94, -12.8)) +#' loss <- loss_binary_focal_crossentropy(gamma = 2, from_logits = TRUE) +#' loss(y_true, y_pred) +#' ``` +#' +#' ```{r} +#' # Apply class weight +#' loss <- loss_binary_focal_crossentropy( +#' apply_class_balancing = TRUE, gamma = 2, from_logits = TRUE) +#' loss(y_true, y_pred) +#' ``` +#' +#' ```{r} +#' # Example 2: (batch_size = 2, number of samples = 4) +#' y_true <- rbind(c(0, 1), c(0, 0)) +#' y_pred <- rbind(c(-18.6, 0.51), c(2.94, -12.8)) +#' # Using default 'auto'/'sum_over_batch_size' reduction type. +#' loss <- loss_binary_focal_crossentropy( +#' gamma = 3, from_logits = TRUE) +#' loss(y_true, y_pred) +#' ``` +#' +#' ```{r} +#' # Apply class weight +#' loss <- loss_binary_focal_crossentropy( +#' apply_class_balancing = TRUE, gamma = 3, from_logits = TRUE) +#' loss(y_true, y_pred) +#' ``` +#' +#' ```{r} +#' # Using 'sample_weight' attribute with focal effect +#' loss <- loss_binary_focal_crossentropy( +#' gamma = 3, from_logits = TRUE) +#' loss(y_true, y_pred, sample_weight = c(0.8, 0.2)) +#' ``` +#' +#' ```{r} +#' # Apply class weight +#' loss <- loss_binary_focal_crossentropy( +#' apply_class_balancing = TRUE, gamma = 3, from_logits = TRUE) +#' loss(y_true, y_pred, sample_weight = c(0.8, 0.2)) +#' ``` +#' +#' ```{r} +#' # Using 'sum' reduction` type. +#' loss <- loss_binary_focal_crossentropy( +#' gamma = 4, from_logits = TRUE, +#' reduction = "sum") +#' loss(y_true, y_pred) +#' ``` +#' +#' ```{r} +#' # Apply class weight +#' loss <- loss_binary_focal_crossentropy( +#' apply_class_balancing = TRUE, gamma = 4, from_logits = TRUE, +#' reduction = "sum") +#' loss(y_true, y_pred) +#' ``` +#' +#' ```{r} +#' # Using 'none' reduction type. +#' loss <- loss_binary_focal_crossentropy( +#' gamma = 5, from_logits = TRUE, +#' reduction = NULL) +#' loss(y_true, y_pred) +#' ``` +#' +#' ```{r} +#' # Apply class weight +#' loss <- loss_binary_focal_crossentropy( +#' apply_class_balancing = TRUE, gamma = 5, from_logits = TRUE, +#' reduction = NULL) +#' loss(y_true, y_pred) +#' ``` +#' +#' @returns +#' Binary focal crossentropy loss value +#' with shape = `[batch_size, d0, .. dN-1]`. +#' +#' @param apply_class_balancing +#' A bool, whether to apply weight balancing on the +#' binary classes 0 and 1. +#' +#' @param alpha +#' A weight balancing factor for class 1, default is `0.25` as +#' mentioned in reference [Lin et al., 2018]( +#' https://arxiv.org/pdf/1708.02002.pdf). The weight for class 0 is +#' `1.0 - alpha`. +#' +#' @param gamma +#' A focusing parameter used to compute the focal factor, default is +#' `2.0` as mentioned in the reference +#' [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf). 
+#' +#' @param from_logits +#' Whether to interpret `y_pred` as a tensor of +#' [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we +#' assume that `y_pred` are probabilities (i.e., values in `[0, 1]`). +#' +#' @param label_smoothing +#' Float in `[0, 1]`. When `0`, no smoothing occurs. +#' When > `0`, we compute the loss between the predicted labels +#' and a smoothed version of the true labels, where the smoothing +#' squeezes the labels towards `0.5`. +#' Larger values of `label_smoothing` correspond to heavier smoothing. +#' +#' @param axis +#' The axis along which to compute crossentropy (the features axis). +#' Defaults to `-1`. +#' +#' @param reduction +#' Type of reduction to apply to the loss. In almost all cases +#' this should be `"sum_over_batch_size"`. +#' Supported options are `"sum"`, `"sum_over_batch_size"` or `NULL`. +#' +#' @param name +#' Optional name for the loss instance. +#' +#' @param y_true +#' Ground truth values, of shape `(batch_size, d0, .. dN)`. +#' +#' @param y_pred +#' The predicted values, of shape `(batch_size, d0, .. dN)`. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @export +#' @family losses +# @seealso +# + +#' +#' @tether keras.losses.BinaryFocalCrossentropy +loss_binary_focal_crossentropy <- +function (y_true, y_pred, apply_class_balancing = FALSE, + alpha = 0.25, gamma = 2, from_logits = FALSE, label_smoothing = 0, + axis = -1L, ..., reduction = "sum_over_batch_size", name = "binary_focal_crossentropy") +{ + args <- capture_args(list(axis = as_axis, y_true = function (x) + if (is_py_object(x)) + x + else np_array(x), y_pred = function (x) + if (is_py_object(x)) + x + else np_array(x))) + callable <- if (missing(y_true) && missing(y_pred)) + keras$losses$BinaryFocalCrossentropy + else keras$losses$binary_focal_crossentropy + do.call(callable, args) +} + -#' @section categorical_crossentropy: +#' Computes the crossentropy loss between the labels and predictions. #' -#' Computes the categorical crossentropy loss. +#' @description +#' Use this crossentropy loss function when there are two or more label +#' classes. We expect labels to be provided in a `one_hot` representation. If +#' you want to provide labels as integers, please use +#' `SparseCategoricalCrossentropy` loss. There should be `num_classes` floating +#' point values per feature, i.e., the shape of both `y_pred` and `y_true` are +#' `[batch_size, num_classes]`. #' -#' When using the categorical_crossentropy loss, your targets should be in -#' categorical format (e.g. if you have 10 classes, the target for each sample -#' should be a 10-dimensional vector that is all-zeros except for a 1 at the -#' index corresponding to the class of the sample). In order to convert -#' integer targets into categorical targets, you can use the Keras utility -#' function [to_categorical()]: +#' # Examples +#' ```{r} +#' y_true <- rbind(c(0, 1, 0), c(0, 0, 1)) +#' y_pred <- rbind(c(0.05, 0.95, 0), c(0.1, 0.8, 0.1)) +#' loss <- loss_categorical_crossentropy(y_true, y_pred) +#' loss +#' ``` +#' Standalone usage: +#' +#' ```{r} +#' y_true <- rbind(c(0, 1, 0), c(0, 0, 1)) +#' y_pred <- rbind(c(0.05, 0.95, 0), c(0.1, 0.8, 0.1)) +#' # Using 'auto'/'sum_over_batch_size' reduction type. +#' cce <- loss_categorical_crossentropy() +#' cce(y_true, y_pred) +#' ``` +#' +#' ```{r} +#' # Calling with 'sample_weight'. +#' cce(y_true, y_pred, sample_weight = op_array(c(0.3, 0.7))) +#' ``` +#' +#' ```{r} +#' # Using 'sum' reduction type. 
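+#' # ("sum" adds the per-sample losses, whereas the default
+#' # "sum_over_batch_size" divides that same sum by the batch size.)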
+#' cce <- loss_categorical_crossentropy(reduction = "sum") +#' cce(y_true, y_pred) +#' ``` +#' +#' ```{r} +#' # Using 'none' reduction type. +#' cce <- loss_categorical_crossentropy(reduction = NULL) +#' cce(y_true, y_pred) +#' ``` +#' +#' Usage with the `compile()` API: +#' +#' ```{r, eval = FALSE} +#' model %>% compile(optimizer = 'sgd', +#' loss=loss_categorical_crossentropy()) +#' ``` +#' +#' @returns +#' Categorical crossentropy loss value. +#' +#' @param from_logits +#' Whether `y_pred` is expected to be a logits tensor. By +#' default, we assume that `y_pred` encodes a probability distribution. +#' +#' @param label_smoothing +#' Float in `[0, 1].` When > 0, label values are smoothed, +#' meaning the confidence on label values are relaxed. For example, if +#' `0.1`, use `0.1 / num_classes` for non-target labels and +#' `0.9 + 0.1 / num_classes` for target labels. +#' +#' @param axis +#' The axis along which to compute crossentropy (the features +#' axis). Defaults to `-1`. +#' +#' @param reduction +#' Type of reduction to apply to the loss. In almost all cases +#' this should be `"sum_over_batch_size"`. +#' Supported options are `"sum"`, `"sum_over_batch_size"` or `NULL`. #' -#' `categorical_labels <- to_categorical(int_labels, num_classes = NULL)` +#' @param name +#' Optional name for the loss instance. #' -#' @param from_logits Whether `y_pred` is expected to be a logits tensor. By -#' default we assume that `y_pred` encodes a probability distribution. -#' @param label_smoothing Float in `[0, 1]`. If `> 0` then smooth the labels. -#' For example, if `0.1`, use `0.1 / num_classes` for non-target labels and -#' `0.9 + 0.1 / num_classes` for target labels. +#' @param y_true +#' Tensor of one-hot true targets. +#' +#' @param y_pred +#' Tensor of predicted targets. +#' +#' @param ... +#' For forward/backward compatability. #' -#' @rdname loss-functions #' @export +#' @family losses +#' @seealso +#' + +# + +#' +#' @tether keras.losses.CategoricalCrossentropy loss_categorical_crossentropy <- - function(y_true, y_pred, - from_logits = FALSE, label_smoothing = 0L, axis = -1L, - ..., reduction = "auto", name = "categorical_crossentropy") { - args <- capture_args(match.call(), list(axis = as_axis)) - py_callable <- if (missing(y_true) && missing(y_pred)) - keras$losses$CategoricalCrossentropy - else - keras$losses$categorical_crossentropy - do.call(py_callable, args) +function (y_true, y_pred, from_logits = FALSE, label_smoothing = 0, + axis = -1L, ..., reduction = "sum_over_batch_size", name = "categorical_crossentropy") +{ + args <- capture_args(list(axis = as_axis, y_true = function (x) + if (is_py_object(x)) + x + else np_array(x), y_pred = function (x) + if (is_py_object(x)) + x + else np_array(x))) + callable <- if (missing(y_true) && missing(y_pred)) + keras$losses$CategoricalCrossentropy + else keras$losses$categorical_crossentropy + do.call(callable, args) } -attr(loss_categorical_crossentropy, "py_function_name") <- "categorical_crossentropy" -c("categorical_crossentropy", "CategoricalCrossentropy") +#' Computes the alpha balanced focal crossentropy loss. +#' +#' @description +#' Use this crossentropy loss function when there are two or more label +#' classes and if you want to handle class imbalance without using +#' `class_weights`. We expect labels to be provided in a `one_hot` +#' representation. +#' +#' According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it +#' helps to apply a focal factor to down-weight easy examples and focus more on +#' hard examples. 
The general formula for the focal loss (FL) +#' is as follows: +#' +#' `FL(p_t) = (1 - p_t)^gamma * log(p_t)` +#' +#' where `p_t` is defined as follows: +#' `p_t = output if y_true == 1, else 1 - output` +#' +#' `(1 - p_t)^gamma` is the `modulating_factor`, where `gamma` is a focusing +#' parameter. When `gamma` = 0, there is no focal effect on the cross entropy. +#' `gamma` reduces the importance given to simple examples in a smooth manner. +#' +#' The authors use alpha-balanced variant of focal loss (FL) in the paper: +#' `FL(p_t) = -alpha * (1 - p_t)^gamma * log(p_t)` +#' +#' where `alpha` is the weight factor for the classes. If `alpha` = 1, the +#' loss won't be able to handle class imbalance properly as all +#' classes will have the same weight. This can be a constant or a list of +#' constants. If alpha is a list, it must have the same length as the number +#' of classes. +#' +#' The formula above can be generalized to: +#' `FL(p_t) = alpha * (1 - p_t)^gamma * CrossEntropy(y_true, y_pred)` +#' +#' where minus comes from `CrossEntropy(y_true, y_pred)` (CE). +#' +#' Extending this to multi-class case is straightforward: +#' `FL(p_t) = alpha * (1 - p_t) ** gamma * CategoricalCE(y_true, y_pred)` +#' +#' In the snippet below, there is `num_classes` floating pointing values per +#' example. The shape of both `y_pred` and `y_true` are +#' `(batch_size, num_classes)`. +#' +#' # Examples +#' ```{r} +#' y_true <- rbind(c(0, 1, 0), c(0, 0, 1)) +#' y_pred <- rbind(c(0.05, 0.95, 0), c(0.1, 0.8, 0.1)) +#' loss <- loss_categorical_focal_crossentropy(y_true, y_pred) +#' loss +#' ``` +#' Standalone usage: +#' +#' ```{r} +#' y_true <- rbind(c(0, 1, 0), c(0, 0, 1)) +#' y_pred <- rbind(c(0.05, 0.95, 0), c(0.1, 0.8, 0.1)) +#' # Using 'auto'/'sum_over_batch_size' reduction type. +#' cce <- loss_categorical_focal_crossentropy() +#' cce(y_true, y_pred) +#' ``` +#' +#' ```{r} +#' # Calling with 'sample_weight'. +#' cce(y_true, y_pred, sample_weight = op_array(c(0.3, 0.7))) +#' ``` +#' +#' ```{r} +#' # Using 'sum' reduction type. +#' cce <- loss_categorical_focal_crossentropy(reduction = "sum") +#' cce(y_true, y_pred) +#' ``` +#' +#' ```{r} +#' # Using 'none' reduction type. +#' cce <- loss_categorical_focal_crossentropy(reduction = NULL) +#' cce(y_true, y_pred) +#' ``` +#' +#' Usage with the `compile()` API: +#' +#' ```{r, eval = FALSE} +#' model %>% compile( +#' optimizer = 'adam', +#' loss = loss_categorical_focal_crossentropy()) +#' ``` +#' +#' @returns +#' Categorical focal crossentropy loss value. +#' +#' @param alpha +#' A weight balancing factor for all classes, default is `0.25` as +#' mentioned in the reference. It can be a list of floats or a scalar. +#' In the multi-class case, alpha may be set by inverse class +#' frequency by using `compute_class_weight` from `sklearn.utils`. +#' +#' @param gamma +#' A focusing parameter, default is `2.0` as mentioned in the +#' reference. It helps to gradually reduce the importance given to +#' simple examples in a smooth manner. When `gamma` = 0, there is +#' no focal effect on the categorical crossentropy. +#' +#' @param from_logits +#' Whether `output` is expected to be a logits tensor. By +#' default, we consider that `output` encodes a probability +#' distribution. +#' +#' @param label_smoothing +#' Float in `[0, 1].` When > 0, label values are smoothed, +#' meaning the confidence on label values are relaxed. For example, if +#' `0.1`, use `0.1 / num_classes` for non-target labels and +#' `0.9 + 0.1 / num_classes` for target labels. 
+#' +#' @param axis +#' The axis along which to compute crossentropy (the features +#' axis). Defaults to `-1`. +#' +#' @param reduction +#' Type of reduction to apply to the loss. In almost all cases +#' this should be `"sum_over_batch_size"`. +#' Supported options are `"sum"`, `"sum_over_batch_size"` or `NULL`. +#' +#' @param name +#' Optional name for the loss instance. +#' +#' @param y_true +#' Tensor of one-hot true targets. +#' +#' @param y_pred +#' Tensor of predicted targets. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @export +#' @family losses +# @seealso +# + +#' +#' @tether keras.losses.CategoricalFocalCrossentropy +loss_categorical_focal_crossentropy <- +function (y_true, y_pred, alpha = 0.25, gamma = 2, + from_logits = FALSE, label_smoothing = 0, axis = -1L, ..., + reduction = "sum_over_batch_size", name = "categorical_focal_crossentropy") +{ + args <- capture_args(list(axis = as_axis, y_true = function (x) + if (is_py_object(x)) + x + else np_array(x), y_pred = function (x) + if (is_py_object(x)) + x + else np_array(x))) + callable <- if (missing(y_true) && missing(y_pred)) + keras$losses$CategoricalFocalCrossentropy + else keras$losses$categorical_focal_crossentropy + do.call(callable, args) +} + -#' @rdname loss-functions +#' Computes the categorical hinge loss between `y_true` & `y_pred`. +#' +#' @description +#' Formula: +#' +#' ```{r, eval = FALSE} +#' loss <- maximum(neg - pos + 1, 0) +#' ``` +#' +#' where `neg=maximum((1-y_true)*y_pred)` and `pos=sum(y_true*y_pred)` +#' +#' # Examples +#' ```{r} +#' y_true <- rbind(c(0, 1), c(0, 0)) +#' y_pred <- rbind(c(0.6, 0.4), c(0.4, 0.6)) +#' loss <- loss_categorical_hinge(y_true, y_pred) +#' ``` +#' +#' @returns +#' Categorical hinge loss values with shape = `[batch_size, d0, .. dN-1]`. +#' +#' @param reduction +#' Type of reduction to apply to the loss. In almost all cases +#' this should be `"sum_over_batch_size"`. +#' Supported options are `"sum"`, `"sum_over_batch_size"` or `NULL`. +#' +#' @param name +#' Optional name for the loss instance. +#' +#' @param y_true +#' The ground truth values. `y_true` values are expected to be +#' either `{-1, +1}` or `{0, 1}` (i.e. a one-hot-encoded tensor) with +#' shape <- `[batch_size, d0, .. dN]`. +#' +#' @param y_pred +#' The predicted values with shape = `[batch_size, d0, .. dN]`. +#' +#' @param ... +#' For forward/backward compatability. +#' #' @export +#' @family losses +#' @seealso +#' + +# + +#' +#' @tether keras.losses.CategoricalHinge loss_categorical_hinge <- - function(y_true, y_pred, - ..., reduction = "auto", name = "categorical_hinge") { - args <- capture_args(match.call()) - - py_callable <- if (missing(y_true) && missing(y_pred)) - keras$losses$CategoricalHinge - else - keras$losses$categorical_hinge - do.call(py_callable, args) +function (y_true, y_pred, ..., reduction = "sum_over_batch_size", + name = "categorical_hinge") +{ + args <- capture_args(list(y_true = function (x) + if (is_py_object(x)) + x + else np_array(x), y_pred = function (x) + if (is_py_object(x)) + x + else np_array(x))) + callable <- if (missing(y_true) && missing(y_pred)) + keras$losses$CategoricalHinge + else keras$losses$categorical_hinge + do.call(callable, args) } -attr(loss_categorical_hinge, "py_function_name") <- "categorical_hinge" -c("categorical_hinge", "CategoricalHinge") -# LossCategoricalHinge -# keras$losses$CategoricalHinge() - -#' @rdname loss-functions +#' Computes the cosine similarity between `y_true` & `y_pred`. 
+#'
+#' @description
+#' Formula:
+#' ```{r, eval = FALSE}
+#' loss <- -sum(l2_norm(y_true) * l2_norm(y_pred))
+#' ```
+#'
+#' Note that the result is a number between -1 and 1: 0 indicates
+#' orthogonality, and values closer to -1 indicate greater similarity.
+#' This makes it usable as a loss function in a setting where you try to
+#' maximize the proximity between predictions and targets. If either
+#' `y_true` or `y_pred` is a zero vector, cosine similarity will be 0
+#' regardless of the proximity between predictions and targets.
+#'
+#' # Examples
+#' ```{r}
+#' y_true <- rbind(c(0., 1.), c(1., 1.), c(1., 1.))
+#' y_pred <- rbind(c(1., 0.), c(1., 1.), c(-1., -1.))
+#' loss <- loss_cosine_similarity(y_true, y_pred, axis = -1)
+#' loss
+#' ```
+#'
+#' @returns
+#' Cosine similarity tensor.
+#'
+#' @param axis
+#' The axis along which the cosine similarity is computed
+#' (the features axis). Defaults to `-1`.
+#'
+#' @param reduction
+#' Type of reduction to apply to the loss. In almost all cases
+#' this should be `"sum_over_batch_size"`.
+#' Supported options are `"sum"`, `"sum_over_batch_size"` or `NULL`.
+#'
+#' @param name
+#' Optional name for the loss instance.
+#'
+#' @param y_true
+#' Tensor of true targets.
+#'
+#' @param y_pred
+#' Tensor of predicted targets.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
#' @export
-loss_cosine_similarity <- function(y_true, y_pred, axis = -1L,
-                                   ..., reduction = "auto", name = "cosine_similarity") {
-  args <- capture_args(match.call(), list(axis = as_axis))
-  py_callable <- if (missing(y_true) && missing(y_pred))
-    keras$losses$CosineSimilarity
-  else
-    keras$losses$cosine_similarity
-  do.call(py_callable, args)
-}
-attr(loss_cosine_similarity, "py_function_name") <- "cosine_similarity"
-c("cosine_similarity", "CosineSimilarity")
-
-#' @rdname loss-functions
+#' @family losses
+#' @seealso
+#' +
# +
+#'
+#' @tether keras.losses.CosineSimilarity
+loss_cosine_similarity <-
+function (y_true, y_pred, axis = -1L, ..., reduction = "sum_over_batch_size",
+    name = "cosine_similarity")
+{
+    args <- capture_args(list(axis = as_axis, y_true = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x), y_pred = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x)))
+    callable <- if (missing(y_true) && missing(y_pred))
+        keras$losses$CosineSimilarity
+    else keras$losses$cosine_similarity
+    do.call(callable, args)
+}
+
+
-#' @rdname loss-functions
+#' Computes the Dice loss value between `y_true` and `y_pred`.
+#'
+#' @description
+#' Formula:
+#' ```{r, eval = FALSE}
+#' loss <- 1 - (2 * sum(y_true * y_pred)) / (sum(y_true) + sum(y_pred))
+#' ```
+#'
+#' @returns
+#' If `y_true` and `y_pred` are provided, the Dice loss value. Otherwise,
+#' a `Loss()` instance.
+#'
+#' @param y_true
+#' tensor of true targets.
+#'
+#' @param y_pred
+#' tensor of predicted targets.
+#'
+#' @param reduction
+#' Type of reduction to apply to the loss. In almost all cases
+#' this should be `"sum_over_batch_size"`.
+#' Supported options are `"sum"`, `"sum_over_batch_size"` or `NULL`.
+#'
+#' @param name
+#' String, name for the object.
+#'
+#' @param ...
+#' For forward/backward compatibility.
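+#'
+#' @details
+#' A minimal standalone sketch (the inputs below are illustrative;
+#' `y_true` is a binary mask):
+#' ```{r}
+#' y_true <- rbind(c(1, 1, 0), c(0, 1, 1))
+#' y_pred <- rbind(c(0.8, 0.9, 0.1), c(0.1, 0.7, 0.6))
+#' loss_dice(y_true, y_pred)
+#' ```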
+#' #' @export -loss_hinge <- function(y_true, y_pred, ..., reduction = "auto", name = "hinge") { - args <- capture_args(match.call()) - - py_callable <- if (missing(y_true) && missing(y_pred)) - keras$losses$Hinge - else - keras$losses$hinge - do.call(py_callable, args) +#' @family losses +#' @tether keras.losses.Dice +loss_dice <- +function (y_true, y_pred, ..., reduction = "sum_over_batch_size", name = "dice") +{ + args <- capture_args(list(y_true = as_py_array, y_pred = as_py_array)) + callable <- if (missing(y_true) && missing(y_pred)) + keras$losses$Dice + else keras$losses$dice + do.call(callable, args) } -attr(loss_hinge, "py_function_name") <- "hinge" -c("hinge", "Hinge") -#' @section huber: +#' Computes the hinge loss between `y_true` & `y_pred`. #' -#' Computes Huber loss value. -#' For each value x in `error = y_true - y_pred`: +#' @description +#' Formula: +#' +#' ```{r, eval = FALSE} +#' loss <- mean(maximum(1 - y_true * y_pred, 0), axis=-1) #' ``` -#' loss = 0.5 * x^2 if |x| <= d -#' loss = d * |x| - 0.5 * d^2 if |x| > d +#' +#' `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are +#' provided we will convert them to -1 or 1. +#' +#' # Examples +#' ```{r} +#' y_true <- array(sample(c(-1,1), 6, replace = TRUE), dim = c(2, 3)) +#' y_pred <- random_uniform(c(2, 3)) +#' loss <- loss_hinge(y_true, y_pred) +#' loss #' ``` -#' where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss #' -#' @param delta A float, the point where the Huber loss function changes from a -#' quadratic to linear. +#' @returns +#' Hinge loss values with shape = `[batch_size, d0, .. dN-1]`. +#' +#' @param reduction +#' Type of reduction to apply to the loss. In almost all cases +#' this should be `"sum_over_batch_size"`. +#' Supported options are `"sum"`, `"sum_over_batch_size"` or `NULL`. +#' +#' @param name +#' Optional name for the loss instance. +#' +#' @param y_true +#' The ground truth values. `y_true` values are expected to be -1 +#' or 1. If binary (0 or 1) labels are provided they will be converted +#' to -1 or 1 with shape = `[batch_size, d0, .. dN]`. +#' +#' @param y_pred +#' The predicted values with shape = `[batch_size, d0, .. dN]`. +#' +#' @param ... +#' For forward/backward compatability. 
#' -#' @rdname loss-functions #' @export -loss_huber <- function(y_true, y_pred, delta = 1, ..., reduction = "auto", name = "huber_loss") { - args <- capture_args(match.call()) - py_callable <- if (missing(y_true) && missing(y_pred)) - keras$losses$Huber - else - keras$losses$huber - do.call(py_callable, args) +#' @family losses +#' @seealso +#' + +# + +#' +#' @tether keras.losses.Hinge +loss_hinge <- +function (y_true, y_pred, ..., reduction = "sum_over_batch_size", + name = "hinge") +{ + args <- capture_args(list(y_true = function (x) + if (is_py_object(x)) + x + else np_array(x), y_pred = function (x) + if (is_py_object(x)) + x + else np_array(x))) + callable <- if (missing(y_true) && missing(y_pred)) + keras$losses$Hinge + else keras$losses$hinge + do.call(callable, args) } -attr(loss_huber, "py_function_name") <- "huber" -c("huber", "Huber") - -#' @rdname loss-functions -#' @export -loss_kullback_leibler_divergence <- - function(y_true, y_pred, - ..., reduction = "auto", name = "kl_divergence") { - args <- capture_args(match.call()) - - py_callable <- if (missing(y_true) && missing(y_pred)) - keras$losses$KLDivergence - else - keras$losses$kullback_leibler_divergence - do.call(py_callable, args) - } -attr(loss_kullback_leibler_divergence, "py_function_name") <- "kullback_leibler_divergence" -c("kl_divergence", "kld", "KLD", "KLDivergence", "kullback_leibler_divergence") - -#' @rdname loss-functions +#' Computes the Huber loss between `y_true` & `y_pred`. +#' +#' @description +#' Formula: +#' ```{r, eval = FALSE} +#' for (x in error) { +#' if (abs(x) <= delta){ +#' loss <- c(loss, (0.5 * x^2)) +#' } else if (abs(x) > delta) { +#' loss <- c(loss, (delta * abs(x) - 0.5 * delta^2)) +#' } +#' } +#' loss <- mean(loss) +#' ``` +#' See: [Huber loss](https://en.wikipedia.org/wiki/Huber_loss). +#' +#' # Examples +#' ```{r} +#' y_true <- rbind(c(0, 1), c(0, 0)) +#' y_pred <- rbind(c(0.6, 0.4), c(0.4, 0.6)) +#' loss <- loss_huber(y_true, y_pred) +#' ``` +#' +#' @returns +#' Tensor with one scalar loss entry per sample. +#' +#' @param delta +#' A float, the point where the Huber loss function changes from a +#' quadratic to linear. Defaults to `1.0`. +#' +#' @param reduction +#' Type of reduction to apply to loss. Options are `"sum"`, +#' `"sum_over_batch_size"` or `NULL`. Defaults to +#' `"sum_over_batch_size"`. +#' +#' @param name +#' Optional name for the instance. +#' +#' @param y_true +#' tensor of true targets. +#' +#' @param y_pred +#' tensor of predicted targets. +#' +#' @param ... +#' For forward/backward compatability. +#' #' @export -loss_kl_divergence <- loss_kullback_leibler_divergence +#' @family losses +#' @seealso +#' + +# + +#' +#' @tether keras.losses.Huber +loss_huber <- +function (y_true, y_pred, delta = 1, ..., reduction = "sum_over_batch_size", + name = "huber_loss") +{ + args <- capture_args(list(y_true = function (x) + if (is_py_object(x)) + x + else np_array(x), y_pred = function (x) + if (is_py_object(x)) + x + else np_array(x))) + callable <- if (missing(y_true) && missing(y_pred)) + keras$losses$Huber + else keras$losses$huber + do.call(callable, args) +} -#' @section log_cosh: + +#' Computes Kullback-Leibler divergence loss between `y_true` & `y_pred`. +#' +#' @description +#' Formula: +#' +#' ```{r, eval=FALSE} +#' loss <- y_true * log(y_true / y_pred) +#' ``` +#' +#' `y_true` and `y_pred` are expected to be probability +#' distributions, with values between 0 and 1. They will get +#' clipped to the `[0, 1]` range. 
+#' +#' # Examples +#' ```{r} +#' y_true <- random_uniform(c(2, 3), 0, 2) +#' y_pred <- random_uniform(c(2,3)) +#' loss <- loss_kl_divergence(y_true, y_pred) +#' loss +#' ``` +#' +#' @returns +#' KL Divergence loss values with shape = `[batch_size, d0, .. dN-1]`. +#' +#' @param reduction +#' Type of reduction to apply to the loss. In almost all cases +#' this should be `"sum_over_batch_size"`. +#' Supported options are `"sum"`, `"sum_over_batch_size"` or `NULL`. +#' +#' @param name +#' Optional name for the loss instance. +#' +#' @param y_true +#' Tensor of true targets. #' -#' Logarithm of the hyperbolic cosine of the prediction error. +#' @param y_pred +#' Tensor of predicted targets. #' -#' `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small `x` and -#' to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works mostly -#' like the mean squared error, but will not be so strongly affected by the -#' occasional wildly incorrect prediction. However, it may return NaNs if the -#' intermediate value `cosh(y_pred - y_true)` is too large to be represented -#' in the chosen precision. +#' @param ... +#' For forward/backward compatability. #' -#' @rdname loss-functions #' @export -loss_logcosh <- function(y_true, y_pred, ..., reduction = "auto", name = "log_cosh") { - args <- capture_args(match.call()) - - py_callable <- if (missing(y_true) && missing(y_pred)) - keras$losses$LogCosh - else - keras$losses$logcosh - do.call(py_callable, args) +#' @family losses +#' @seealso +#' + +# + +#' +#' @tether keras.losses.KLDivergence +loss_kl_divergence <- +function (y_true, y_pred, ..., reduction = "sum_over_batch_size", + name = "kl_divergence") +{ + args <- capture_args(list(y_true = function (x) + if (is_py_object(x)) + x + else np_array(x), y_pred = function (x) + if (is_py_object(x)) + x + else np_array(x))) + callable <- if (missing(y_true) && missing(y_pred)) + keras$losses$KLDivergence + else keras$losses$kl_divergence + do.call(callable, args) } -attr(loss_logcosh, "py_function_name") <- "log_cosh" -c("log_cosh", "logcosh", "LogCosh") -#' @rdname loss-functions +#' Computes the logarithm of the hyperbolic cosine of the prediction error. +#' +#' @description +#' Formula: +#' ```{r, eval = FALSE} +#' loss <- mean(log(cosh(y_pred - y_true)), axis=-1) +#' ``` +#' +#' Note that `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small +#' `x` and to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works +#' mostly like the mean squared error, but will not be so strongly affected by +#' the occasional wildly incorrect prediction. +#' +#' # Examples +#' ```{r} +#' y_true <- rbind(c(0., 1.), c(0., 0.)) +#' y_pred <- rbind(c(1., 1.), c(0., 0.)) +#' loss <- loss_log_cosh(y_true, y_pred) +#' # 0.108 +#' ``` +#' +#' @returns +#' Logcosh error values with shape = `[batch_size, d0, .. dN-1]`. +#' +#' @param reduction +#' Type of reduction to apply to loss. Options are `"sum"`, +#' `"sum_over_batch_size"` or `NULL`. Defaults to +#' `"sum_over_batch_size"`. +#' +#' @param name +#' Optional name for the instance. +#' +#' @param y_true +#' Ground truth values with shape = `[batch_size, d0, .. dN]`. +#' +#' @param y_pred +#' The predicted values with shape = `[batch_size, d0, .. dN]`. +#' +#' @param ... +#' For forward/backward compatability. 
+#' #' @export -loss_mean_absolute_error <- - function(y_true, y_pred, - ..., reduction = "auto", name = "mean_absolute_error") { - args <- capture_args(match.call()) - - py_callable <- if (missing(y_true) && missing(y_pred)) - keras$losses$MeanAbsoluteError - else - keras$losses$mean_absolute_error - do.call(py_callable, args) +#' @family losses +#' @seealso +#' + +# + +#' +#' @tether keras.losses.LogCosh +loss_log_cosh <- +function (y_true, y_pred, ..., reduction = "sum_over_batch_size", + name = "log_cosh") +{ + args <- capture_args(list(y_true = function (x) + if (is_py_object(x)) + x + else np_array(x), y_pred = function (x) + if (is_py_object(x)) + x + else np_array(x))) + callable <- if (missing(y_true) && missing(y_pred)) + keras$losses$LogCosh + else keras$losses$log_cosh + do.call(callable, args) } -attr(loss_mean_absolute_error, "py_function_name") <- "mean_absolute_error" -c("mae", "MAE", "mean_absolute_error", "MeanAbsoluteError") - +#' Computes the mean of absolute difference between labels and predictions. +#' +#' @description +#' Formula: +#' +#' ```{r, eval = FALSE} +#' loss <- mean(abs(y_true - y_pred)) +#' ``` +#' +#' # Examples +#' ```{r} +#' y_true <- random_uniform(c(2, 3), 0, 2) +#' y_pred <- random_uniform(c(2, 3)) +#' loss <- loss_mean_absolute_error(y_true, y_pred) +#' ``` +#' +#' @returns +#' Mean absolute error values with shape = `[batch_size, d0, .. dN-1]`. +#' +#' @param reduction +#' Type of reduction to apply to the loss. In almost all cases +#' this should be `"sum_over_batch_size"`. +#' Supported options are `"sum"`, `"sum_over_batch_size"` or `NULL`. +#' +#' @param name +#' Optional name for the loss instance. +#' +#' @param y_true +#' Ground truth values with shape = `[batch_size, d0, .. dN]`. +#' +#' @param y_pred +#' The predicted values with shape = `[batch_size, d0, .. dN]`. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @export +#' @family losses +#' @seealso +#' + +# + +#' +#' @tether keras.losses.MeanAbsoluteError +loss_mean_absolute_error <- +function (y_true, y_pred, ..., reduction = "sum_over_batch_size", + name = "mean_absolute_error") +{ + args <- capture_args(list(y_true = function (x) + if (is_py_object(x)) + x + else np_array(x), y_pred = function (x) + if (is_py_object(x)) + x + else np_array(x))) + callable <- if (missing(y_true) && missing(y_pred)) + keras$losses$MeanAbsoluteError + else keras$losses$mean_absolute_error + do.call(callable, args) +} -#' @rdname loss-functions +#' Computes the mean absolute percentage error between `y_true` and `y_pred`. +#' +#' @description +#' Formula: +#' +#' ```{r, eval = FALSE} +#' loss <- 100 * op_mean(op_abs((y_true - y_pred) / y_true), +#' axis=-1) +#' ``` +#' +#' Division by zero is prevented by dividing by `max(y_true, epsilon)` +#' where `epsilon = config_epsilon()` +#' (default to `1e-7`). +#' +#' # Examples +#' ```{r} +#' y_true <- random_uniform(c(2, 3)) +#' y_pred <- random_uniform(c(2, 3)) +#' loss <- loss_mean_absolute_percentage_error(y_true, y_pred) +#' ``` +#' +#' @returns +#' Mean absolute percentage error values with shape = `[batch_size, d0, ..dN-1]`. +#' +#' @param reduction +#' Type of reduction to apply to the loss. In almost all cases +#' this should be `"sum_over_batch_size"`. +#' Supported options are `"sum"`, `"sum_over_batch_size"` or `NULL`. +#' +#' @param name +#' Optional name for the loss instance. +#' +#' @param y_true +#' Ground truth values with shape = `[batch_size, d0, .. dN]`. 
+#' +#' @param y_pred +#' The predicted values with shape = `[batch_size, d0, .. dN]`. +#' +#' @param ... +#' For forward/backward compatability. +#' #' @export +#' @family losses +#' @seealso +#' + +# + +#' +#' @tether keras.losses.MeanAbsolutePercentageError loss_mean_absolute_percentage_error <- - function(y_true, y_pred, ..., reduction = "auto", name = "mean_absolute_percentage_error") { - args <- capture_args(match.call()) - - py_callable <- if (missing(y_true) && missing(y_pred)) - keras$losses$MeanAbsolutePercentageError - else - keras$losses$mean_absolute_percentage_error - do.call(py_callable, args) +function (y_true, y_pred, ..., reduction = "sum_over_batch_size", + name = "mean_absolute_percentage_error") +{ + args <- capture_args(list(y_true = function (x) + if (is_py_object(x)) + x + else np_array(x), y_pred = function (x) + if (is_py_object(x)) + x + else np_array(x))) + callable <- if (missing(y_true) && missing(y_pred)) + keras$losses$MeanAbsolutePercentageError + else keras$losses$mean_absolute_percentage_error + do.call(callable, args) } -attr(loss_mean_absolute_percentage_error, "py_function_name") <- "mean_absolute_percentage_error" -c("mape", "MAPE", "mean_absolute_percentage_error", "MeanAbsolutePercentageError") -#' @rdname loss-functions + +#' Computes the mean of squares of errors between labels and predictions. +#' +#' @description +#' Formula: +#' +#' ```{r, eval=FALSE} +#' loss <- mean(square(y_true - y_pred)) +#' ``` +#' +#' # Examples +#' ```{r} +#' y_true <- random_uniform(c(2, 3), 0, 2) +#' y_pred <- random_uniform(c(2, 3)) +#' loss <- loss_mean_squared_error(y_true, y_pred) +#' ``` +#' +#' @returns +#' Mean squared error values with shape = `[batch_size, d0, .. dN-1]`. +#' +#' @param reduction +#' Type of reduction to apply to the loss. In almost all cases +#' this should be `"sum_over_batch_size"`. +#' Supported options are `"sum"`, `"sum_over_batch_size"` or `NULL`. +#' +#' @param name +#' Optional name for the loss instance. +#' +#' @param y_true +#' Ground truth values with shape = `[batch_size, d0, .. dN]`. +#' +#' @param y_pred +#' The predicted values with shape = `[batch_size, d0, .. dN]`. +#' +#' @param ... +#' For forward/backward compatability. +#' #' @export -loss_mean_squared_error <- function(y_true, y_pred, - ..., reduction = "auto", name = "mean_squared_error") { - args <- capture_args(match.call()) - - py_callable <- if (missing(y_true) && missing(y_pred)) - keras$losses$MeanSquaredError - else - keras$losses$mean_squared_error - do.call(py_callable, args) +#' @family losses +#' @seealso +#' + +# + +#' +#' @tether keras.losses.MeanSquaredError +loss_mean_squared_error <- +function (y_true, y_pred, ..., reduction = "sum_over_batch_size", + name = "mean_squared_error") +{ + args <- capture_args(list(y_true = function (x) + if (is_py_object(x)) + x + else np_array(x), y_pred = function (x) + if (is_py_object(x)) + x + else np_array(x))) + callable <- if (missing(y_true) && missing(y_pred)) + keras$losses$MeanSquaredError + else keras$losses$mean_squared_error + do.call(callable, args) } -attr(loss_mean_squared_error, "py_function_name") <- "mean_squared_error" -c("mse", "MSE", "mean_squared_error", "MeanSquaredError") - -#' @rdname loss-functions +#' Computes the mean squared logarithmic error between `y_true` and `y_pred`. +#' +#' @description +#' Note that `y_pred` and `y_true` cannot be less or equal to `0`. Negative +#' values and `0` values will be replaced with `config_epsilon()` +#' (default to `1e-7`). 
+#' +#' Formula: +#' +#' ```{r, eval = FALSE} +#' loss <- mean(square(log(y_true + 1) - log(y_pred + 1))) +#' ``` +#' +#' # Examples +#' ```{r} +#' y_true <- random_uniform(c(2, 3), 0, 2) +#' y_pred <- random_uniform(c(2, 3)) +#' loss <- loss_mean_squared_logarithmic_error(y_true, y_pred) +#' ``` +#' +#' @returns +#' Mean squared logarithmic error values with shape = `[batch_size, d0, .. dN-1]`. +#' +#' @param reduction +#' Type of reduction to apply to the loss. In almost all cases +#' this should be `"sum_over_batch_size"`. +#' Supported options are `"sum"`, `"sum_over_batch_size"` or `NULL`. +#' +#' @param name +#' Optional name for the loss instance. +#' +#' @param y_true +#' Ground truth values with shape = `[batch_size, d0, .. dN]`. +#' +#' @param y_pred +#' The predicted values with shape = `[batch_size, d0, .. dN]`. +#' +#' @param ... +#' For forward/backward compatability. +#' #' @export +#' @family losses +#' @seealso +#' + +# + +#' +#' @tether keras.losses.MeanSquaredLogarithmicError loss_mean_squared_logarithmic_error <- - function(y_true, y_pred, ..., - reduction = "auto", name = "mean_squared_logarithmic_error") { - args <- capture_args(match.call()) - - py_callable <- if (missing(y_true) && missing(y_pred)) - keras$losses$MeanSquaredLogarithmicError - else - keras$losses$mean_squared_logarithmic_error - do.call(py_callable, args) +function (y_true, y_pred, ..., reduction = "sum_over_batch_size", + name = "mean_squared_logarithmic_error") +{ + args <- capture_args(list(y_true = function (x) + if (is_py_object(x)) + x + else np_array(x), y_pred = function (x) + if (is_py_object(x)) + x + else np_array(x))) + callable <- if (missing(y_true) && missing(y_pred)) + keras$losses$MeanSquaredLogarithmicError + else keras$losses$mean_squared_logarithmic_error + do.call(callable, args) } -attr(loss_mean_squared_logarithmic_error, "py_function_name") <- "mean_squared_logarithmic_error" -c("msle", "MSLE", "mean_squared_logarithmic_error", "MeanSquaredLogarithmicError") - -#' @rdname loss-functions +#' Computes the Poisson loss between `y_true` & `y_pred`. +#' +#' @description +#' Formula: +#' +#' ```{r, eval=FALSE} +#' loss <- y_pred - y_true * log(y_pred) +#' ``` +#' +#' # Examples +#' ```{r} +#' y_true <- random_uniform(c(2, 3), 0, 2) +#' y_pred <- random_uniform(c(2, 3)) +#' loss <- loss_poisson(y_true, y_pred) +#' loss +#' ``` +#' +#' @returns +#' Poisson loss values with shape = `[batch_size, d0, .. dN-1]`. +#' +#' @param reduction +#' Type of reduction to apply to the loss. In almost all cases +#' this should be `"sum_over_batch_size"`. +#' Supported options are `"sum"`, `"sum_over_batch_size"` or `NULL`. +#' +#' @param name +#' Optional name for the loss instance. +#' +#' @param y_true +#' Ground truth values. shape = `[batch_size, d0, .. dN]`. +#' +#' @param y_pred +#' The predicted values. shape = `[batch_size, d0, .. dN]`. +#' +#' @param ... +#' For forward/backward compatability. 
+#'
 #' @export
-loss_poisson <- function(y_true, y_pred, ..., reduction = "auto", name = "poisson") {
-  args <- capture_args(match.call())
-
-  py_callable <- if (missing(y_true) && missing(y_pred))
-    keras$losses$Poisson
-  else
-    keras$losses$poisson
-  do.call(py_callable, args)
+#' @family losses
+#' @seealso
+#' +
+
# +
+#'
+#' @tether keras.losses.Poisson
+loss_poisson <-
+function (y_true, y_pred, ..., reduction = "sum_over_batch_size",
+    name = "poisson")
+{
+    args <- capture_args(list(
+        y_true = function(x) if (is_py_object(x)) x else np_array(x),
+        y_pred = function(x) if (is_py_object(x)) x else np_array(x)
+    ))
+    callable <- if (missing(y_true) && missing(y_pred))
+        keras$losses$Poisson
+    else keras$losses$poisson
+    do.call(callable, args)
 }
-attr(loss_poisson, "py_function_name") <- "poisson"
-c("poisson", "Poisson")

-#' @rdname loss-functions
+#' Computes the crossentropy loss between the labels and predictions.
+#'
+#' @description
+#' Use this crossentropy loss function when there are two or more label
+#' classes. We expect labels to be provided as integers. If you want to
+#' provide labels using a one-hot representation, please use
+#' `loss_categorical_crossentropy()` instead. There should be `num_classes`
+#' floating point values per feature for `y_pred` and a single floating
+#' point value per feature for `y_true`.
+#'
+#' In the snippet below, there is a single floating point value per example for
+#' `y_true` and `num_classes` floating point values per example for
+#' `y_pred`. The shape of `y_true` is `[batch_size]` and the shape of `y_pred`
+#' is `[batch_size, num_classes]`.
+#'
+#' # Examples
+#' ```{r}
+#' y_true <- c(1, 2)
+#' y_pred <- rbind(c(0.05, 0.95, 0), c(0.1, 0.8, 0.1))
+#' loss <- loss_sparse_categorical_crossentropy(y_true, y_pred)
+#' loss
+#' ```
+#' ```{r}
+#' y_true <- c(1, 2)
+#' y_pred <- rbind(c(0.05, 0.95, 0), c(0.1, 0.8, 0.1))
+#' # Using the default 'sum_over_batch_size' reduction type.
+#' scce <- loss_sparse_categorical_crossentropy()
+#' scce(op_array(y_true), op_array(y_pred))
+#' # 1.177
+#' ```
+#'
+#' ```{r}
+#' # Calling with 'sample_weight'.
+#' scce(op_array(y_true), op_array(y_pred), sample_weight = op_array(c(0.3, 0.7)))
+#' ```
+#'
+#' ```{r}
+#' # Using 'sum' reduction type.
+#' scce <- loss_sparse_categorical_crossentropy(reduction = "sum")
+#' scce(op_array(y_true), op_array(y_pred))
+#' # 2.354
+#' ```
+#'
+#' ```{r}
+#' # Using no reduction (reduction = NULL).
+#' scce <- loss_sparse_categorical_crossentropy(reduction = NULL)
+#' scce(op_array(y_true), op_array(y_pred))
+#' # 0.0513, 2.303
+#' ```
+#'
+#' Usage with the `compile()` API:
+#'
+#' ```{r, eval = FALSE}
+#' model %>% compile(optimizer = 'sgd',
+#'                   loss = loss_sparse_categorical_crossentropy())
+#' ```
+#'
+#' @returns
+#' Sparse categorical crossentropy loss value.
+#'
+#' @param from_logits
+#' Whether `y_pred` is expected to be a logits tensor. By
+#' default, we assume that `y_pred` encodes a probability distribution.
+#'
+#' @param reduction
+#' Type of reduction to apply to the loss. In almost all cases
+#' this should be `"sum_over_batch_size"`.
+#' Supported options are `"sum"`, `"sum_over_batch_size"` or `NULL`.
+#'
+#' @param name
+#' Optional name for the loss instance.
+#'
+#' @param y_true
+#' Ground truth values.
+#'
+#' @param y_pred
+#' The predicted values.
+#'
+#' @param ignore_class
+#' Optional integer. The ID of a class to be ignored during
+#' loss computation.
This is useful, for example, in segmentation +#' problems featuring a "void" class (commonly -1 or 255) in +#' segmentation maps. By default (`ignore_class=NULL`), all classes are +#' considered. +#' +#' @param axis +#' Defaults to `-1`. The dimension along which the entropy is +#' computed. +#' +#' @param ... +#' For forward/backward compatability. +#' #' @export +#' @family losses +#' @seealso +#' + +# + +#' +#' @tether keras.losses.SparseCategoricalCrossentropy loss_sparse_categorical_crossentropy <- - function(y_true, y_pred, from_logits = FALSE, axis = -1L, - ..., reduction = "auto", name = "sparse_categorical_crossentropy") { - args <- capture_args(match.call(), list(axis = as_axis)) - - py_callable <- if (missing(y_true) && missing(y_pred)) - keras$losses$SparseCategoricalCrossentropy - else - keras$losses$sparse_categorical_crossentropy - do.call(py_callable, args) +function (y_true, y_pred, from_logits = FALSE, ignore_class = NULL, + axis = -1L, ..., reduction = "sum_over_batch_size", name = "sparse_categorical_crossentropy") +{ + args <- capture_args(list(ignore_class = as_integer, y_true = function (x) + if (is_py_object(x)) + x + else np_array(x), y_pred = function (x) + if (is_py_object(x)) + x + else np_array(x), axis = as_axis)) + callable <- if (missing(y_true) && missing(y_pred)) + keras$losses$SparseCategoricalCrossentropy + else keras$losses$sparse_categorical_crossentropy + do.call(callable, args) } -attr(loss_sparse_categorical_crossentropy, "py_function_name") <- "sparse_categorical_crossentropy" -c("sparse_categorical_crossentropy", "SparseCategoricalCrossentropy") - -#' @rdname loss-functions +#' Computes the squared hinge loss between `y_true` & `y_pred`. +#' +#' @description +#' Formula: +#' +#' ```{r, eval=FALSE} +#' loss <- square(maximum(1 - y_true * y_pred, 0)) +#' ``` +#' +#' `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are +#' provided we will convert them to -1 or 1. +#' +#' # Examples +#' ```{r} +#' y_true <- array(sample(c(-1,1), 6, replace = TRUE), dim = c(2, 3)) +#' y_pred <- random_uniform(c(2, 3)) +#' loss <- loss_squared_hinge(y_true, y_pred) +#' ``` +#' +#' @returns +#' Squared hinge loss values with shape = `[batch_size, d0, .. dN-1]`. +#' +#' @param reduction +#' Type of reduction to apply to the loss. In almost all cases +#' this should be `"sum_over_batch_size"`. +#' Supported options are `"sum"`, `"sum_over_batch_size"` or `NULL`. +#' +#' @param name +#' Optional name for the loss instance. +#' +#' @param y_true +#' The ground truth values. `y_true` values are expected to be -1 +#' or 1. If binary (0 or 1) labels are provided we will convert them +#' to -1 or 1 with shape = `[batch_size, d0, .. dN]`. +#' +#' @param y_pred +#' The predicted values with shape = `[batch_size, d0, .. dN]`. +#' +#' @param ... +#' For forward/backward compatability. 
+#'
 #' @export
-loss_squared_hinge <- function(y_true, y_pred, ..., reduction = "auto", name = "squared_hinge") {
-  args <- capture_args(match.call())
-
-  py_callable <- if (missing(y_true) && missing(y_pred))
-    keras$losses$SquaredHinge
-  else
-    keras$losses$squared_hinge
-  do.call(py_callable, args)
+#' @family losses
+#' @seealso
+#' +
+
# +
+#'
+#' @tether keras.losses.SquaredHinge
+loss_squared_hinge <-
+function (y_true, y_pred, ..., reduction = "sum_over_batch_size",
+    name = "squared_hinge")
+{
+    args <- capture_args(list(
+        y_true = function(x) if (is_py_object(x)) x else np_array(x),
+        y_pred = function(x) if (is_py_object(x)) x else np_array(x)
+    ))
+    callable <- if (missing(y_true) && missing(y_pred))
+        keras$losses$SquaredHinge
+    else keras$losses$squared_hinge
+    do.call(callable, args)
 }
-attr(loss_squared_hinge, "py_function_name") <- "squared_hinge"
-c("squared_hinge", "SquaredHinge")
-
-
+#' CTC (Connectionist Temporal Classification) loss.
+#'
+#' @param y_true
+#' A tensor of shape `(batch_size, target_max_length)` containing
+#' the true labels in integer format. `0` always represents
+#' the blank/mask index and should not be used for classes.
+#'
+#' @param y_pred
+#' A tensor of shape `(batch_size, output_max_length, num_classes)`
+#' containing logits (the output of your model).
+#' They should *not* be normalized via softmax.
+#'
+#' @param name
+#' String, name for the object.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @returns
+#' CTC loss value.
+#'
+#' @export
+#' @inheritParams loss_hinge
+#' @family losses
+#' @tether keras.losses.CTC
+# @seealso
+# +
+loss_ctc <-
+function (y_true, y_pred, ..., reduction = "sum_over_batch_size",
+    name = "ctc")
+{
+    args <- capture_args(list(y_true = as_py_array, y_pred = as_py_array))
+    callable <- if (missing(y_true) && missing(y_pred))
+        keras$losses$CTC
+    else keras$losses$ctc
+    do.call(callable, args)
+}

+#' Computes the Tversky loss value between `y_true` and `y_pred`.
+#'
+#' @description
+#' This loss function is weighted by the alpha and beta coefficients
+#' that penalize false positives and false negatives, respectively.
+#'
+#' With `alpha=0.5` and `beta=0.5`, the loss value becomes equivalent to
+#' Dice Loss.
+#'
+#' # Reference
+#' - [Salehi et al., 2017](https://arxiv.org/abs/1706.05721)
+#'
+#' @returns
+#' Tversky loss value.
+#'
+#' @param y_true
+#' tensor of true targets.
+#'
+#' @param y_pred
+#' tensor of predicted targets.
+#'
+#' @param alpha
+#' coefficient controlling incidence of false positives.
+#'
+#' @param beta
+#' coefficient controlling incidence of false negatives.
+#'
+#' @param name
+#' String, name for the object.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @export
+#' @inheritParams loss_hinge
+#' @family losses
+#' @tether keras.losses.Tversky
+loss_tversky <-
+function (y_true, y_pred, ..., alpha = 0.5, beta = 0.5,
+    reduction = "sum_over_batch_size", name = "tversky")
+{
+    args <- capture_args(list(y_true = as_py_array, y_pred = as_py_array))
+    callable <- if (missing(y_true) && missing(y_pred))
+        keras$losses$Tversky
+    else keras$losses$tversky
+    do.call(callable, args)
+}
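+# A quick numerical check of the Tversky/Dice relationship documented above:
+# with `alpha = 0.5` and `beta = 0.5` the two losses should agree. The values
+# are illustrative, not from the upstream docs:
+#
+#   y_true <- rbind(c(1, 1, 0), c(0, 1, 1))
+#   y_pred <- rbind(c(0.9, 0.8, 0.1), c(0.2, 0.7, 0.6))
+#   loss_tversky(y_true, y_pred, alpha = 0.5, beta = 0.5)
+#   loss_dice(y_true, y_pred)  # same value when alpha = beta = 0.5

-#' (Deprecated) loss_cosine_proximity
-#'
-#' `loss_cosine_proximity` is deprecated and will be removed in a future
-#' version.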
It has been renamed to `loss_cosine_similarity`(). -#' -#' @param ... passed on to [loss_cosine_similarity()] -#' -#' @keywords internal +#' @importFrom reticulate py_to_r_wrapper #' @export -loss_cosine_proximity <- function(...) { - warning("loss_cosine_proximity is deprecated and will be removed in a future version.", - " Please use loss_cosine_similarity instead.") - loss_cosine_similarity(...) +#' @keywords internal +#' Wrapper for Loss/Metric instances that automatically coerces `y_true` and `y_pred` to the appropriate type. +py_to_r_wrapper.keras.src.losses.loss.Loss <- function(x) { + force(x) + as.function.default(c(formals(x), quote({ + args <- capture_args(list(y_true = as_py_array, + y_pred = as_py_array, + sample_weight = as_py_array)) + do.call(x, args) + }))) } -attr(loss_cosine_proximity, "py_function_name") <- "cosine_proximity" diff --git a/R/metrics-callback.R b/R/metrics-callback.R deleted file mode 100644 index cac1597366..0000000000 --- a/R/metrics-callback.R +++ /dev/null @@ -1,270 +0,0 @@ -KerasMetricsCallback <- R6::R6Class( - "KerasMetricsCallback", - - inherit = KerasCallback, - - public = list( - - # instance data - metrics = list(), - metrics_viewer = NULL, - view_metrics = FALSE, - - initialize = function(view_metrics = FALSE) { - self$view_metrics <- view_metrics - }, - - on_train_begin = function(logs = NULL) { - - # strip validation metrics if do_validation is FALSE (for - # fit_generator and fitting TF record the val_ metrics are - # passed even though no data will be provided for them) - if (!self$params$do_validation) { - self$params$metrics <- Filter(function(metric) { - !grepl("^val_", metric) - }, self$params$metrics) - } - - # initialize metrics - for (metric in self$params$metrics) - self$metrics[[metric]] <- numeric() - - # handle metrics - if (length(logs) > 0) - self$on_metrics(logs, 0.5) - - if (tfruns::is_run_active()) { - self$write_params(self$params) - self$write_model_info(self$model) - } - }, - - on_epoch_end = function(epoch, logs = NULL) { - - # handle metrics - self$on_metrics(logs, 0.1) - - }, - - on_metrics = function(logs, sleep) { - - # record metrics - for (metric in names(self$metrics)) { - # guard against metrics not yet available by using NA - # when a named metrics isn't passed in 'logs' - value <- logs[[metric]] - if (is.null(value)) - value <- NA - else - value <- mean(value) - self$metrics[[metric]] <- c(self$metrics[[metric]], value) - } - - # create history object and convert to metrics data frame - history <- keras_training_history(self$params, self$metrics) - metrics <- self$as_metrics_df(history) - - # view metrics if requested - if (self$view_metrics) { - - # create the metrics_viewer or update if we already have one - if (is.null(self$metrics_viewer)) { - self$metrics_viewer <- tfruns::view_run_metrics(metrics) - } - else { - tfruns::update_run_metrics(self$metrics_viewer, metrics) - } - - # pump events - Sys.sleep(sleep) - } - - # record metrics - tfruns::write_run_metadata("metrics", metrics) - - }, - - # convert keras history to metrics data frame suitable for plotting - as_metrics_df = function(history) { - - # create metrics data frame - df <- as.data.frame(history$metrics) - - # pad to epochs if necessary - pad <- history$params$epochs - nrow(df) - pad_data <- list() - for (metric in history$params$metrics) - pad_data[[metric]] <- rep_len(NA, pad) - df <- rbind(df, pad_data) - - # return df - df - }, - - write_params = function(params) { - properties <- list() - properties$samples <- params$samples - 
properties$validation_samples <- params$validation_samples - properties$epochs <- params$epochs - properties$batch_size <- params$batch_size - tfruns::write_run_metadata("properties", properties) - }, - - write_model_info = function(model) { - tryCatch({ - model_info <- list() - model_info$model <- py_str(model, line_length = 80L) - if (is.character(model$loss)) - model_info$loss_function <- model$loss - else if (inherits(model$loss, "python.builtin.function")) - model_info$loss_function <- model$loss$`__name__` - optimizer <- model$optimizer - if (!is.null(optimizer)) { - model_info$optimizer <- py_str(optimizer) - model_info$learning_rate <- k_eval(optimizer$lr) - } - tfruns::write_run_metadata("properties", model_info) - }, error = function(e) { - warning("Unable to log model info: ", e$message, call. = FALSE) - }) - - } - ) -) - -KerasMetricsCallbackV2 <- R6::R6Class( - "KerasMetricsCallbackV2", - - inherit = KerasCallback, - - public = list( - - # instance data - metrics = list(), - metrics_viewer = NULL, - view_metrics = FALSE, - initial_epoch = 0, - - initialize = function(view_metrics = FALSE, initial_epoch = 0) { - self$view_metrics <- view_metrics - self$initial_epoch <- initial_epoch - }, - - on_train_begin = function(logs = NULL) { - if (tfruns::is_run_active()) { - self$write_params(self$params) - self$write_model_info(self$model) - } - }, - - on_epoch_end = function(epoch, logs = NULL) { - - if (epoch - self$initial_epoch == 0) { - - metric_names <- names(logs) - for (metric in metric_names) - self$metrics[[metric]] <- numeric() - - sleep <- 0.5 - } else { - - sleep <- 0.1 - - } - - # handle metrics - self$on_metrics(logs, sleep) - - }, - - on_metrics = function(logs, sleep) { - - # record metrics - for (metric in names(self$metrics)) { - # guard against metrics not yet available by using NA - # when a named metrics isn't passed in 'logs' - value <- logs[[metric]] - if (is.null(value)) - value <- NA - else - value <- mean(value) - - self$metrics[[metric]] <- c(self$metrics[[metric]], value) - } - - # create history object and convert to metrics data frame - - history <- keras_training_history(self$params, self$metrics) - metrics <- self$as_metrics_df(history) - - # view metrics if requested - if (self$view_metrics) { - - # create the metrics_viewer or update if we already have one - if (is.null(self$metrics_viewer)) { - self$metrics_viewer <- tfruns::view_run_metrics(metrics) - } else { - tfruns::update_run_metrics(self$metrics_viewer, metrics) - } - - # pump events - Sys.sleep(sleep) - } - # record metrics - tfruns::write_run_metadata("metrics", metrics) - }, - - # convert keras history to metrics data frame suitable for plotting - as_metrics_df = function(history) { - - # create metrics data frame - df <- as.data.frame(history$metrics) - - # pad to epochs if necessary - pad <- history$params$epochs - nrow(df) - pad_data <- list() - - if (tensorflow::tf_version() < "2.2") - metric_names <- history$params$metrics - else - metric_names <- names(history$metrics) - - for (metric in metric_names) - pad_data[[metric]] <- rep_len(NA, pad) - - df <- rbind(df, pad_data) - - # return df - df - }, - - write_params = function(params) { - properties <- list() - properties$samples <- params$samples - properties$validation_samples <- params$validation_samples - properties$epochs <- params$epochs - properties$batch_size <- params$batch_size - tfruns::write_run_metadata("properties", properties) - }, - - write_model_info = function(model) { - tryCatch({ - model_info <- list() - 
model_info$model <- py_str(model, line_length = 80L) - if (is.character(model$loss)) - model_info$loss_function <- model$loss - else if (inherits(model$loss, "python.builtin.function")) - model_info$loss_function <- model$loss$`__name__` - optimizer <- model$optimizer - if (!is.null(optimizer)) { - model_info$optimizer <- py_str(optimizer) - model_info$learning_rate <- k_eval(optimizer$lr) - } - tfruns::write_run_metadata("properties", model_info) - }, error = function(e) { - warning("Unable to log model info: ", e$message, call. = FALSE) - }) - - } - ) -) diff --git a/R/metrics.R b/R/metrics.R index d50b189d33..6706b0b2e9 100644 --- a/R/metrics.R +++ b/R/metrics.R @@ -1,762 +1,861 @@ -#' Metric -#' -#' A `Metric` object encapsulates metric logic and state that can be used to -#' track model performance during training. It is what is returned by the family -#' of metric functions that start with prefix `metric_*`. + +#' Computes the binary focal crossentropy loss. #' -#' @param name (Optional) string name of the metric instance. -#' @param dtype (Optional) data type of the metric result. +#' @description +#' According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it +#' helps to apply a focal factor to down-weight easy examples and focus more on +#' hard examples. By default, the focal tensor is computed as follows: #' -#' @returns A (subclassed) `Metric` instance that can be passed directly to -#' `compile(metrics = )`, or used as a standalone object. See `?Metric` for -#' example usage. +#' `focal_factor = (1 - output)^gamma` for class 1 +#' `focal_factor = output^gamma` for class 0 +#' where `gamma` is a focusing parameter. When `gamma` = 0, there is no focal +#' effect on the binary crossentropy loss. #' +#' If `apply_class_balancing == TRUE`, this function also takes into account a +#' weight balancing factor for the binary classes 0 and 1 as follows: #' -#' @section Usage with `compile`: -#' ```r -#' model %>% compile( -#' optimizer = 'sgd', -#' loss = 'mse', -#' metrics = list(metric_SOME_METRIC(), metric_SOME_OTHER_METRIC()) -#' ) -#' ``` +#' `weight = alpha` for class 1 (`target == 1`) +#' `weight = 1 - alpha` for class 0 +#' where `alpha` is a float in the range of `[0, 1]`. #' -#' @section Standalone usage: -#' ```r -#' m <- metric_SOME_METRIC() -#' for (e in seq(epochs)) { -#' for (i in seq(train_steps)) { -#' c(y_true, y_pred, sample_weight = NULL) %<-% ... -#' m$update_state(y_true, y_pred, sample_weight) -#' } -#' cat('Final epoch result: ', as.numeric(m$result()), "\n") -#' m$reset_state() -#' } +#' # Examples +#' ```{r} +#' y_true <- rbind(c(0, 1), c(0, 0)) +#' y_pred <- rbind(c(0.6, 0.4), c(0.4, 0.6)) +#' loss <- loss_binary_focal_crossentropy(y_true, y_pred, gamma=2) +#' loss #' ``` #' -#' @section Custom Metric (subclass): -#' To be implemented by subclasses: -#' -#' * `initialize()`: All state variables should be created in this method by calling `self$add_weight()` like: -#' -#' self$var <- self$add_weight(...) +#' @returns +#' Binary focal crossentropy loss value +#' with shape = `[batch_size, d0, .. dN-1]`. #' -#' * `update_state()`: Has all updates to the state variables like: +#' @param y_true +#' Ground truth values, of shape `(batch_size, d0, .. dN)`. #' -#' self$var$assign_add(...) +#' @param y_pred +#' The predicted values, of shape `(batch_size, d0, .. dN)`. #' -#' * `result()`: Computes and returns a value for the metric from the state variables. 
+#' @param apply_class_balancing +#' A bool, whether to apply weight balancing on the +#' binary classes 0 and 1. #' -#' Example custom metric subclass: -#' ````R -#' metric_binary_true_positives <- new_metric_class( -#' classname = "BinaryTruePositives", -#' initialize = function(name = 'binary_true_positives', ...) { -#' super$initialize(name = name, ...) -#' self$true_positives <- -#' self$add_weight(name = 'tp', initializer = 'zeros') -#' }, +#' @param alpha +#' A weight balancing factor for class 1, default is `0.25` as +#' mentioned in the reference. The weight for class 0 is `1.0 - alpha`. #' -#' update_state = function(y_true, y_pred, sample_weight = NULL) { -#' y_true <- k_cast(y_true, "bool") -#' y_pred <- k_cast(y_pred, "bool") +#' @param gamma +#' A focusing parameter, default is `2.0` as mentioned in the +#' reference. #' -#' values <- y_true & y_pred -#' values <- k_cast(values, self$dtype) -#' if (!is.null(sample_weight)) { -#' sample_weight <- k_cast(sample_weight, self$dtype) -#' sample_weight <- tf$broadcast_to(sample_weight, values$shape) -#' values <- values * sample_weight -#' } -#' self$true_positives$assign_add(tf$reduce_sum(values)) -#' }, +#' @param from_logits +#' Whether `y_pred` is expected to be a logits tensor. By +#' default, we assume that `y_pred` encodes a probability distribution. #' -#' result = function() -#' self$true_positives -#' ) -#' model %>% compile(..., metrics = list(metric_binary_true_positives())) -#' ```` -#' The same `metric_binary_true_positives` could be built with `%py_class%` like -#' this: -#' ```` -#' metric_binary_true_positives(keras$metrics$Metric) %py_class% { -#' initialize <- , -#' update_state <- , -#' result <- -#' } -#' ```` +#' @param label_smoothing +#' Float in `[0, 1]`. If > `0` then smooth the labels by +#' squeezing them towards 0.5, that is, +#' using `1. - 0.5 * label_smoothing` for the target class +#' and `0.5 * label_smoothing` for the non-target class. #' -#' @name Metric -#' @rdname Metric -NULL - - -#' @title metric-or-Metric -#' @name metric-or-Metric -#' @rdname metric-or-Metric -#' @keywords internal +#' @param axis +#' The axis along which the mean is computed. Defaults to `-1`. #' -#' @param y_true Tensor of true targets. -#' @param y_pred Tensor of predicted targets. -#' @param ... Passed on to the underlying metric. Used for forwards and backwards compatibility. -#' @param axis (Optional) (1-based) Defaults to -1. The dimension along which the metric is computed. -#' @param name (Optional) string name of the metric instance. -#' @param dtype (Optional) data type of the metric result. -#' -#' @returns If `y_true` and `y_pred` are missing, a (subclassed) `Metric` -#' instance is returned. The `Metric` object can be passed directly to -#' `compile(metrics = )` or used as a standalone object. See `?Metric` for -#' example usage. -#' -#' Alternatively, if called with `y_true` and `y_pred` arguments, then the -#' computed case-wise values for the mini-batch are returned directly. 
-NULL - - -# if(!exists("isFALSE")) -if(getRversion() < "3.5") - isFALSE <- function(x) { - is.logical(x) && length(x) == 1L && !is.na(x) && !x - } - -py_metric_wrapper <- function(py_fn, py_cls, formals=NULL, modifiers=NULL, - py_fn_name = TRUE) { - modifiers <- substitute(modifiers) - py_fn <- substitute(py_fn) - py_cls <- substitute(py_cls) - - if(is.symbol(py_cls)) - py_cls <- substitute(keras$metrics$py_cls) - - if(is.symbol(py_fn)) - py_fn <- substitute(keras$metrics$py_fn) - - if("axis" %in% names(formals)) - modifiers$axis <- quote(as_axis) - - - if (is.null(py_fn)) { - body <- substitute({ - args <- capture_args(match.call(), modifiers) - do.call(py_cls, args) - }) - - formals <- c(alist(... =), formals) - if (!is.character(py_fn_name)) - py_fn_name <- NULL - - } else { - - body <- substitute({ - args <- capture_args(match.call(), modifiers) - py_callable <- if (missing(y_true) && missing(y_pred)) - py_cls else py_fn - do.call(py_callable, args) - }) - - formals <- c(alist(y_true = , y_pred =), formals) - if (!isFALSE(py_fn_name)) { - py_fn_name <- if (isTRUE(py_fn_name)) { - last <- function(x) x[[length(x)]] - last(strsplit(deparse(py_fn), "$", fixed = TRUE)[[1]]) - } - else - NULL - } - } - - formals[["..."]] <- quote(expr = ) - - if (!is.null(py_cls)) { - if (!"name" %in% names(formals)) - formals['name'] <- list(py_fn_name) - if (!"dtype" %in% names(formals)) - formals['dtype'] <- list(NULL) - } - - fn <- as.function.default(c(formals, body), envir = parent.frame()) - - if(is.character(py_fn_name)) - attr(fn, "py_function_name") <- py_fn_name - - fn +#' @inherit metric_binary_accuracy return +#' @export +#' @family losses +#' @family metrics +# @seealso +# + +#' +#' @tether keras.metrics.binary_focal_crossentropy +metric_binary_focal_crossentropy <- +function (y_true, y_pred, apply_class_balancing = FALSE, alpha = 0.25, + gamma = 2, from_logits = FALSE, label_smoothing = 0, axis = -1L) +{ + args <- capture_args(list( + y_true = function (x) + if (is_py_object(x)) x + else np_array(x), + y_pred = function (x) + if (is_py_object(x)) x + else np_array(x), axis = as_axis) + ) + do.call(keras$metrics$binary_focal_crossentropy, args) } - - - - - -#' Approximates the AUC (Area under the curve) of the ROC or PR curves +#' Computes the categorical focal crossentropy loss. #' -#' @details The AUC (Area under the curve) of the ROC (Receiver operating -#' characteristic; default) or PR (Precision Recall) curves are quality measures -#' of binary classifiers. Unlike the accuracy, and like cross-entropy losses, -#' ROC-AUC and PR-AUC evaluate all the operational points of a model. +#' @description #' -#' This class approximates AUCs using a Riemann sum. During the metric -#' accumulation phrase, predictions are accumulated within predefined buckets by -#' value. The AUC is then computed by interpolating per-bucket averages. These -#' buckets define the evaluated operational points. -#' -#' This metric creates four local variables, `true_positives`, `true_negatives`, -#' `false_positives` and `false_negatives` that are used to compute the AUC. To -#' discretize the AUC curve, a linearly spaced set of thresholds is used to -#' compute pairs of recall and precision values. The area under the ROC-curve is -#' therefore computed using the height of the recall values by the false -#' positive rate, while the area under the PR-curve is the computed using the -#' height of the precision values by the recall. 
+#' # Examples +#' ```{r} +#' y_true <- rbind(c(0, 1, 0), c(0, 0, 1)) +#' y_pred <- rbind(c(0.05, 0.9, 0.05), c(0.1, 0.85, 0.05)) +#' loss <- loss_categorical_focal_crossentropy(y_true, y_pred) +#' loss +#' ``` #' -#' This value is ultimately returned as `auc`, an idempotent operation that -#' computes the area under a discretized curve of precision versus recall values -#' (computed using the aforementioned variables). The `num_thresholds` variable -#' controls the degree of discretization with larger numbers of thresholds more -#' closely approximating the true AUC. The quality of the approximation may vary -#' dramatically depending on `num_thresholds`. The `thresholds` parameter can be -#' used to manually specify thresholds which split the predictions more evenly. -#' -#' For a best approximation of the real AUC, `predictions` should be distributed -#' approximately uniformly in the range `[0, 1]` (if `from_logits=FALSE`). The -#' quality of the AUC approximation may be poor if this is not the case. Setting -#' `summation_method` to 'minoring' or 'majoring' can help quantify the error in -#' the approximation by providing lower or upper bound estimate of the AUC. -#' -#' If `sample_weight` is `NULL`, weights default to 1. Use `sample_weight` of 0 -#' to mask values. -#' -#' @param num_thresholds (Optional) Defaults to 200. The number of thresholds toa -#' use when discretizing the roc curve. Values must be > 1. -#' -#' @param curve (Optional) Specifies the name of the curve to be computed, 'ROC' -#' (default) or 'PR' for the Precision-Recall-curve. -#' -#' @param summation_method (Optional) Specifies the [Riemann summation method]( -#' https://en.wikipedia.org/wiki/Riemann_sum) used. 'interpolation' (default) -#' applies mid-point summation scheme for `ROC`. For PR-AUC, interpolates -#' (true/false) positives but not the ratio that is precision (see Davis & -#' Goadrich 2006 for details); 'minoring' applies left summation for -#' increasing intervals and right summation for decreasing intervals; -#' 'majoring' does the opposite. -#' -#' @param thresholds (Optional) A list of floating point values to use as the -#' thresholds for discretizing the curve. If set, the `num_thresholds` -#' parameter is ignored. Values should be in `[0, 1]`. Endpoint thresholds equal -#' to {-epsilon, 1+epsilon} for a small positive epsilon value will be -#' automatically included with these to correctly handle predictions equal to -#' exactly 0 or 1. -#' -#' @param multi_label boolean indicating whether multilabel data should be -#' treated as such, wherein AUC is computed separately for each label and then -#' averaged across labels, or (when FALSE) if the data should be flattened -#' into a single label before AUC computation. In the latter case, when -#' multilabel data is passed to AUC, each label-prediction pair is treated as -#' an individual data point. Should be set to FALSE for multi-class data. -#' -#' @param num_labels (Optional) The number of labels, used when `multi_label` is -#' TRUE. If `num_labels` is not specified, then state variables get created on -#' the first call to `update_state`. -#' -#' @param label_weights (Optional) list, array, or tensor of non-negative -#' weights used to compute AUCs for multilabel data. When `multi_label` is -#' TRUE, the weights are applied to the individual label AUCs when they are -#' averaged to produce the multi-label AUC. 
When it's FALSE, they are used to -#' weight the individual label predictions in computing the confusion matrix -#' on the flattened data. Note that this is unlike class_weights in that -#' class_weights weights the example depending on the value of its label, -#' whereas label_weights depends only on the index of that label before -#' flattening; therefore `label_weights` should not be used for multi-class -#' data. -#' -#' @param from_logits boolean indicating whether the predictions (`y_pred` in -#' `update_state`) are probabilities or sigmoid logits. As a rule of thumb, -#' when using a keras loss, the `from_logits` constructor argument of the loss -#' should match the AUC `from_logits` constructor argument. -#' -#' -#' @inheritParams metric-or-Metric -#' @inherit Metric return -#' @family metrics -#' @export -metric_auc <- py_metric_wrapper( - NULL, AUC, - alist( - num_thresholds = 200L, - curve = 'ROC', - summation_method = 'interpolation', - thresholds = NULL, - multi_label = FALSE, - num_labels = NULL, - label_weights = NULL, - from_logits = FALSE - ), - list(num_thresholds = as.integer) -) - - - -#' Calculates how often predictions equal labels +#' @returns +#' Categorical focal crossentropy loss value. #' -#' @details -#' This metric creates two local variables, `total` and `count` that are used to -#' compute the frequency with which `y_pred` matches `y_true`. This frequency is -#' ultimately returned as `binary accuracy`: an idempotent operation that simply -#' divides `total` by `count`. +#' @param y_true +#' Tensor of one-hot true targets. #' -#' If `sample_weight` is `NULL`, weights default to 1. -#' Use `sample_weight` of 0 to mask values. +#' @param y_pred +#' Tensor of predicted targets. #' +#' @param alpha +#' A weight balancing factor for all classes, default is `0.25` as +#' mentioned in the reference. It can be a list of floats or a scalar. +#' In the multi-class case, alpha may be set by inverse class +#' frequency by using `compute_class_weight` from `sklearn.utils`. #' -#' @inheritParams metric-or-Metric -#' @inherit Metric return -#' @family metrics +#' @param gamma +#' A focusing parameter, default is `2.0` as mentioned in the +#' reference. It helps to gradually reduce the importance given to +#' simple examples in a smooth manner. When `gamma` = 0, there is +#' no focal effect on the categorical crossentropy. +#' +#' @param from_logits +#' Whether `y_pred` is expected to be a logits tensor. By +#' default, we assume that `y_pred` encodes a probability +#' distribution. +#' +#' @param label_smoothing +#' Float in `[0, 1].` If > `0` then smooth the labels. For +#' example, if `0.1`, use `0.1 / num_classes` for non-target labels +#' and `0.9 + 0.1 / num_classes` for target labels. +#' +#' @param axis +#' Defaults to `-1`. The dimension along which the entropy is +#' computed. 
+#' +#' @inherit metric_binary_accuracy return #' @export -metric_accuracy <- py_metric_wrapper( - NULL, Accuracy -) - +#' @family losses +#' @family metrics +# @seealso +# + +#' +#' @tether keras.metrics.categorical_focal_crossentropy +metric_categorical_focal_crossentropy <- +function (y_true, y_pred, alpha = 0.25, gamma = 2, from_logits = FALSE, + label_smoothing = 0, axis = -1L) +{ + args <- capture_args(list(y_true = function (x) + if (is_py_object(x)) + x + else np_array(x), y_pred = function (x) + if (is_py_object(x)) + x + else np_array(x), axis = as_axis)) + do.call(keras$metrics$categorical_focal_crossentropy, args) +} -#' Computes the recall of the predictions with respect to the labels -#' -#' @details This metric creates two local variables, `true_positives` and -#' `false_negatives`, that are used to compute the recall. This value is -#' ultimately returned as `recall`, an idempotent operation that simply divides -#' `true_positives` by the sum of `true_positives` and `false_negatives`. -#' -#' If `sample_weight` is `NULL`, weights default to 1. Use `sample_weight` of 0 -#' to mask values. +#' Computes Huber loss value. #' -#' If `top_k` is set, recall will be computed as how often on average a class -#' among the labels of a batch entry is in the top-k predictions. +#' @description +#' Formula: +#' ```{r, eval = FALSE} +#' for (x in error) { +#' if (abs(x) <= delta){ +#' loss <- c(loss, (0.5 * x^2)) +#' } else if (abs(x) > delta) { +#' loss <- c(loss, (delta * abs(x) - 0.5 * delta^2)) +#' } +#' } +#' loss <- mean(loss) +#' ``` +#' See: [Huber loss](https://en.wikipedia.org/wiki/Huber_loss). #' -#' If `class_id` is specified, we calculate recall by considering only the -#' entries in the batch for which `class_id` is in the label, and computing the -#' fraction of them for which `class_id` is above the threshold and/or in the -#' top-k predictions. +#' # Examples +#' ```{r} +#' y_true <- rbind(c(0, 1), c(0, 0)) +#' y_pred <- rbind(c(0.6, 0.4), c(0.4, 0.6)) +#' loss <- loss_huber(y_true, y_pred) +#' ``` #' -#' @param thresholds (Optional) A float value or a list of float -#' threshold values in `[0, 1]`. A threshold is compared with prediction values -#' to determine the truth value of predictions (i.e., above the threshold is -#' `true`, below is `false`). One metric value is generated for each threshold -#' value. If neither thresholds nor top_k are set, the default is to calculate -#' recall with `thresholds=0.5`. +#' @returns +#' Tensor with one scalar loss entry per sample. #' -#' @param top_k (Optional) Unset by default. An int value specifying the top-k -#' predictions to consider when calculating recall. +#' @param y_true +#' tensor of true targets. #' -#' @param class_id (Optional) Integer class ID for which we want binary metrics. -#' This must be in the half-open interval `[0, num_classes)`, where -#' `num_classes` is the last dimension of predictions. +#' @param y_pred +#' tensor of predicted targets. #' +#' @param delta +#' A float, the point where the Huber loss function changes from a +#' quadratic to linear. Defaults to `1.0`. 
#' -#' @inheritParams metric-or-Metric -#' @inherit Metric return -#' @family metrics +#' @inherit metric_binary_accuracy return #' @export -metric_recall <- py_metric_wrapper( - NULL, Recall, - alist(thresholds=NULL, top_k=NULL, class_id=NULL), - list(top_k = as_nullable_integer, - class_id = as_nullable_integer) -) - +#' @family losses +#' @family metrics +# @seealso +# + +#' +#' @tether keras.metrics.huber +metric_huber <- +function (y_true, y_pred, delta = 1) +{ + args <- capture_args(list(y_true = function (x) + if (is_py_object(x)) + x + else np_array(x), y_pred = function (x) + if (is_py_object(x)) + x + else np_array(x))) + do.call(keras$metrics$huber, args) +} -#' Computes best recall where precision is >= specified value -#' -#' @details For a given score-label-distribution the required precision might -#' not be achievable, in this case 0.0 is returned as recall. -#' -#' This metric creates four local variables, `true_positives`, `true_negatives`, -#' `false_positives` and `false_negatives` that are used to compute the recall -#' at the given precision. The threshold for the given precision value is -#' computed and used to evaluate the corresponding recall. +#' Logarithm of the hyperbolic cosine of the prediction error. #' -#' If `sample_weight` is `NULL`, weights default to 1. Use `sample_weight` of 0 -#' to mask values. -#' -#' If `class_id` is specified, we calculate precision by considering only the -#' entries in the batch for which `class_id` is above the threshold predictions, -#' and computing the fraction of them for which `class_id` is indeed a correct -#' label. +#' @description +#' Formula: +#' ```{r, eval = FALSE} +#' loss <- mean(log(cosh(y_pred - y_true)), axis=-1) +#' ``` #' -#' @param precision A scalar value in range `[0, 1]`. +#' Note that `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small +#' `x` and to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works +#' mostly like the mean squared error, but will not be so strongly affected by +#' the occasional wildly incorrect prediction. +#' +#' # Examples +#' ```{r} +#' y_true <- rbind(c(0., 1.), c(0., 0.)) +#' y_pred <- rbind(c(1., 1.), c(0., 0.)) +#' loss <- metric_log_cosh(y_true, y_pred) +#' loss +#' ``` #' -#' @param num_thresholds (Optional) Defaults to 200. The number of thresholds to -#' use for matching the given precision. +#' @returns +#' Logcosh error values with shape = `[batch_size, d0, .. dN-1]`. #' -#' @param class_id (Optional) Integer class ID for which we want binary metrics. -#' This must be in the half-open interval `[0, num_classes)`, where -#' `num_classes` is the last dimension of predictions. +#' @param y_true +#' Ground truth values with shape = `[batch_size, d0, .. dN]`. #' +#' @param y_pred +#' The predicted values with shape = `[batch_size, d0, .. dN]`. 
#' -#' @inheritParams metric-or-Metric -#' @inherit Metric return -#' @family metrics +#' @inherit metric_binary_accuracy return #' @export -metric_recall_at_precision <- py_metric_wrapper( - NULL, RecallAtPrecision, - alist(precision=, num_thresholds=200L, class_id=NULL), - list(num_thresholds = as.integer) -) - +#' @family losses +#' @family metrics +# @seealso +# + +#' +#' @tether keras.metrics.log_cosh +metric_log_cosh <- +function (y_true, y_pred) +{ + args <- capture_args(list(y_true = function (x) + if (is_py_object(x)) + x + else np_array(x), y_pred = function (x) + if (is_py_object(x)) + x + else np_array(x))) + do.call(keras$metrics$log_cosh, args) +} -#' Computes the precision of the predictions with respect to the labels -#' -#' @details The metric creates two local variables, `true_positives` and -#' `false_positives` that are used to compute the precision. This value is -#' ultimately returned as `precision`, an idempotent operation that simply -#' divides `true_positives` by the sum of `true_positives` and -#' `false_positives`. -#' -#' If `sample_weight` is `NULL`, weights default to 1. Use `sample_weight` of 0 -#' to mask values. -#' -#' If `top_k` is set, we'll calculate precision as how often on average a class -#' among the top-k classes with the highest predicted values of a batch entry is -#' correct and can be found in the label for that entry. +#' Calculates how often predictions match binary labels. #' -#' If `class_id` is specified, we calculate precision by considering only the -#' entries in the batch for which `class_id` is above the threshold and/or in -#' the top-k highest predictions, and computing the fraction of them for which -#' `class_id` is indeed a correct label. +#' @description +#' This metric creates two local variables, `total` and `count` that are used +#' to compute the frequency with which `y_pred` matches `y_true`. This +#' frequency is ultimately returned as `binary accuracy`: an idempotent +#' operation that simply divides `total` by `count`. #' -#' @param thresholds (Optional) A float value or a list of float -#' threshold values in `[0, 1]`. A threshold is compared with prediction values -#' to determine the truth value of predictions (i.e., above the threshold is -#' `true`, below is `false`). One metric value is generated for each threshold -#' value. If neither thresholds nor top_k are set, the default is to calculate -#' precision with `thresholds=0.5`. +#' If `sample_weight` is `NULL`, weights default to 1. +#' Use `sample_weight` of 0 to mask values. #' -#' @param top_k (Optional) Unset by default. An int value specifying the top-k -#' predictions to consider when calculating precision. +#' # Usage +#' Standalone usage: #' -#' @param class_id (Optional) Integer class ID for which we want binary metrics. -#' This must be in the half-open interval `[0, num_classes)`, where -#' `num_classes` is the last dimension of predictions. 
+#' ```{r} +#' m <- metric_binary_accuracy() +#' m$update_state(rbind(1, 1, 0, 0), rbind(0.98, 1, 0, 0.6)) +#' m$result() +#' # 0.75 +#' ``` #' +#' ```{r} +#' m$reset_state() +#' m$update_state(rbind(1, 1, 0, 0), rbind(0.98, 1, 0, 0.6), +#' sample_weight = c(1, 0, 0, 1)) +#' m$result() +#' # 0.5 +#' ``` #' -#' @inheritParams metric-or-Metric -#' @inherit Metric return -#' @family metrics -#' @export -metric_precision <- py_metric_wrapper( - NULL, Precision, - alist(thresholds=NULL, top_k=NULL, class_id=NULL), - list(top_k = as_nullable_integer) -) - - - -#' Computes best precision where recall is >= specified value +#' Usage with `compile()` API: #' -#' @details This metric creates four local variables, `true_positives`, -#' `true_negatives`, `false_positives` and `false_negatives` that are used to -#' compute the precision at the given recall. The threshold for the given recall -#' value is computed and used to evaluate the corresponding precision. +#' ```{r, eval = FALSE} +#' model %>% compile(optimizer='sgd', +#' loss='binary_crossentropy', +#' metrics=list(metric_binary_accuracy())) +#' ``` #' -#' If `sample_weight` is `NULL`, weights default to 1. Use `sample_weight` of 0 -#' to mask values. +#' @param name +#' (Optional) string name of the metric instance. #' -#' If `class_id` is specified, we calculate precision by considering only the -#' entries in the batch for which `class_id` is above the threshold predictions, -#' and computing the fraction of them for which `class_id` is indeed a correct -#' label. +#' @param dtype +#' (Optional) data type of the metric result. #' -#' @param recall A scalar value in range `[0, 1]`. +#' @param threshold +#' (Optional) Float representing the threshold for deciding +#' whether prediction values are 1 or 0. #' -#' @param num_thresholds (Optional) Defaults to 200. The number of thresholds to -#' use for matching the given recall. +#' @param y_true +#' Tensor of true targets. #' -#' @param class_id (Optional) Integer class ID for which we want binary metrics. -#' This must be in the half-open interval `[0, num_classes)`, where -#' `num_classes` is the last dimension of predictions. +#' @param y_pred +#' Tensor of predicted targets. #' +#' @param ... +#' For forward/backward compatability. #' -#' @inheritParams metric-or-Metric -#' @inherit Metric return -#' @family metrics #' @export -metric_precision_at_recall <- py_metric_wrapper( - NULL, PrecisionAtRecall, - alist(recall=, num_thresholds=200L, class_id=NULL), - list(num_thresholds = as.integer) -) - - - -#' Computes root mean squared error metric between `y_true` and `y_pred` -#' -#' @inheritParams metric-or-Metric -#' @inherit Metric return +#' @family accuracy metrics #' @family metrics -#' @export -metric_root_mean_squared_error <- py_metric_wrapper( - NULL, RootMeanSquaredError -) - +#' @returns If `y_true` and `y_pred` are missing, a `Metric` +#' instance is returned. The `Metric` instance that can be passed directly to +#' `compile(metrics = )`, or used as a standalone object. See `?Metric` for +#' example usage. If `y_true` and `y_pred` are provided, then a tensor with +#' the computed value is returned. 
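+#' +#' For example (a minimal sketch of the two calling conventions described +#' above): +#' ```{r, eval = FALSE} +#' # a `Metric` instance, for use with `compile(metrics = )`: +#' metric_binary_accuracy(threshold = 0.5) +#' # or computed directly from arrays, returning a tensor: +#' metric_binary_accuracy(c(1, 1, 0, 0), c(0.98, 1, 0, 0.6)) +#' ```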
+#' @seealso +#' + +# + +#' +#' @tether keras.metrics.BinaryAccuracy +metric_binary_accuracy <- +function (y_true, y_pred, threshold = 0.5, ..., name = "binary_accuracy", + dtype = NULL) +{ + args <- capture_args(list(y_true = function (x) + if (is_py_object(x)) + x + else np_array(x), y_pred = function (x) + if (is_py_object(x)) + x + else np_array(x))) + callable <- if (missing(y_true) && missing(y_pred)) + keras$metrics$BinaryAccuracy + else keras$metrics$binary_accuracy + do.call(callable, args) +} -#' Computes best sensitivity where specificity is >= specified value +#' Calculates how often predictions match one-hot labels. #' -#' The sensitivity at a given specificity. +#' @description +#' You can provide logits of classes as `y_pred`, since argmax of +#' logits and probabilities are same. #' -#' `Sensitivity` measures the proportion of actual positives that are correctly -#' identified as such `(tp / (tp + fn))`. `Specificity` measures the proportion of -#' actual negatives that are correctly identified as such `(tn / (tn + fp))`. +#' This metric creates two local variables, `total` and `count` that are used +#' to compute the frequency with which `y_pred` matches `y_true`. This +#' frequency is ultimately returned as `categorical accuracy`: an idempotent +#' operation that simply divides `total` by `count`. +#' +#' `y_pred` and `y_true` should be passed in as vectors of probabilities, +#' rather than as labels. If necessary, use `op_one_hot` to expand `y_true` as +#' a vector. #' -#' This metric creates four local variables, `true_positives`, `true_negatives`, -#' `false_positives` and `false_negatives` that are used to compute the -#' sensitivity at the given specificity. The threshold for the given specificity -#' value is computed and used to evaluate the corresponding sensitivity. +#' If `sample_weight` is `NULL`, weights default to 1. +#' Use `sample_weight` of 0 to mask values. #' -#' If `sample_weight` is `NULL`, weights default to 1. Use `sample_weight` of 0 -#' to mask values. +#' # Usage +#' Standalone usage: #' -#' If `class_id` is specified, we calculate precision by considering only the -#' entries in the batch for which `class_id` is above the threshold predictions, -#' and computing the fraction of them for which `class_id` is indeed a correct -#' label. +#' ```{r} +#' m <- metric_categorical_accuracy() +#' m$update_state(rbind(c(0, 0, 1), c(0, 1, 0)), rbind(c(0.1, 0.9, 0.8), +#' c(0.05, 0.95, 0))) +#' m$result() +#' ``` +#' +#' ```{r} +#' m$reset_state() +#' m$update_state(rbind(c(0, 0, 1), c(0, 1, 0)), rbind(c(0.1, 0.9, 0.8), +#' c(0.05, 0.95, 0)), +#' sample_weight = c(0.7, 0.3)) +#' m$result() +#' # 0.3 +#' ``` +#' +#' Usage with `compile()` API: #' -#' For additional information about specificity and sensitivity, see [the -#' following](https://en.wikipedia.org/wiki/Sensitivity_and_specificity). +#' ```{r, eval = FALSE} +#' model %>% compile(optimizer = 'sgd', +#' loss = 'categorical_crossentropy', +#' metrics = list(metric_categorical_accuracy())) +#' ``` #' -#' @param specificity A scalar value in range `[0, 1]`. +#' @param name +#' (Optional) string name of the metric instance. #' -#' @param num_thresholds (Optional) Defaults to 200. The number of thresholds to -#' use for matching the given specificity. +#' @param dtype +#' (Optional) data type of the metric result. #' -#' @param class_id (Optional) Integer class ID for which we want binary metrics. 
-#' This must be in the half-open interval `[0, num_classes)`, where -#' `num_classes` is the last dimension of predictions. +#' @param y_true +#' Tensor of true targets. #' +#' @param y_pred +#' Tensor of predicted targets. +#' +#' @param ... +#' For forward/backward compatability. #' -#' @inheritParams metric-or-Metric -#' @inherit Metric return -#' @family metrics #' @export -metric_sensitivity_at_specificity <- py_metric_wrapper( - NULL, SensitivityAtSpecificity, - alist(specificity = , num_thresholds = 200L, class_id = NULL), - list(num_thresholds = as.integer) -) - +#' @family accuracy metrics +#' @family metrics +#' @inherit metric_binary_accuracy return +#' @seealso +#' + +# + +#' +#' @tether keras.metrics.CategoricalAccuracy +metric_categorical_accuracy <- +function (y_true, y_pred, ..., name = "categorical_accuracy", + dtype = NULL) +{ + args <- capture_args(list(y_true = function (x) + if (is_py_object(x)) + x + else np_array(x), y_pred = function (x) + if (is_py_object(x)) + x + else np_array(x))) + callable <- if (missing(y_true) && missing(y_pred)) + keras$metrics$CategoricalAccuracy + else keras$metrics$categorical_accuracy + do.call(callable, args) +} -#' Computes best specificity where sensitivity is >= specified value +#' Calculates how often predictions match integer labels. #' -#' @details -#' `Sensitivity` measures the proportion of actual positives that are correctly -#' identified as such `(tp / (tp + fn))`. -#' `Specificity` measures the proportion of actual negatives that are correctly -#' identified as such `(tn / (tn + fp))`. +#' @description +#' ```{r, eval=FALSE} +#' acc <- sample_weight %*% (y_true == which.max(y_pred)) +#' ``` +#' +#' You can provide logits of classes as `y_pred`, since argmax of +#' logits and probabilities are same. #' -#' This metric creates four local variables, `true_positives`, `true_negatives`, -#' `false_positives` and `false_negatives` that are used to compute the -#' specificity at the given sensitivity. The threshold for the given sensitivity -#' value is computed and used to evaluate the corresponding specificity. +#' This metric creates two local variables, `total` and `count` that are used +#' to compute the frequency with which `y_pred` matches `y_true`. This +#' frequency is ultimately returned as `sparse categorical accuracy`: an +#' idempotent operation that simply divides `total` by `count`. #' #' If `sample_weight` is `NULL`, weights default to 1. #' Use `sample_weight` of 0 to mask values. #' -#' If `class_id` is specified, we calculate precision by considering only the -#' entries in the batch for which `class_id` is above the threshold predictions, -#' and computing the fraction of them for which `class_id` is indeed a correct -#' label. +#' # Usage +#' Standalone usage: #' -#' For additional information about specificity and sensitivity, see -#' [the following](https://en.wikipedia.org/wiki/Sensitivity_and_specificity). +#' ```{r} +#' m <- metric_sparse_categorical_accuracy() +#' m$update_state(rbind(2L, 1L), rbind(c(0.1, 0.6, 0.3), c(0.05, 0.95, 0))) +#' m$result() +#' ``` +#' +#' ```{r} +#' m$reset_state() +#' m$update_state(rbind(2L, 1L), rbind(c(0.1, 0.6, 0.3), c(0.05, 0.95, 0)), +#' sample_weight = c(0.7, 0.3)) +#' m$result() +#' ``` #' -#' @param sensitivity A scalar value in range `[0, 1]`. +#' Usage with `compile()` API: #' -#' @param num_thresholds (Optional) Defaults to 200. The number of thresholds to -#' use for matching the given sensitivity. 
+#' ```{r, eval = FALSE} +#' model %>% compile(optimizer = 'sgd', +#' loss = 'sparse_categorical_crossentropy', +#' metrics = list(metric_sparse_categorical_accuracy())) +#' ``` #' -#' @param class_id (Optional) Integer class ID for which we want binary metrics. -#' This must be in the half-open interval `[0, num_classes)`, where -#' `num_classes` is the last dimension of predictions. +#' @param name +#' (Optional) string name of the metric instance. #' +#' @param dtype +#' (Optional) data type of the metric result. +#' +#' @param y_true +#' Tensor of true targets. +#' +#' @param y_pred +#' Tensor of predicted targets. +#' +#' @param ... +#' For forward/backward compatability. #' -#' @inheritParams metric-or-Metric -#' @inherit Metric return -#' @family metrics #' @export -metric_specificity_at_sensitivity <- py_metric_wrapper( - NULL, SpecificityAtSensitivity, - alist(sensitivity = , num_thresholds = 200L, class_id = NULL), - list(num_thresholds = as.integer) -) - +#' @family accuracy metrics +#' @family metrics +#' @inherit metric_binary_accuracy return +#' @seealso +#' + +# + +#' +#' @tether keras.metrics.SparseCategoricalAccuracy +metric_sparse_categorical_accuracy <- +function (y_true, y_pred, ..., name = "sparse_categorical_accuracy", + dtype = NULL) +{ + args <- capture_args(list(y_true = function (x) + if (is_py_object(x)) + x + else np_array(x), y_pred = function (x) + if (is_py_object(x)) + x + else np_array(x))) + callable <- if (missing(y_true) && missing(y_pred)) + keras$metrics$SparseCategoricalAccuracy + else keras$metrics$sparse_categorical_accuracy + do.call(callable, args) +} -#' Computes the (weighted) sum of the given values +#' Computes how often integer targets are in the top `K` predictions. #' -#' @details -#' For example, if values is `c(1, 3, 5, 7)` then the sum is 16. -#' If the weights were specified as `c(1, 1, 0, 0)` then the sum would be 4. +#' @description +#' +#' # Usage +#' Standalone usage: +#' +#' ```{r} +#' m <- metric_sparse_top_k_categorical_accuracy(k = 1L) +#' m$update_state( +#' rbind(2, 1), +#' op_array(rbind(c(0.1, 0.9, 0.8), c(0.05, 0.95, 0)), dtype = "float32") +#' ) +#' m$result() +#' ``` #' -#' This metric creates one variable, `total`, that is used to compute the sum of -#' `values`. This is ultimately returned as `sum`. +#' ```{r} +#' m$reset_state() +#' m$update_state( +#' rbind(2, 1), +#' op_array(rbind(c(0.1, 0.9, 0.8), c(0.05, 0.95, 0)), dtype = "float32"), +#' sample_weight = c(0.7, 0.3) +#' ) +#' m$result() +#' ``` #' -#' If `sample_weight` is `NULL`, weights default to 1. Use `sample_weight` of 0 -#' to mask values. +#' Usage with `compile()` API: #' +#' ```{r, eval = FALSE} +#' model %>% compile(optimizer = 'sgd', +#' loss = 'sparse_categorical_crossentropy', +#' metrics = list(metric_sparse_top_k_categorical_accuracy())) +#' ``` #' -#' @inheritParams metric-or-Metric -#' @inherit Metric return -#' @family metrics -#' @export -metric_sum <- py_metric_wrapper( - NULL, Sum -) - - - -#' Calculates how often predictions match binary labels +#' @param k +#' (Optional) Number of top elements to look at for computing accuracy. +#' Defaults to `5`. #' -#' @details -#' This metric creates two local variables, `total` and `count` that are used to -#' compute the frequency with which `y_pred` matches `y_true`. This frequency is -#' ultimately returned as `binary accuracy`: an idempotent operation that simply -#' divides `total` by `count`. +#' @param name +#' (Optional) string name of the metric instance. 
#' -#' If `sample_weight` is `NULL`, weights default to 1. -#' Use `sample_weight` of 0 to mask values. +#' @param dtype +#' (Optional) data type of the metric result. #' -#' @param threshold (Optional) Float representing the threshold for deciding -#' whether prediction values are 1 or 0. +#' @param y_true +#' Tensor of true targets. #' +#' @param y_pred +#' Tensor of predicted targets. #' -#' @inheritParams metric-or-Metric -#' @inherit metric-or-Metric return -#' @family metrics +#' @param ... +#' For forward/backward compatibility. +#' +#' @inherit metric_binary_accuracy return #' @export -metric_binary_accuracy <- py_metric_wrapper( - binary_accuracy, BinaryAccuracy, - alist(threshold = 0.5) -) - +#' @family accuracy metrics +#' @family metrics +#' @seealso +#' + +# + +#' +#' @tether keras.metrics.SparseTopKCategoricalAccuracy +metric_sparse_top_k_categorical_accuracy <- +function (y_true, y_pred, k = 5L, ..., name = "sparse_top_k_categorical_accuracy", + dtype = NULL) +{ + args <- capture_args(list(k = as_integer, y_true = function (x) + if (is_py_object(x)) + x + else np_array(x), y_pred = function (x) + if (is_py_object(x)) + x + else np_array(x))) + callable <- if (missing(y_true) && missing(y_pred)) + keras$metrics$SparseTopKCategoricalAccuracy + else keras$metrics$sparse_top_k_categorical_accuracy + do.call(callable, args) +} -#' Computes the crossentropy metric between the labels and predictions +#' Computes how often targets are in the top `K` predictions. #' -#' @details -#' This is the crossentropy metric class to be used when there are only two -#' label classes (0 and 1). +#' @description #' -#' @param from_logits (Optional) Whether output is expected to be a logits tensor. -#' By default, we consider that output encodes a probability distribution. +#' # Usage +#' Standalone usage: #' -#' @param label_smoothing (Optional) Float in `[0, 1]`. When > 0, label values are -#' smoothed, meaning the confidence on label values are relaxed. -#' e.g. `label_smoothing = 0.2` means that we will use a value of `0.1` for -#' label `0` and `0.9` for label `1`". +#' ```{r} +#' m <- metric_top_k_categorical_accuracy(k = 1) +#' m$update_state( +#' rbind(c(0, 0, 1), c(0, 1, 0)), +#' op_array(rbind(c(0.1, 0.9, 0.8), c(0.05, 0.95, 0)), dtype = "float32") +#' ) +#' m$result() +#' ``` #' +#' ```{r} +#' m$reset_state() +#' m$update_state( +#' rbind(c(0, 0, 1), c(0, 1, 0)), +#' op_array(rbind(c(0.1, 0.9, 0.8), c(0.05, 0.95, 0)), dtype = "float32"), +#' sample_weight = c(0.7, 0.3)) +#' m$result() +#' ``` #' -#' @inheritParams metric-or-Metric -#' @inherit metric-or-Metric return -#' @family metrics -#' @export -metric_binary_crossentropy <- py_metric_wrapper( - binary_crossentropy, BinaryCrossentropy, - alist(from_logits=FALSE, label_smoothing=0, axis=-1L) -) - - - -#' Calculates how often predictions match one-hot labels +#' Usage with `compile()` API: #' -#' @details -#' You can provide logits of classes as `y_pred`, since argmax of -#' logits and probabilities are same. +#' ```{r, eval = FALSE} +#' model %>% compile(optimizer = 'sgd', +#' loss = 'categorical_crossentropy', +#' metrics = list(metric_top_k_categorical_accuracy())) +#' ``` #' -#' This metric creates two local variables, `total` and `count` that are used to -#' compute the frequency with which `y_pred` matches `y_true`. This frequency is -#' ultimately returned as `categorical accuracy`: an idempotent operation that -#' simply divides `total` by `count`.
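+#' +#' As a quick illustration of `k` (a sketch reusing the arrays from the +#' standalone example above), top-1 accuracy agrees with plain categorical +#' accuracy: +#' ```{r, eval = FALSE} +#' y_true <- rbind(c(0, 0, 1), c(0, 1, 0)) +#' y_pred <- rbind(c(0.1, 0.9, 0.8), c(0.05, 0.95, 0)) +#' metric_top_k_categorical_accuracy(y_true, y_pred, k = 1) +#' metric_categorical_accuracy(y_true, y_pred) # same per-sample values +#' ```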
+#' @param k +#' (Optional) Number of top elements to look at for computing accuracy. +#' Defaults to `5`. #' -#' `y_pred` and `y_true` should be passed in as vectors of probabilities, rather -#' than as labels. If necessary, use `tf.one_hot` to expand `y_true` as a vector. +#' @param name +#' (Optional) string name of the metric instance. #' -#' If `sample_weight` is `NULL`, weights default to 1. -#' Use `sample_weight` of 0 to mask values. +#' @param dtype +#' (Optional) data type of the metric result. #' +#' @param y_true +#' Tensor of true targets. #' -#' @inheritParams metric-or-Metric -#' @inherit metric-or-Metric return -#' @family metrics +#' @param y_pred +#' Tensor of predicted targets. +#' +#' @param ... +#' For forward/backward compatibility. +#' +#' @inherit metric_binary_accuracy return #' @export -metric_categorical_accuracy <- py_metric_wrapper( - categorical_accuracy, CategoricalAccuracy -) - +#' @family accuracy metrics +#' @family metrics +#' @seealso +#' + +# + +#' +#' @tether keras.metrics.TopKCategoricalAccuracy +metric_top_k_categorical_accuracy <- +function (y_true, y_pred, k = 5L, ..., name = "top_k_categorical_accuracy", + dtype = NULL) +{ + args <- capture_args(list( + k = as_integer, + y_true = function(x) + if (is_py_object(x)) x else np_array(x), + y_pred = function(x) + if (is_py_object(x)) x else np_array(x) + )) + callable <- if (missing(y_true) && missing(y_pred)) + keras$metrics$TopKCategoricalAccuracy + else keras$metrics$top_k_categorical_accuracy + do.call(callable, args) +} -#' Computes the crossentropy metric between the labels and predictions +#' Approximates the AUC (Area under the curve) of the ROC or PR curves. #' -#' @details -#' This is the crossentropy metric class to be used when there are multiple -#' label classes (2 or more). Here we assume that labels are given as a `one_hot` -#' representation. eg., When labels values are `c(2, 0, 1)`: -#' ``` -#' y_true = rbind(c(0, 0, 1), -#' c(1, 0, 0), -#' c(0, 1, 0))` -#' ``` -#' @param from_logits (Optional) Whether output is expected to be a logits tensor. -#' By default, we consider that output encodes a probability distribution. +#' @description +#' The AUC (Area under the curve) of the ROC (Receiver operating +#' characteristic; default) or PR (Precision Recall) curves are quality +#' measures of binary classifiers. Unlike the accuracy, and like cross-entropy +#' losses, ROC-AUC and PR-AUC evaluate all the operational points of a model. #' -#' @param label_smoothing (Optional) Float in `[0, 1]`. When > 0, label values are -#' smoothed, meaning the confidence on label values are relaxed. e.g. -#' `label_smoothing=0.2` means that we will use a value of `0.1` for label -#' `0` and `0.9` for label `1`" +#' This class approximates AUCs using a Riemann sum. During the metric +#' accumulation phase, predictions are accumulated within predefined buckets +#' by value. The AUC is then computed by interpolating per-bucket averages. +#' These buckets define the evaluated operational points. #' +#' This metric creates four local variables, `true_positives`, +#' `true_negatives`, `false_positives` and `false_negatives` that are used to +#' compute the AUC. To discretize the AUC curve, a linearly spaced set of +#' thresholds is used to compute pairs of recall and precision values.
The area +#' under the ROC-curve is therefore computed using the height of the recall +#' values by the false positive rate, while the area under the PR-curve is +#' computed using the height of the precision values by the recall. #' -#' @inheritParams metric-or-Metric -#' @inherit metric-or-Metric return -#' @family metrics -#' @export -metric_categorical_crossentropy <- py_metric_wrapper( - categorical_crossentropy, CategoricalCrossentropy, - alist(from_logits = FALSE, label_smoothing = 0, axis = -1L) -) - - - -#' Computes the cosine similarity between the labels and predictions +#' This value is ultimately returned as `auc`, an idempotent operation that +#' computes the area under a discretized curve of precision versus recall +#' values (computed using the aforementioned variables). The `num_thresholds` +#' variable controls the degree of discretization with larger numbers of +#' thresholds more closely approximating the true AUC. The quality of the +#' approximation may vary dramatically depending on `num_thresholds`. The +#' `thresholds` parameter can be used to manually specify thresholds which +#' split the predictions more evenly. +#' +#' For a best approximation of the real AUC, `predictions` should be +#' distributed approximately uniformly in the range `[0, 1]` (if +#' `from_logits=FALSE`). The quality of the AUC approximation may be poor if +#' this is not the case. Setting `summation_method` to 'minoring' or 'majoring' +#' can help quantify the error in the approximation by providing a lower or upper +#' bound estimate of the AUC. #' -#' @details -#' ``` -#' cosine similarity = (a . b) / ||a|| ||b|| +#' If `sample_weight` is `NULL`, weights default to 1. +#' Use `sample_weight` of 0 to mask values. +#' +#' # Usage +#' Standalone usage: +#' +#' ```{r} +#' m <- metric_auc(num_thresholds = 3) +#' m$update_state(c(0, 0, 1, 1), +#' c(0, 0.5, 0.3, 0.9)) +#' # threshold values are [0 - 1e-7, 0.5, 1 + 1e-7] +#' # tp = [2, 1, 0], fp = [2, 0, 0], fn = [0, 1, 2], tn = [0, 2, 2] +#' # tp_rate = recall = [1, 0.5, 0], fp_rate = [1, 0, 0] +#' # auc = ((((1 + 0.5) / 2) * (1 - 0)) + (((0.5 + 0) / 2) * (0 - 0))) +#' # = 0.75 +#' m$result() #' ``` #' -#' See: [Cosine Similarity](https://en.wikipedia.org/wiki/Cosine_similarity). +#' ```{r} +#' m$reset_state() +#' m$update_state(c(0, 0, 1, 1), +#' c(0, 0.5, 0.3, 0.9), +#' sample_weight=c(1, 0, 0, 1)) +#' m$result() +#' ``` #' -#' This metric keeps the average cosine similarity between `predictions` and -#' `labels` over a stream of data. +#' Usage with `compile()` API: #' -#' @note If you want to compute the cosine_similarity for each case in a -#' mini-batch you can use `loss_cosine_similarity()`. +#' ```{r, eval = FALSE} +#' # Reports the AUC of a model outputting a probability. +#' model |> compile( +#' optimizer = 'sgd', +#' loss = loss_binary_crossentropy(), +#' metrics = list(metric_auc()) +#' ) #' +#' # Reports the AUC of a model outputting a logit. +#' model |> compile( +#' optimizer = 'sgd', +#' loss = loss_binary_crossentropy(from_logits = TRUE), +#' metrics = list(metric_auc(from_logits = TRUE)) +#' ) +#' ``` #' -#' @inheritParams metric-or-Metric -#' @inherit Metric return -#' @family metrics +#' @param num_thresholds +#' (Optional) The number of thresholds to +#' use when discretizing the roc curve. Values must be > 1. +#' Defaults to `200`. +#' +#' @param curve +#' (Optional) Specifies the name of the curve to be computed, +#' `'ROC'` (default) or `'PR'` for the Precision-Recall-curve.
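+#' +#' For example (a minimal sketch), to report the area under the +#' Precision-Recall curve instead of the ROC curve: +#' ```{r, eval = FALSE} +#' m <- metric_auc(curve = "PR") +#' ```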
+#' +#' @param summation_method +#' (Optional) Specifies the [Riemann summation method]( +#' https://en.wikipedia.org/wiki/Riemann_sum) used. +#' 'interpolation' (default) applies mid-point summation scheme for +#' `ROC`. For PR-AUC, interpolates (true/false) positives but not +#' the ratio that is precision (see Davis & Goadrich 2006 for +#' details); 'minoring' applies left summation for increasing +#' intervals and right summation for decreasing intervals; 'majoring' +#' does the opposite. +#' +#' @param name +#' (Optional) string name of the metric instance. +#' +#' @param dtype +#' (Optional) data type of the metric result. +#' +#' @param thresholds +#' (Optional) A list of floating point values to use as the +#' thresholds for discretizing the curve. If set, the `num_thresholds` +#' parameter is ignored. Values should be in `[0, 1]`. Endpoint +#' thresholds equal to \{`-epsilon`, `1+epsilon`\} for a small positive +#' epsilon value will be automatically included with these to correctly +#' handle predictions equal to exactly 0 or 1. +#' +#' @param multi_label +#' boolean indicating whether multilabel data should be +#' treated as such, wherein AUC is computed separately for each label +#' and then averaged across labels, or (when `FALSE`) if the data +#' should be flattened into a single label before AUC computation. In +#' the latter case, when multilabel data is passed to AUC, each +#' label-prediction pair is treated as an individual data point. Should +#' be set to `FALSE` for multi-class data. +#' +#' @param num_labels +#' (Optional) The number of labels, used when `multi_label` is +#' TRUE. If `num_labels` is not specified, then state variables get +#' created on the first call to `update_state`. +#' +#' @param label_weights +#' (Optional) list, array, or tensor of non-negative weights +#' used to compute AUCs for multilabel data. When `multi_label` is +#' TRUE, the weights are applied to the individual label AUCs when they +#' are averaged to produce the multi-label AUC. When it's FALSE, they +#' are used to weight the individual label predictions in computing the +#' confusion matrix on the flattened data. Note that this is unlike +#' `class_weights` in that `class_weights` weights the example +#' depending on the value of its label, whereas `label_weights` depends +#' only on the index of that label before flattening; therefore +#' `label_weights` should not be used for multi-class data. +#' +#' @param from_logits +#' boolean indicating whether the predictions (`y_pred` in +#' `update_state`) are probabilities or sigmoid logits. As a rule of thumb, +#' when using a keras loss, the `from_logits` constructor argument of the +#' loss should match the AUC `from_logits` constructor argument. +#' +#' @param ... +#' For forward/backward compatibility. +#' +#' @returns a `Metric` instance is returned. The `Metric` instance can be passed +#' directly to `compile(metrics = )`, or used as a standalone object. See +#' `?Metric` for example usage.
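+#' +#' For instance (an illustrative sketch of the `multi_label` flag described +#' above): +#' ```{r, eval = FALSE} +#' # one AUC per label, then averaged across labels: +#' metric_auc(multi_label = TRUE, num_labels = 3L) +#' # vs. all label-prediction pairs flattened into a single curve: +#' metric_auc(multi_label = FALSE) +#' ```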
#' @export -metric_cosine_similarity <- py_metric_wrapper( - NULL, - CosineSimilarity, - alist(axis=-1L, name='cosine_similarity') -) - +#' @family confusion metrics +#' @family metrics +#' @seealso +#' + +# + +#' +#' @tether keras.metrics.AUC +metric_auc <- +function (..., num_thresholds = 200L, curve = "ROC", summation_method = "interpolation", + name = NULL, dtype = NULL, thresholds = NULL, multi_label = FALSE, + num_labels = NULL, label_weights = NULL, from_logits = FALSE) +{ + args <- capture_args(list(num_thresholds = as_integer)) + do.call(keras$metrics$AUC, args) +} -#' Calculates the number of false negatives +#' Calculates the number of false negatives. #' -#' @details +#' @description #' If `sample_weight` is given, calculates the sum of the weights of #' false negatives. This metric creates one local variable, `accumulator` #' that is used to keep track of the number of false negatives. @@ -764,26 +863,60 @@ metric_cosine_similarity <- py_metric_wrapper( #' If `sample_weight` is `NULL`, weights default to 1. #' Use `sample_weight` of 0 to mask values. #' -#' @param thresholds (Optional) Defaults to 0.5. A float value or a -#' list of float threshold values in `[0, 1]`. A threshold is compared -#' with prediction values to determine the truth value of predictions -#' (i.e., above the threshold is `TRUE`, below is `FALSE`). One metric -#' value is generated for each threshold value. +#' # Usage +#' Standalone usage: #' +#' ```{r} +#' m <- metric_false_negatives() +#' m$update_state(c(0, 1, 1, 1), c(0, 1, 0, 0)) +#' m$result() +#' ``` #' -#' @inheritParams metric-or-Metric -#' @inherit Metric return -#' @family metrics +#' ```{r} +#' m$reset_state() +#' m$update_state(c(0, 1, 1, 1), c(0, 1, 0, 0), sample_weight=c(0, 0, 1, 0)) +#' m$result() +#' # 1.0 +#' ``` +#' +#' @param thresholds +#' (Optional) Defaults to `0.5`. A float value, or a Python +#' list of float threshold values in `[0, 1]`. A threshold is +#' compared with prediction values to determine the truth value of +#' predictions (i.e., above the threshold is `TRUE`, below is `FALSE`). +#' If used with a loss function that sets `from_logits=TRUE` (i.e. no +#' sigmoid applied to predictions), `thresholds` should be set to 0. +#' One metric value is generated for each threshold value. +#' +#' @param name +#' (Optional) string name of the metric instance. +#' +#' @param dtype +#' (Optional) data type of the metric result. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit metric_auc return #' @export -metric_false_negatives <- py_metric_wrapper( - NULL, FalseNegatives, - alist(thresholds = NULL)) - +#' @family confusion metrics +#' @family metrics +#' @seealso +#' + +# + +#' +#' @tether keras.metrics.FalseNegatives +metric_false_negatives <- +function (..., thresholds = NULL, name = NULL, dtype = NULL) +{ + args <- capture_args() + do.call(keras$metrics$FalseNegatives, args) +} -#' Calculates the number of false positives +#' Calculates the number of false positives. #' -#' @details +#' @description #' If `sample_weight` is given, calculates the sum of the weights of #' false positives. This metric creates one local variable, `accumulator` #' that is used to keep track of the number of false positives. @@ -791,667 +924,2869 @@ metric_false_negatives <- py_metric_wrapper( #' If `sample_weight` is `NULL`, weights default to 1. #' Use `sample_weight` of 0 to mask values. #' -#' @param thresholds (Optional) Defaults to 0.5. A float value or a -#' list of float threshold values in `[0, 1]`. 
A threshold is compared -#' with prediction values to determine the truth value of predictions -#' (i.e., above the threshold is `true`, below is `false`). One metric -#' value is generated for each threshold value. +#' # Usage +#' Standalone usage: +#' +#' ```{r} +#' m <- metric_false_positives() +#' m$update_state(c(0, 1, 0, 0), c(0, 0, 1, 1)) +#' m$result() +#' ``` +#' +#' ```{r} +#' m$reset_state() +#' m$update_state(c(0, 1, 0, 0), c(0, 0, 1, 1), sample_weight = c(0, 0, 1, 0)) +#' m$result() +#' ``` +#' +#' @param thresholds +#' (Optional) Defaults to `0.5`. A float value, or a Python +#' list of float threshold values in `[0, 1]`. A threshold is +#' compared with prediction values to determine the truth value of +#' predictions (i.e., above the threshold is `TRUE`, below is `FALSE`). +#' If used with a loss function that sets `from_logits=TRUE` (i.e. no +#' sigmoid applied to predictions), `thresholds` should be set to 0. +#' One metric value is generated for each threshold value. +#' +#' @param name +#' (Optional) string name of the metric instance. #' +#' @param dtype +#' (Optional) data type of the metric result. #' -#' @inheritParams metric-or-Metric -#' @inherit Metric return +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit metric_auc return +#' @export +#' @family confusion metrics #' @family metrics +#' @seealso +#' + +# + +#' +#' @tether keras.metrics.FalsePositives +metric_false_positives <- +function (..., thresholds = NULL, name = NULL, dtype = NULL) +{ + args <- capture_args() + do.call(keras$metrics$FalsePositives, args) +} + + +#' Computes the precision of the predictions with respect to the labels. +#' +#' @description +#' The metric creates two local variables, `true_positives` and +#' `false_positives` that are used to compute the precision. This value is +#' ultimately returned as `precision`, an idempotent operation that simply +#' divides `true_positives` by the sum of `true_positives` and +#' `false_positives`. +#' +#' If `sample_weight` is `NULL`, weights default to 1. +#' Use `sample_weight` of 0 to mask values. +#' +#' If `top_k` is set, we'll calculate precision as how often on average a class +#' among the top-k classes with the highest predicted values of a batch entry +#' is correct and can be found in the label for that entry. +#' +#' If `class_id` is specified, we calculate precision by considering only the +#' entries in the batch for which `class_id` is above the threshold and/or in +#' the top-k highest predictions, and computing the fraction of them for which +#' `class_id` is indeed a correct label. 
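+#' +#' In short (a sketch of the running computation described above): +#' ```{r, eval = FALSE} +#' precision <- true_positives / (true_positives + false_positives) +#' ```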
+#' +#' # Usage +#' Standalone usage: +#' +#' ```{r} +#' m <- metric_precision() +#' m$update_state(c(0, 1, 1, 1), +#' c(1, 0, 1, 1)) +#' m$result() |> as.double() |> signif() +#' ``` +#' +#' ```{r} +#' m$reset_state() +#' m$update_state(c(0, 1, 1, 1), +#' c(1, 0, 1, 1), +#' sample_weight = c(0, 0, 1, 0)) +#' m$result() |> as.double() |> signif() +#' ``` +#' +#' ```{r} +#' # With top_k=2, it will calculate precision over y_true[1:2] +#' # and y_pred[1:2] +#' m <- metric_precision(top_k = 2) +#' m$update_state(c(0, 0, 1, 1), c(1, 1, 1, 1)) +#' m$result() +#' ``` +#' +#' ```{r} +#' # With top_k=4, it will calculate precision over y_true[1:4] +#' # and y_pred[1:4] +#' m <- metric_precision(top_k = 4) +#' m$update_state(c(0, 0, 1, 1), c(1, 1, 1, 1)) +#' m$result() +#' ``` +#' +#' Usage with `compile()` API: +#' +#' ```{r, eval=FALSE} +#' model |> compile( +#' optimizer = 'sgd', +#' loss = 'binary_crossentropy', +#' metrics = list(metric_precision()) +#' ) +#' ``` +#' +#' Usage with a loss with `from_logits=TRUE`: +#' +#' ```{r, eval = FALSE} +#' model |> compile( +#' optimizer = 'adam', +#' loss = loss_binary_crossentropy(from_logits = TRUE), +#' metrics = list(metric_precision(thresholds = 0)) +#' ) +#' ``` +#' +#' @param thresholds +#' (Optional) A float value, or a Python list of float +#' threshold values in `[0, 1]`. A threshold is compared with +#' prediction values to determine the truth value of predictions (i.e., +#' above the threshold is `TRUE`, below is `FALSE`). If used with a +#' loss function that sets `from_logits=TRUE` (i.e. no sigmoid applied +#' to predictions), `thresholds` should be set to 0. One metric value +#' is generated for each threshold value. If neither `thresholds` nor +#' `top_k` are set, the default is to calculate precision with +#' `thresholds=0.5`. +#' +#' @param top_k +#' (Optional) Unset by default. An int value specifying the top-k +#' predictions to consider when calculating precision. +#' +#' @param class_id +#' (Optional) Integer class ID for which we want binary metrics. +#' This must be in the half-open interval `[0, num_classes)`, where +#' `num_classes` is the last dimension of predictions. +#' +#' @param name +#' (Optional) string name of the metric instance. +#' +#' @param dtype +#' (Optional) data type of the metric result. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit metric_auc return #' @export -metric_false_positives <- py_metric_wrapper( - NULL, FalsePositives, - alist(thresholds = NULL)) +#' @family confusion metrics +#' @family metrics +#' @seealso +#' + +# + +#' +#' @tether keras.metrics.Precision +metric_precision <- +function (..., thresholds = NULL, top_k = NULL, class_id = NULL, + name = NULL, dtype = NULL) +{ + args <- capture_args(list(top_k = as_integer, class_id = as_integer)) + do.call(keras$metrics$Precision, args) +} +#' Computes best precision where recall is >= specified value. +#' +#' @description +#' This metric creates four local variables, `true_positives`, +#' `true_negatives`, `false_positives` and `false_negatives` that are used to +#' compute the precision at the given recall. The threshold for the given +#' recall value is computed and used to evaluate the corresponding precision. +#' +#' If `sample_weight` is `NULL`, weights default to 1. +#' Use `sample_weight` of 0 to mask values. 
+#' +#' If `class_id` is specified, we calculate precision by considering only the +#' entries in the batch for which `class_id` is above the threshold +#' predictions, and computing the fraction of them for which `class_id` is +#' indeed a correct label. +#' +#' # Usage +#' Standalone usage: +#' +#' ```{r} +#' m <- metric_precision_at_recall(recall = 0.5) +#' m$update_state(c(0, 0, 0, 1, 1), +#' c(0, 0.3, 0.8, 0.3, 0.8)) +#' m$result() +#' ``` +#' +#' ```{r} +#' m$reset_state() +#' m$update_state(c(0, 0, 0, 1, 1), +#' c(0, 0.3, 0.8, 0.3, 0.8), +#' sample_weight = c(2, 2, 2, 1, 1)) +#' m$result() +#' ``` +#' +#' Usage with `compile()` API: +#' +#' ```{r, eval = FALSE} +#' model |> compile( +#' optimizer = 'sgd', +#' loss = 'binary_crossentropy', +#' metrics = list(metric_precision_at_recall(recall = 0.8)) +#' ) +#' ``` +#' +#' @param recall +#' A scalar value in range `[0, 1]`. +#' +#' @param num_thresholds +#' (Optional) Defaults to 200. The number of thresholds to +#' use for matching the given recall. +#' +#' @param class_id +#' (Optional) Integer class ID for which we want binary metrics. +#' This must be in the half-open interval `[0, num_classes)`, where +#' `num_classes` is the last dimension of predictions. +#' +#' @param name +#' (Optional) string name of the metric instance. +#' +#' @param dtype +#' (Optional) data type of the metric result. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit metric_auc return +#' @export +#' @family confusion metrics +#' @family metrics +#' @seealso +#' + +# + +#' +#' @tether keras.metrics.PrecisionAtRecall +metric_precision_at_recall <- +function (..., recall, num_thresholds = 200L, class_id = NULL, + name = NULL, dtype = NULL) +{ + args <- capture_args(list(num_thresholds = as_integer, class_id = as_integer)) + do.call(keras$metrics$PrecisionAtRecall, args) +} + -#' Calculates the number of true negatives +#' Computes the recall of the predictions with respect to the labels. #' -#' @details -#' If `sample_weight` is given, calculates the sum of the weights of -#' true negatives. This metric creates one local variable, `accumulator` -#' that is used to keep track of the number of true negatives. +#' @description +#' This metric creates two local variables, `true_positives` and +#' `false_negatives`, that are used to compute the recall. This value is +#' ultimately returned as `recall`, an idempotent operation that simply divides +#' `true_positives` by the sum of `true_positives` and `false_negatives`. +#' +#' If `sample_weight` is `NULL`, weights default to 1. +#' Use `sample_weight` of 0 to mask values. +#' +#' If `top_k` is set, recall will be computed as how often on average a class +#' among the labels of a batch entry is in the top-k predictions. +#' +#' If `class_id` is specified, we calculate recall by considering only the +#' entries in the batch for which `class_id` is in the label, and computing the +#' fraction of them for which `class_id` is above the threshold and/or in the +#' top-k predictions. 
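+#' +#' In short (a sketch of the running computation described above): +#' ```{r, eval = FALSE} +#' recall <- true_positives / (true_positives + false_negatives) +#' ```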
+#' +#' # Usage +#' Standalone usage: +#' +#' ```{r} +#' m <- metric_recall() +#' m$update_state(c(0, 1, 1, 1), +#' c(1, 0, 1, 1)) +#' m$result() +#' ``` +#' +#' ```{r} +#' m$reset_state() +#' m$update_state(c(0, 1, 1, 1), +#' c(1, 0, 1, 1), +#' sample_weight = c(0, 0, 1, 0)) +#' m$result() +#' ``` +#' +#' Usage with `compile()` API: +#' +#' ```{r, eval = FALSE} +#' model |> compile( +#' optimizer = 'sgd', +#' loss = 'binary_crossentropy', +#' metrics = list(metric_recall()) +#' ) +#' ``` +#' +#' Usage with a loss with `from_logits=TRUE`: +#' +#' ```{r, eval = FALSE} +#' model |> compile( +#' optimizer = 'adam', +#' loss = loss_binary_crossentropy(from_logits = TRUE), +#' metrics = list(metric_recall(thresholds = 0)) +#' ) +#' ``` +#' +#' @param thresholds +#' (Optional) A float value, or a Python list of float +#' threshold values in `[0, 1]`. A threshold is compared with +#' prediction values to determine the truth value of predictions (i.e., +#' above the threshold is `TRUE`, below is `FALSE`). If used with a +#' loss function that sets `from_logits=TRUE` (i.e. no sigmoid +#' applied to predictions), `thresholds` should be set to 0. +#' One metric value is generated for each threshold value. +#' If neither `thresholds` nor `top_k` are set, +#' the default is to calculate recall with `thresholds=0.5`. +#' +#' @param top_k +#' (Optional) Unset by default. An int value specifying the top-k +#' predictions to consider when calculating recall. +#' +#' @param class_id +#' (Optional) Integer class ID for which we want binary metrics. +#' This must be in the half-open interval `[0, num_classes)`, where +#' `num_classes` is the last dimension of predictions. +#' +#' @param name +#' (Optional) string name of the metric instance. +#' +#' @param dtype +#' (Optional) data type of the metric result. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit metric_auc return +#' @export +#' @family confusion metrics +#' @family metrics +#' @seealso +#' + +# + +#' +#' @tether keras.metrics.Recall +metric_recall <- +function (..., thresholds = NULL, top_k = NULL, class_id = NULL, + name = NULL, dtype = NULL) +{ + args <- capture_args(list(top_k = as_integer, class_id = as_integer)) + do.call(keras$metrics$Recall, args) +} + + +#' Computes best recall where precision is >= specified value. +#' +#' @description +#' For a given score-label-distribution the required precision might not +#' be achievable, in this case 0.0 is returned as recall. +#' +#' This metric creates four local variables, `true_positives`, +#' `true_negatives`, `false_positives` and `false_negatives` that are used to +#' compute the recall at the given precision. The threshold for the given +#' precision value is computed and used to evaluate the corresponding recall. +#' +#' If `sample_weight` is `NULL`, weights default to 1. +#' Use `sample_weight` of 0 to mask values. +#' +#' If `class_id` is specified, we calculate precision by considering only the +#' entries in the batch for which `class_id` is above the threshold +#' predictions, and computing the fraction of them for which `class_id` is +#' indeed a correct label. 
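+#' +#' For instance (an illustrative sketch), if no threshold attains the +#' requested precision, the reported recall is `0`: +#' ```{r, eval = FALSE} +#' m <- metric_recall_at_precision(precision = 0.99) +#' m$update_state(c(0, 1), c(0.9, 0.1)) # 0.99 precision is unattainable here +#' m$result() # 0 +#' ```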
+#' +#' # Usage +#' Standalone usage: +#' +#' ```{r} +#' m <- metric_recall_at_precision(precision = 0.8) +#' m$update_state(c(0, 0, 1, 1), +#' c(0, 0.5, 0.3, 0.9)) +#' m$result() +#' ``` +#' +#' ```{r} +#' m$reset_state() +#' m$update_state(c(0, 0, 1, 1), +#' c(0, 0.5, 0.3, 0.9), +#' sample_weight = c(1, 0, 0, 1)) +#' m$result() +#' ``` +#' +#' Usage with `compile()` API: +#' +#' ```{r, eval = FALSE} +#' model |> compile( +#' optimizer = 'sgd', +#' loss = 'binary_crossentropy', +#' metrics = list(metric_recall_at_precision(precision = 0.8)) +#' ) +#' ``` +#' +#' @param precision +#' A scalar value in range `[0, 1]`. +#' +#' @param num_thresholds +#' (Optional) Defaults to 200. The number of thresholds +#' to use for matching the given precision. +#' +#' @param class_id +#' (Optional) Integer class ID for which we want binary metrics. +#' This must be in the half-open interval `[0, num_classes)`, where +#' `num_classes` is the last dimension of predictions. +#' +#' @param name +#' (Optional) string name of the metric instance. +#' +#' @param dtype +#' (Optional) data type of the metric result. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit metric_auc return +#' @export +#' @family confusion metrics +#' @family metrics +# @seealso +# + +#' +#' @tether keras.metrics.RecallAtPrecision +metric_recall_at_precision <- +function (..., precision, num_thresholds = 200L, class_id = NULL, + name = NULL, dtype = NULL) +{ + args <- capture_args(list(num_thresholds = as_integer, class_id = as_integer)) + do.call(keras$metrics$RecallAtPrecision, args) +} + + +#' Computes best sensitivity where specificity is >= specified value. +#' +#' @description +#' `Sensitivity` measures the proportion of actual positives that are correctly +#' identified as such `(tp / (tp + fn))`. +#' `Specificity` measures the proportion of actual negatives that are correctly +#' identified as such `(tn / (tn + fp))`. +#' +#' This metric creates four local variables, `true_positives`, +#' `true_negatives`, `false_positives` and `false_negatives` that are used to +#' compute the sensitivity at the given specificity. The threshold for the +#' given specificity value is computed and used to evaluate the corresponding +#' sensitivity. +#' +#' If `sample_weight` is `NULL`, weights default to 1. +#' Use `sample_weight` of 0 to mask values. +#' +#' If `class_id` is specified, we calculate precision by considering only the +#' entries in the batch for which `class_id` is above the threshold +#' predictions, and computing the fraction of them for which `class_id` is +#' indeed a correct label. +#' +#' For additional information about specificity and sensitivity, see +#' [the following](https://en.wikipedia.org/wiki/Sensitivity_and_specificity). +#' +#' # Usage +#' +#' Standalone usage: +#' +#' ```{r} +#' m <- metric_sensitivity_at_specificity(specificity = 0.5) +#' m$update_state(c(0, 0, 0, 1, 1), +#' c(0, 0.3, 0.8, 0.3, 0.8)) +#' m$result() +#' ``` +#' +#' ```{r} +#' m$reset_state() +#' m$update_state(c(0, 0, 0, 1, 1), +#' c(0, 0.3, 0.8, 0.3, 0.8), +#' sample_weight = c(1, 1, 2, 2, 1)) +#' m$result() +#' ``` +#' +#' Usage with `compile()` API: +#' +#' ```{r, eval = FALSE} +#' model |> compile( +#' optimizer = 'sgd', +#' loss = 'binary_crossentropy', +#' metrics = list(metric_sensitivity_at_specificity()) +#' ) +#' ``` +#' +#' @param specificity +#' A scalar value in range `[0, 1]`. +#' +#' @param num_thresholds +#' (Optional) Defaults to 200. 
The number of thresholds to +#' use for matching the given specificity. +#' +#' @param class_id +#' (Optional) Integer class ID for which we want binary metrics. +#' This must be in the half-open interval `[0, num_classes)`, where +#' `num_classes` is the last dimension of predictions. +#' +#' @param name +#' (Optional) string name of the metric instance. +#' +#' @param dtype +#' (Optional) data type of the metric result. +#' +#' @param ... +#' For forward/backward compatibility. +#' +#' @inherit metric_auc return +#' @export +#' @family confusion metrics +#' @family metrics +#' @seealso +#' + +# + +#' +#' @tether keras.metrics.SensitivityAtSpecificity +metric_sensitivity_at_specificity <- +function (..., specificity, num_thresholds = 200L, class_id = NULL, + name = NULL, dtype = NULL) +{ + args <- capture_args(list(num_thresholds = as_integer, class_id = as_integer)) + do.call(keras$metrics$SensitivityAtSpecificity, args) +} + + +#' Computes best specificity where sensitivity is >= specified value. +#' +#' @description +#' `Sensitivity` measures the proportion of actual positives that are correctly +#' identified as such `(tp / (tp + fn))`. +#' `Specificity` measures the proportion of actual negatives that are correctly +#' identified as such `(tn / (tn + fp))`. +#' +#' This metric creates four local variables, `true_positives`, +#' `true_negatives`, `false_positives` and `false_negatives` that are used to +#' compute the specificity at the given sensitivity. The threshold for the +#' given sensitivity value is computed and used to evaluate the corresponding +#' specificity. +#' +#' If `sample_weight` is `NULL`, weights default to 1. +#' Use `sample_weight` of 0 to mask values. +#' +#' If `class_id` is specified, we calculate precision by considering only the +#' entries in the batch for which `class_id` is above the threshold +#' predictions, and computing the fraction of them for which `class_id` is +#' indeed a correct label. +#' +#' For additional information about specificity and sensitivity, see +#' [the following](https://en.wikipedia.org/wiki/Sensitivity_and_specificity). +#' +#' # Usage +#' Standalone usage: +#' +#' ```{r} +#' m <- metric_specificity_at_sensitivity(sensitivity = 0.5) +#' m$update_state(c(0, 0, 0, 1, 1), +#' c(0, 0.3, 0.8, 0.3, 0.8)) +#' m$result() +#' ``` +#' +#' ```{r} +#' m$reset_state() +#' m$update_state(c(0, 0, 0, 1, 1), +#' c(0, 0.3, 0.8, 0.3, 0.8), +#' sample_weight = c(1, 1, 2, 2, 2)) +#' m$result() +#' ``` +#' +#' Usage with `compile()` API: +#' +#' ```{r, eval = FALSE} +#' model |> compile( +#' optimizer = 'sgd', +#' loss = 'binary_crossentropy', +#' metrics = list(metric_specificity_at_sensitivity(sensitivity = 0.5)) +#' ) +#' ``` +#' +#' @param sensitivity +#' A scalar value in range `[0, 1]`. +#' +#' @param num_thresholds +#' (Optional) Defaults to 200. The number of thresholds to +#' use for matching the given sensitivity. +#' +#' @param class_id +#' (Optional) Integer class ID for which we want binary metrics. +#' This must be in the half-open interval `[0, num_classes)`, where +#' `num_classes` is the last dimension of predictions. +#' +#' @param name +#' (Optional) string name of the metric instance. +#' +#' @param dtype +#' (Optional) data type of the metric result. +#' +#' @param ... +#' For forward/backward compatibility.
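+#' +#' As a plain-R sketch of the two definitions above (`tp`, `fn`, `tn`, `fp` +#' being the entries of the confusion matrix): +#' ```{r, eval = FALSE} +#' sensitivity <- tp / (tp + fn) # true positive rate +#' specificity <- tn / (tn + fp) # true negative rate +#' ```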
+#' +#' @inherit metric_auc return +#' @export +#' @family confusion metrics +#' @family metrics +#' @seealso +#' + +# + +#' +#' @tether keras.metrics.SpecificityAtSensitivity +metric_specificity_at_sensitivity <- +function (..., sensitivity, num_thresholds = 200L, class_id = NULL, + name = NULL, dtype = NULL) +{ + args <- capture_args(list(num_thresholds = as_integer, class_id = as_integer)) + do.call(keras$metrics$SpecificityAtSensitivity, args) +} + + +#' Calculates the number of true negatives. +#' +#' @description +#' If `sample_weight` is given, calculates the sum of the weights of +#' true negatives. This metric creates one local variable, `accumulator` +#' that is used to keep track of the number of true negatives. +#' +#' If `sample_weight` is `NULL`, weights default to 1. +#' Use `sample_weight` of 0 to mask values. +#' +#' # Usage +#' Standalone usage: +#' +#' ```{r} +#' m <- metric_true_negatives() +#' m$update_state(c(0, 1, 0, 0), c(1, 1, 0, 0)) +#' m$result() +#' ``` +#' +#' ```{r} +#' m$reset_state() +#' m$update_state(c(0, 1, 0, 0), c(1, 1, 0, 0), sample_weight = c(0, 0, 1, 0)) +#' m$result() +#' ``` +#' +#' @param thresholds +#' (Optional) Defaults to `0.5`. A float value, or a Python +#' list of float threshold values in `[0, 1]`. A threshold is +#' compared with prediction values to determine the truth value of +#' predictions (i.e., above the threshold is `TRUE`, below is `FALSE`). +#' If used with a loss function that sets `from_logits=TRUE` (i.e. no +#' sigmoid applied to predictions), `thresholds` should be set to 0. +#' One metric value is generated for each threshold value. +#' +#' @param name +#' (Optional) string name of the metric instance. +#' +#' @param dtype +#' (Optional) data type of the metric result. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inherit metric_auc return +#' @export +#' @family confusion metrics +#' @family metrics +#' @seealso +#' + +# + +#' +#' @tether keras.metrics.TrueNegatives +metric_true_negatives <- +function (..., thresholds = NULL, name = NULL, dtype = NULL) +{ + args <- capture_args() + do.call(keras$metrics$TrueNegatives, args) +} + + +#' Calculates the number of true positives. +#' +#' @description +#' If `sample_weight` is given, calculates the sum of the weights of +#' true positives. This metric creates one local variable, `true_positives` +#' that is used to keep track of the number of true positives. +#' +#' If `sample_weight` is `NULL`, weights default to 1. +#' Use `sample_weight` of 0 to mask values. +#' +#' # Usage +#' Standalone usage: +#' +#' ```{r} +#' m <- metric_true_positives() +#' m$update_state(c(0, 1, 1, 1), c(1, 0, 1, 1)) +#' m$result() +#' ``` +#' +#' ```{r} +#' m$reset_state() +#' m$update_state(c(0, 1, 1, 1), c(1, 0, 1, 1), sample_weight = c(0, 0, 1, 0)) +#' m$result() +#' ``` +#' +#' @param thresholds +#' (Optional) Defaults to `0.5`. A float value, or a Python +#' list of float threshold values in `[0, 1]`. A threshold is +#' compared with prediction values to determine the truth value of +#' predictions (i.e., above the threshold is `TRUE`, below is `FALSE`). +#' If used with a loss function that sets `from_logits=TRUE` (i.e. no +#' sigmoid applied to predictions), `thresholds` should be set to 0. +#' One metric value is generated for each threshold value. +#' +#' @param name +#' (Optional) string name of the metric instance. +#' +#' @param dtype +#' (Optional) data type of the metric result. +#' +#' @param ... +#' For forward/backward compatability. 
+#' +#' @inherit metric_auc return +#' @export +#' @family confusion metrics +#' @family metrics +#' @seealso +#' + +# + +#' +#' @tether keras.metrics.TruePositives +metric_true_positives <- +function (..., thresholds = NULL, name = NULL, dtype = NULL) +{ + args <- capture_args() + do.call(keras$metrics$TruePositives, args) +} + + +#' Computes F-1 Score. +#' +#' @description +#' Formula: +#' +#' ```{r, eval=FALSE} +#' f1_score <- 2 * (precision * recall) / (precision + recall) +#' ``` +#' This is the harmonic mean of precision and recall. +#' Its output range is `[0, 1]`. It works for both multi-class +#' and multi-label classification. +#' +#' # Examples +#' ```{r} +#' metric <- metric_f1_score(threshold = 0.5) +#' y_true <- rbind(c(1, 1, 1), +#' c(1, 0, 0), +#' c(1, 1, 0)) +#' y_pred <- rbind(c(0.2, 0.6, 0.7), +#' c(0.2, 0.6, 0.6), +#' c(0.6, 0.8, 0.0)) +#' metric$update_state(y_true, y_pred) +#' result <- metric$result() +#' result +#' ``` +#' +#' # Returns +#' F-1 Score: float. +#' +#' @param average +#' Type of averaging to be performed on data. +#' Acceptable values are `NULL`, `"micro"`, `"macro"` +#' and `"weighted"`. Defaults to `NULL`. +#' If `NULL`, no averaging is performed and `result()` will return +#' the score for each class. +#' If `"micro"`, compute metrics globally by counting the total +#' true positives, false negatives and false positives. +#' If `"macro"`, compute metrics for each label, +#' and return their unweighted mean. +#' This does not take label imbalance into account. +#' If `"weighted"`, compute metrics for each label, +#' and return their average weighted by support +#' (the number of true instances for each label). +#' This alters `"macro"` to account for label imbalance. +#' It can result in a score that is not between precision and recall. +#' +#' @param threshold +#' Elements of `y_pred` greater than `threshold` are +#' converted to be 1, and the rest 0. If `threshold` is +#' `NULL`, the argmax of `y_pred` is converted to 1, and the rest to 0. +#' +#' @param name +#' Optional. String name of the metric instance. +#' +#' @param dtype +#' Optional. Data type of the metric result. +#' +#' @param ... +#' For forward/backward compatibility. +#' +#' @inherit metric_auc return +#' @export +#' @family f score metrics +#' @family metrics +# @seealso +# + +#' +#' @tether keras.metrics.F1Score +metric_f1_score <- +function (..., average = NULL, threshold = NULL, name = "f1_score", + dtype = NULL) +{ + args <- capture_args() + do.call(keras$metrics$F1Score, args) +} + + +#' Computes F-Beta score. +#' +#' @description +#' Formula: +#' +#' ```{r, eval = FALSE} +#' b2 <- beta^2 +#' f_beta_score <- (1 + b2) * (precision * recall) / (precision * b2 + recall) +#' ``` +#' This is the weighted harmonic mean of precision and recall. +#' Its output range is `[0, 1]`. It works for both multi-class +#' and multi-label classification. +#' +#' # Examples +#' ```{r} +#' metric <- metric_fbeta_score(beta = 2.0, threshold = 0.5) +#' y_true <- rbind(c(1, 1, 1), +#' c(1, 0, 0), +#' c(1, 1, 0)) +#' y_pred <- rbind(c(0.2, 0.6, 0.7), +#' c(0.2, 0.6, 0.6), +#' c(0.6, 0.8, 0.0)) +#' metric$update_state(y_true, y_pred) +#' metric$result() +#' ``` +#' +#' # Returns +#' F-Beta Score: float. +#' +#' @param average +#' Type of averaging to be performed across per-class results +#' in the multi-class case. +#' Acceptable values are `NULL`, `"micro"`, `"macro"` and +#' `"weighted"`. Defaults to `NULL`.
+#' If `NULL`, no averaging is performed and `result()` will return
+#' the score for each class.
+#' If `"micro"`, compute metrics globally by counting the total
+#' true positives, false negatives and false positives.
+#' If `"macro"`, compute metrics for each label,
+#' and return their unweighted mean.
+#' This does not take label imbalance into account.
+#' If `"weighted"`, compute metrics for each label,
+#' and return their average weighted by support
+#' (the number of true instances for each label).
+#' This alters `"macro"` to account for label imbalance.
+#' It can result in a score that is not between precision and recall.
+#'
+#' @param beta
+#' Determines the weight given to recall
+#' in the harmonic mean between precision and recall (see pseudocode
+#' equation above). Defaults to `1`.
+#'
+#' @param threshold
+#' Elements of `y_pred` greater than `threshold` are
+#' converted to be 1, and the rest 0. If `threshold` is
+#' `NULL`, the argmax of `y_pred` is converted to 1, and the rest to 0.
+#'
+#' @param name
+#' Optional. String name of the metric instance.
+#'
+#' @param dtype
+#' Optional. Data type of the metric result.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit metric_auc return
+#' @export
+#' @family f score metrics
+#' @family metrics
+# @seealso
+# +
+#'
+#' @tether keras.metrics.FBetaScore
+metric_fbeta_score <-
+function (..., average = NULL, beta = 1, threshold = NULL, name = "fbeta_score",
+    dtype = NULL)
+{
+    args <- capture_args()
+    do.call(keras$metrics$FBetaScore, args)
+}
+
+
+#' Computes the categorical hinge metric between `y_true` and `y_pred`.
+#'
+#' @description
+#' Formula:
+#'
+#' ```{r, eval = FALSE}
+#' loss <- maximum(neg - pos + 1, 0)
+#' ```
+#'
+#' where `neg=maximum((1-y_true)*y_pred)` and `pos=sum(y_true*y_pred)`
+#'
+#' # Usage
+#' Standalone usage:
+#' ```{r}
+#' m <- metric_categorical_hinge()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(0.6, 0.4), c(0.4, 0.6)))
+#' m$result()
+#'
+#' m$reset_state()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(0.6, 0.4), c(0.4, 0.6)),
+#'                sample_weight = c(1, 0))
+#' m$result()
+#' ```
+#'
+#' @returns
+#' Categorical hinge loss values with shape = `[batch_size, d0, .. dN-1]`.
+#'
+#' @param name
+#' (Optional) string name of the metric instance.
+#'
+#' @param dtype
+#' (Optional) data type of the metric result.
+#'
+#' @param y_true
+#' The ground truth values. `y_true` values are expected to be
+#' either `{-1, +1}` or `{0, 1}` (i.e. a one-hot-encoded tensor) with
+#' shape = `[batch_size, d0, .. dN]`.
+#'
+#' @param y_pred
+#' The predicted values with shape = `[batch_size, d0, .. dN]`.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit metric_binary_accuracy return
+#' @export
+#' @family losses
+#' @family metrics
+#' @family hinge metrics
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.metrics.CategoricalHinge
+metric_categorical_hinge <-
+function (y_true, y_pred, ..., name = "categorical_hinge",
+    dtype = NULL)
+{
+    args <- capture_args(list(y_true = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x), y_pred = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x)))
+    callable <- if (missing(y_true) && missing(y_pred))
+        keras$metrics$CategoricalHinge
+    else keras$metrics$categorical_hinge
+    do.call(callable, args)
+}
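As a quick check of the categorical hinge formula documented above, here is a hedged base-R sketch (not the keras implementation) that reproduces the first standalone-usage result by applying the formula per row and averaging:

```r
y_true <- rbind(c(0, 1), c(0, 0))
y_pred <- rbind(c(0.6, 0.4), c(0.4, 0.6))
pos <- rowSums(y_true * y_pred)           # per-row sum(y_true * y_pred)
neg <- apply((1 - y_true) * y_pred, 1, max)  # per-row max((1 - y_true) * y_pred)
mean(pmax(neg - pos + 1, 0))              # 1.4, matching m$result() above
```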
+
+
+#' Computes the hinge metric between `y_true` and `y_pred`.
+#'
+#' @description
+#' Formula:
+#'
+#' ```{r, eval = FALSE}
+#' loss <- mean(maximum(1 - y_true * y_pred, 0), axis=-1)
+#' ```
+#'
+#' `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
+#' provided, they will be converted to -1 or 1.
+#'
+#' # Usage
+#' Standalone usage:
+#'
+#' ```{r}
+#' m <- metric_hinge()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(0.6, 0.4), c(0.4, 0.6)))
+#' m$result()
+#'
+#' m$reset_state()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(0.6, 0.4), c(0.4, 0.6)),
+#'                sample_weight = c(1, 0))
+#' m$result()
+#' ```
+#'
+#' @param name
+#' (Optional) string name of the metric instance.
+#'
+#' @param dtype
+#' (Optional) data type of the metric result.
+#'
+#' @param y_true
+#' The ground truth values. `y_true` values are expected to be -1
+#' or 1. If binary (0 or 1) labels are provided, they will be converted
+#' to -1 or 1 with shape = `[batch_size, d0, .. dN]`.
+#'
+#' @param y_pred
+#' The predicted values with shape = `[batch_size, d0, .. dN]`.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit metric_binary_accuracy return
+#' @export
+#' @family losses
+#' @family metrics
+#' @family hinge metrics
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.metrics.Hinge
+metric_hinge <-
+function (y_true, y_pred, ..., name = "hinge", dtype = NULL)
+{
+    args <- capture_args(list(y_true = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x), y_pred = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x)))
+    callable <- if (missing(y_true) && missing(y_pred))
+        keras$metrics$Hinge
+    else keras$metrics$hinge
+    do.call(callable, args)
+}
+
+
+#' Computes the squared hinge metric between `y_true` and `y_pred`.
+#'
+#' @description
+#' Formula:
+#'
+#' ```{r, eval = FALSE}
+#' loss <- mean(square(maximum(1 - y_true * y_pred, 0)))
+#' ```
+#'
+#' `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
+#' provided, they will be converted to -1 or 1.
+#'
+#' # Usage
+#' Standalone usage:
+#'
+#' ```{r}
+#' m <- metric_squared_hinge()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(0.6, 0.4), c(0.4, 0.6)))
+#' m$result()
+#'
+#' m$reset_state()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(0.6, 0.4), c(0.4, 0.6)),
+#'                sample_weight = c(1, 0))
+#' m$result()
+#' ```
+#'
+#' @param name
+#' (Optional) string name of the metric instance.
+#'
+#' @param dtype
+#' (Optional) data type of the metric result.
+#'
+#' @param y_true
+#' The ground truth values. `y_true` values are expected to be -1
+#' or 1. If binary (0 or 1) labels are provided, they will be converted
+#' to -1 or 1 with shape = `[batch_size, d0, .. dN]`.
+#'
+#' @param y_pred
+#' The predicted values with shape = `[batch_size, d0, .. dN]`.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit metric_binary_accuracy return
+#' @export
+#' @family losses
+#' @family metrics
+#' @family hinge metrics
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.metrics.SquaredHinge
+metric_squared_hinge <-
+function (y_true, y_pred, ..., name = "squared_hinge",
+    dtype = NULL)
+{
+    args <- capture_args(list(y_true = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x), y_pred = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x)))
+    callable <- if (missing(y_true) && missing(y_pred))
+        keras$metrics$SquaredHinge
+    else keras$metrics$squared_hinge
+    do.call(callable, args)
+}
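The two hinge variants above differ only in whether the per-element margins are squared. A hedged base-R sketch (not the keras implementation) reproducing both standalone-usage results, with 0/1 labels first mapped to -1/1:

```r
y_true <- ifelse(rbind(c(0, 1), c(0, 0)) > 0, 1, -1)  # 0/1 labels -> -1/1
y_pred <- rbind(c(0.6, 0.4), c(0.4, 0.6))
margins <- pmax(1 - y_true * y_pred, 0)
mean(rowMeans(margins))    # hinge: 1.3
mean(rowMeans(margins^2))  # squared hinge: 1.86
```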
+
+
+#' Computes the Intersection-Over-Union metric for class 0 and/or 1.
+#'
+#' @description
+#' Formula:
+#'
+#' ```{r, eval = FALSE}
+#' iou <- true_positives / (true_positives + false_positives + false_negatives)
+#' ```
+#' Intersection-Over-Union is a common evaluation metric for semantic image
+#' segmentation.
+#'
+#' To compute IoUs, the predictions are accumulated in a confusion matrix,
+#' weighted by `sample_weight` and the metric is then calculated from it.
+#'
+#' If `sample_weight` is `NULL`, weights default to 1.
+#' Use `sample_weight` of 0 to mask values.
+#'
+#' This class can be used to compute IoUs for a binary classification task
+#' where the predictions are provided as logits. First a `threshold` is applied
+#' to the predicted values such that those that are below the `threshold` are
+#' converted to class 0 and those that are above the `threshold` are converted
+#' to class 1.
+#'
+#' IoUs for classes 0 and 1 are then computed, and the mean of the IoUs for the
+#' classes that are specified by `target_class_ids` is returned.
+#'
+#' # Note
+#' With `threshold=0`, this metric has the same behavior as `metric_iou()`.
+#'
+#' # Examples
+#' Standalone usage:
+#'
+#' ```{r}
+#' m <- metric_binary_iou(target_class_ids = c(0L, 1L), threshold = 0.3)
+#' m$update_state(c(0, 1, 0, 1), c(0.1, 0.2, 0.4, 0.7))
+#' m$result()
+#' ```
+#'
+#' ```{r}
+#' m$reset_state()
+#' m$update_state(c(0, 1, 0, 1), c(0.1, 0.2, 0.4, 0.7),
+#'                sample_weight = c(0.2, 0.3, 0.4, 0.1))
+#' m$result()
+#' ```
+#'
+#' Usage with `compile()` API:
+#'
+#' ```{r, eval = FALSE}
+#' model %>% compile(
+#'   optimizer = 'sgd',
+#'   loss = 'mse',
+#'   metrics = list(metric_binary_iou(
+#'     target_class_ids = 0L,
+#'     threshold = 0.5
+#'   ))
+#' )
+#' ```
+#'
+#' @param target_class_ids
+#' A vector or list of target class ids for which the
+#' metric is returned. Options are `0`, `1`, or `c(0, 1)`. With
+#' `0` (or `1`), the IoU metric for class 0 (or class 1,
+#' respectively) is returned. With `c(0, 1)`, the mean of IoUs for the
+#' two classes is returned.
+#'
+#' @param threshold
+#' A threshold that applies to the prediction logits to convert
+#' them to either predicted class 0 if the logit is below `threshold`
+#' or predicted class 1 if the logit is above `threshold`.
+#'
+#' @param name
+#' (Optional) string name of the metric instance.
+#'
+#' @param dtype
+#' (Optional) data type of the metric result.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit metric_auc return
+#' @export
+#' @family iou metrics
+#' @family metrics
+# @seealso
+# +
+#'
+#' @tether keras.metrics.BinaryIoU
+metric_binary_iou <-
+function (..., target_class_ids = list(0L, 1L), threshold = 0.5,
+    name = NULL, dtype = NULL)
+{
+    args <- capture_args()
+    do.call(keras$metrics$BinaryIoU, args)
+}
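To illustrate the thresholding step described above, here is a hedged base-R walk-through (not the keras implementation) of the `metric_binary_iou()` standalone example; the exact handling of predictions that equal the threshold is an assumption of this sketch:

```r
y_true <- c(0, 1, 0, 1)
y_pred <- as.integer(c(0.1, 0.2, 0.4, 0.7) > 0.3)  # threshold the predictions
iou <- function(cls) {
  intersection <- sum(y_true == cls & y_pred == cls)
  union        <- sum(y_true == cls | y_pred == cls)
  intersection / union
}
mean(c(iou(0), iou(1)))  # ~0.333, matching m$result() above
```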
+
+
+#' Computes the Intersection-Over-Union metric for specific target classes.
+#'
+#' @description
+#' Formula:
+#'
+#' ```{r, eval=FALSE}
+#' iou <- true_positives / (true_positives + false_positives + false_negatives)
+#' ```
+#' Intersection-Over-Union is a common evaluation metric for semantic image
+#' segmentation.
+#'
+#' To compute IoUs, the predictions are accumulated in a confusion matrix,
+#' weighted by `sample_weight` and the metric is then calculated from it.
+#'
+#' If `sample_weight` is `NULL`, weights default to 1.
+#' Use `sample_weight` of 0 to mask values.
+#'
+#' Note that this class first computes IoUs for all individual classes, then
+#' returns the mean of IoUs for the classes that are specified by
+#' `target_class_ids`. If `target_class_ids` has only one id value, the IoU of
+#' that specific class is returned.
+#'
+#' # Examples
+#' Standalone usage:
+#'
+#' ```{r}
+#' m <- metric_iou(num_classes = 2L, target_class_ids = list(0L))
+#' m$update_state(c(0, 0, 1, 1), c(0, 1, 0, 1))
+#' m$result()
+#' ```
+#'
+#' ```{r}
+#' m$reset_state()
+#' m$update_state(c(0, 0, 1, 1), c(0, 1, 0, 1),
+#'                sample_weight = c(0.3, 0.3, 0.3, 0.1))
+#' m$result()
+#' ```
+#'
+#' Usage with `compile()` API:
+#'
+#' ```{r, eval=FALSE}
+#' model %>% compile(
+#'   optimizer = 'sgd',
+#'   loss = 'mse',
+#'   metrics = list(metric_iou(num_classes = 2L, target_class_ids = list(0L))))
+#' ```
+#'
+#' @param num_classes
+#' The possible number of labels the prediction task can have.
+#'
+#' @param target_class_ids
+#' A list of target class ids for which the
+#' metric is returned. To compute IoU for a specific class, a list
+#' of a single id value should be provided.
+#'
+#' @param name
+#' (Optional) string name of the metric instance.
+#'
+#' @param dtype
+#' (Optional) data type of the metric result.
+#'
+#' @param ignore_class
+#' Optional integer. The ID of a class to be ignored during
+#' metric computation. This is useful, for example, in segmentation
+#' problems featuring a "void" class (commonly -1 or 255) in
+#' segmentation maps. By default (`ignore_class=NULL`), all classes are
+#' considered.
+#'
+#' @param sparse_y_true
+#' Whether labels are encoded using integers or
+#' dense floating point vectors. If `FALSE`, the `argmax` function
+#' is used to determine each sample's most likely associated label.
+#'
+#' @param sparse_y_pred
+#' Whether predictions are encoded using integers or
+#' dense floating point vectors. If `FALSE`, the `argmax` function
+#' is used to determine each sample's most likely associated label.
+#'
+#' @param axis
+#' (Optional) The dimension containing the logits.
+#' Defaults to `-1`.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit metric_auc return
+#' @export
+#' @family iou metrics
+#' @family metrics
+# @seealso
+# +
+#'
+#' @tether keras.metrics.IoU
+metric_iou <-
+function (..., num_classes, target_class_ids, name = NULL, dtype = NULL,
+    ignore_class = NULL, sparse_y_true = TRUE, sparse_y_pred = TRUE,
+    axis = -1L)
+{
+    args <- capture_args(list(ignore_class = as_integer, axis = as_axis))
+    do.call(keras$metrics$IoU, args)
+}
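The confusion-matrix accumulation described above can be mimicked in a few lines of base R. A hedged sketch (not the keras implementation) that reproduces the per-class IoU from the `metric_iou()` example, using 0-based class ids:

```r
y_true <- c(0, 0, 1, 1)
y_pred <- c(0, 1, 0, 1)
cm <- table(factor(y_true, 0:1), factor(y_pred, 0:1))  # rows: truth, cols: prediction
tp <- diag(cm)
tp / (rowSums(cm) + colSums(cm) - tp)  # per-class IoU; class 0 gives ~0.333
```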
+
+
+#' Computes the mean Intersection-Over-Union metric.
+#'
+#' @description
+#' Formula:
+#'
+#' ```{r, eval = FALSE}
+#' iou <- true_positives / (true_positives + false_positives + false_negatives)
+#' ```
+#' Intersection-Over-Union is a common evaluation metric for semantic image
+#' segmentation.
+#'
+#' To compute IoUs, the predictions are accumulated in a confusion matrix,
+#' weighted by `sample_weight` and the metric is then calculated from it.
+#'
+#' If `sample_weight` is `NULL`, weights default to 1.
+#' Use `sample_weight` of 0 to mask values.
+#'
+#' Note that this class first computes IoUs for all individual classes, then
+#' returns the mean of these values.
+#'
+#' # Examples
+#' Standalone usage:
+#'
+#' ```{r}
+#' # cm = [[1, 1],
+#' #       [1, 1]]
+#' # sum_row = [2, 2], sum_col = [2, 2], true_positives = [1, 1]
+#' # iou = true_positives / (sum_row + sum_col - true_positives)
+#' # result = (1 / (2 + 2 - 1) + 1 / (2 + 2 - 1)) / 2 = 0.33
+#' m <- metric_mean_iou(num_classes = 2)
+#' m$update_state(c(0, 0, 1, 1), c(0, 1, 0, 1))
+#' m$result()
+#' ```
+#'
+#' ```{r}
+#' m$reset_state()
+#' m$update_state(c(0, 0, 1, 1), c(0, 1, 0, 1),
+#'                sample_weight = c(0.3, 0.3, 0.3, 0.1))
+#' m$result()
+#' ```
+#'
+#' Usage with `compile()` API:
+#'
+#' ```{r, eval = FALSE}
+#' model %>% compile(
+#'   optimizer = 'sgd',
+#'   loss = 'mse',
+#'   metrics = list(metric_mean_iou(num_classes = 2)))
+#' ```
+#'
+#' @param num_classes
+#' The possible number of labels the prediction task can have.
+#' This value must be provided, since a confusion matrix of dimension =
+#' `[num_classes, num_classes]` will be allocated.
+#'
+#' @param name
+#' (Optional) string name of the metric instance.
+#'
+#' @param dtype
+#' (Optional) data type of the metric result.
+#'
+#' @param ignore_class
+#' Optional integer. The ID of a class to be ignored during
+#' metric computation. This is useful, for example, in segmentation
+#' problems featuring a "void" class (commonly -1 or 255) in
+#' segmentation maps. By default (`ignore_class=NULL`), all classes are
+#' considered.
+#'
+#' @param sparse_y_true
+#' Whether labels are encoded using integers or
+#' dense floating point vectors. If `FALSE`, the `argmax` function
+#' is used to determine each sample's most likely associated label.
+#'
+#' @param sparse_y_pred
+#' Whether predictions are encoded using integers or
+#' dense floating point vectors. If `FALSE`, the `argmax` function
+#' is used to determine each sample's most likely associated label.
+#'
+#' @param axis
+#' (Optional) The dimension containing the logits. Defaults to `-1`.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit metric_auc return
+#' @export
+#' @family iou metrics
+#' @family metrics
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.metrics.MeanIoU
+metric_mean_iou <-
+function (..., num_classes, name = NULL, dtype = NULL, ignore_class = NULL,
+    sparse_y_true = TRUE, sparse_y_pred = TRUE, axis = -1L)
+{
+    args <- capture_args(list(ignore_class = as_integer, axis = as_axis,
+        num_classes = as_integer))
+    do.call(keras$metrics$MeanIoU, args)
+}
+
+
+#' Computes the Intersection-Over-Union metric for one-hot encoded labels.
+#'
+#' @description
+#' Formula:
+#'
+#' ```{r, eval = FALSE}
+#' iou <- true_positives / (true_positives + false_positives + false_negatives)
+#' ```
+#' Intersection-Over-Union is a common evaluation metric for semantic image
+#' segmentation.
+#'
+#' To compute IoUs, the predictions are accumulated in a confusion matrix,
+#' weighted by `sample_weight` and the metric is then calculated from it.
+#'
+#' If `sample_weight` is `NULL`, weights default to 1.
+#' Use `sample_weight` of 0 to mask values.
+#'
+#' This class can be used to compute IoU for multi-class classification tasks
+#' where the labels are one-hot encoded (the last axis should have one
+#' dimension per class). Note that the predictions should also have the same
+#' shape. To compute the IoU, first the labels and predictions are converted
+#' back into integer format by taking the argmax over the class axis. Then the
+#' same computation steps as for the base `IoU` class apply.
+#'
+#' Note that if there is only one channel in the labels and predictions, this
+#' class is the same as `metric_iou`. In this case, use `metric_iou()` instead.
+#'
+#' Also, make sure that `num_classes` is equal to the number of classes in the
+#' data, to avoid a "labels out of bound" error when the confusion matrix is
+#' computed.
+#'
+#' # Examples
+#' Standalone usage:
+#'
+#' ```{r}
+#' y_true <- rbind(c(0, 0, 1), c(1, 0, 0), c(0, 1, 0), c(1, 0, 0))
+#' y_pred <- rbind(c(0.2, 0.3, 0.5), c(0.1, 0.2, 0.7), c(0.5, 0.3, 0.1),
+#'                 c(0.1, 0.4, 0.5))
+#' sample_weight <- c(0.1, 0.2, 0.3, 0.4)
+#' m <- metric_one_hot_iou(num_classes = 3, target_class_ids = c(0, 2))
+#' m$update_state(
+#'   y_true = y_true, y_pred = y_pred, sample_weight = sample_weight)
+#' m$result()
+#' ```
+#'
+#' Usage with `compile()` API:
+#'
+#' ```{r, eval = FALSE}
+#' model %>% compile(
+#'   optimizer = 'sgd',
+#'   loss = 'mse',
+#'   metrics = list(metric_one_hot_iou(
+#'     num_classes = 3L,
+#'     target_class_ids = list(1L)
+#'   ))
+#' )
+#' ```
+#'
+#' @param num_classes
+#' The possible number of labels the prediction task can have.
+#'
+#' @param target_class_ids
+#' A vector or list of target class ids for which the
+#' metric is returned. To compute IoU for a specific class, a vector
+#' or list with a single id value should be provided.
+#'
+#' @param name
+#' (Optional) string name of the metric instance.
+#'
+#' @param dtype
+#' (Optional) data type of the metric result.
+#'
+#' @param ignore_class
+#' Optional integer. The ID of a class to be ignored during
+#' metric computation. This is useful, for example, in segmentation
+#' problems featuring a "void" class (commonly -1 or 255) in
+#' segmentation maps. By default (`ignore_class=NULL`), all classes are
+#' considered.
+#'
+#' @param sparse_y_pred
+#' Whether predictions are encoded using integers or
+#' dense floating point vectors. If `FALSE`, the `argmax` function
+#' is used to determine each sample's most likely associated label.
+#'
+#' @param axis
+#' (Optional) The dimension containing the logits. Defaults to `-1`.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit metric_auc return
+#' @export
+#' @family iou metrics
+#' @family metrics
+# @seealso
+# +
+#'
+#' @tether keras.metrics.OneHotIoU
+metric_one_hot_iou <-
+function (..., num_classes, target_class_ids, name = NULL, dtype = NULL,
+    ignore_class = NULL, sparse_y_pred = FALSE, axis = -1L)
+{
+    args <- capture_args(list(ignore_class = as_integer, axis = as_axis,
+        num_classes = as_integer, target_class_ids = function (x)
+        lapply(x, as_integer)))
+    do.call(keras$metrics$OneHotIoU, args)
+}
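Since the one-hot variants only add an argmax decoding step before the confusion-matrix computation, here is a hedged base-R sketch of that step (not the keras implementation), using the example data above; note that `max.col()` breaks ties randomly, which does not matter for this data:

```r
y_true <- rbind(c(0, 0, 1), c(1, 0, 0), c(0, 1, 0), c(1, 0, 0))
y_pred <- rbind(c(0.2, 0.3, 0.5), c(0.1, 0.2, 0.7),
                c(0.5, 0.3, 0.1), c(0.1, 0.4, 0.5))
max.col(y_true) - 1L  # decoded labels:      2 0 1 0
max.col(y_pred) - 1L  # decoded predictions: 2 2 0 2
```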
+
+
+#' Computes mean Intersection-Over-Union metric for one-hot encoded labels.
+#'
+#' @description
+#' Formula:
+#'
+#' ```{r, eval = FALSE}
+#' iou <- true_positives / (true_positives + false_positives + false_negatives)
+#' ```
+#' Intersection-Over-Union is a common evaluation metric for semantic image
+#' segmentation.
+#'
+#' To compute IoUs, the predictions are accumulated in a confusion matrix,
+#' weighted by `sample_weight` and the metric is then calculated from it.
+#'
+#' If `sample_weight` is `NULL`, weights default to 1.
+#' Use `sample_weight` of 0 to mask values.
+#'
+#' This class can be used to compute the mean IoU for multi-class
+#' classification tasks where the labels are one-hot encoded (the last axis
+#' should have one dimension per class). Note that the predictions should also
+#' have the same shape. To compute the mean IoU, first the labels and
+#' predictions are converted back into integer format by taking the argmax over
+#' the class axis. Then the same computation steps as for the base `MeanIoU`
+#' class apply.
+#'
+#' Note that if there is only one channel in the labels and predictions, this
+#' class is the same as `metric_mean_iou`. In this case, use
+#' `metric_mean_iou()` instead.
+#'
+#' Also, make sure that `num_classes` is equal to the number of classes in the
+#' data, to avoid a "labels out of bound" error when the confusion matrix is
+#' computed.
+#'
+#' # Examples
+#' Standalone usage:
+#'
+#' ```{r}
+#' y_true <- rbind(c(0, 0, 1), c(1, 0, 0), c(0, 1, 0), c(1, 0, 0))
+#' y_pred <- rbind(c(0.2, 0.3, 0.5), c(0.1, 0.2, 0.7), c(0.5, 0.3, 0.1),
+#'                 c(0.1, 0.4, 0.5))
+#' sample_weight <- c(0.1, 0.2, 0.3, 0.4)
+#' m <- metric_one_hot_mean_iou(num_classes = 3L)
+#' m$update_state(
+#'   y_true = y_true, y_pred = y_pred, sample_weight = sample_weight)
+#' m$result()
+#' ```
+#'
+#' Usage with `compile()` API:
+#'
+#' ```{r, eval = FALSE}
+#' model %>% compile(
+#'   optimizer = 'sgd',
+#'   loss = 'mse',
+#'   metrics = list(metric_one_hot_mean_iou(num_classes = 3L)))
+#' ```
+#'
+#' @param num_classes
+#' The possible number of labels the prediction task can have.
+#'
+#' @param name
+#' (Optional) string name of the metric instance.
+#'
+#' @param dtype
+#' (Optional) data type of the metric result.
+#'
+#' @param ignore_class
+#' Optional integer. The ID of a class to be ignored during
+#' metric computation. This is useful, for example, in segmentation
+#' problems featuring a "void" class (commonly -1 or 255) in
+#' segmentation maps. By default (`ignore_class=NULL`), all classes are
+#' considered.
+#'
+#' @param sparse_y_pred
+#' Whether predictions are encoded using natural numbers or
+#' probability distribution vectors. If `FALSE`, the `argmax`
+#' function will be used to determine each sample's most likely
+#' associated label.
+#'
+#' @param axis
+#' (Optional) The dimension containing the logits. Defaults to `-1`.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit metric_auc return
+#' @export
+#' @family iou metrics
+#' @family metrics
+# @seealso
+# +
+#'
+#' @tether keras.metrics.OneHotMeanIoU
+metric_one_hot_mean_iou <-
+function (..., num_classes, name = NULL, dtype = NULL, ignore_class = NULL,
+    sparse_y_pred = FALSE, axis = -1L)
+{
+    args <- capture_args(list(ignore_class = as_integer, axis = as_axis,
+        num_classes = as_integer))
+    do.call(keras$metrics$OneHotMeanIoU, args)
+}
+
+
+#' Computes the crossentropy metric between the labels and predictions.
+#'
+#' @description
+#' This is the crossentropy metric class to be used when there are only two
+#' label classes (0 and 1).
+#'
+#' # Examples
+#' Standalone usage:
+#'
+#' ```{r}
+#' m <- metric_binary_crossentropy()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(0.6, 0.4), c(0.4, 0.6)))
+#' m$result()
+#' ```
+#'
+#' ```{r}
+#' m$reset_state()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(0.6, 0.4), c(0.4, 0.6)),
+#'                sample_weight = c(1, 0))
+#' m$result()
+#' ```
+#'
+#' Usage with `compile()` API:
+#'
+#' ```{r, eval = FALSE}
+#' model %>% compile(
+#'   optimizer = 'sgd',
+#'   loss = 'mse',
+#'   metrics = list(metric_binary_crossentropy()))
+#' ```
+#'
+#' @param name
+#' (Optional) string name of the metric instance.
+#'
+#' @param dtype
+#' (Optional) data type of the metric result.
+#'
+#' @param from_logits
+#' (Optional) Whether output is expected
+#' to be a logits tensor. By default, we consider
+#' that output encodes a probability distribution.
+#'
+#' @param label_smoothing
+#' (Optional) Float in `[0, 1]`.
+#' When > 0, label values are smoothed,
+#' meaning the confidence on label values is relaxed.
+#' e.g. `label_smoothing=0.2` means that we will use
+#' a value of 0.1 for label "0" and 0.9 for label "1".
+#'
+#' @param y_true
+#' Ground truth values. shape = `[batch_size, d0, .. dN]`.
+#'
+#' @param y_pred
+#' The predicted values. shape = `[batch_size, d0, .. dN]`.
+#'
+#' @param axis
+#' The axis along which the mean is computed. Defaults to `-1`.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit metric_binary_accuracy return
+#' @export
+#' @family losses
+#' @family metrics
+#' @family probabilistic metrics
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.metrics.BinaryCrossentropy
+metric_binary_crossentropy <-
+function (y_true, y_pred, from_logits = FALSE, label_smoothing = 0,
+    axis = -1L, ..., name = "binary_crossentropy", dtype = NULL)
+{
+    args <- capture_args(list(y_true = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x), y_pred = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x), axis = as_axis))
+    callable <- if (missing(y_true) && missing(y_pred))
+        keras$metrics$BinaryCrossentropy
+    else keras$metrics$binary_crossentropy
+    do.call(callable, args)
+}
+
+
+#' Computes the crossentropy metric between the labels and predictions.
+#'
+#' @description
+#' This is the crossentropy metric class to be used when there are multiple
+#' label classes (2 or more). It assumes that labels are one-hot encoded,
+#' e.g., when label values are `c(2, 0, 1)`, then
+#' `y_true` is `rbind(c(0, 0, 1), c(1, 0, 0), c(0, 1, 0))`.
+#'
+#' # Examples
+#' Standalone usage:
+#'
+#' ```{r}
+#' # EPSILON = 1e-7, y = y_true, y' = y_pred
+#' # y' = clip(y_pred, EPSILON, 1 - EPSILON)
+#' # y' = rbind(c(0.05, 0.95, EPSILON), c(0.1, 0.8, 0.1))
+#' # xent = -sum(y * log(y'), axis = -1)
+#' #      = -c(log(0.95), log(0.1))
+#' #      = c(0.051, 2.302)
+#' # Reduced xent = (0.051 + 2.302) / 2
+#' m <- metric_categorical_crossentropy()
+#' m$update_state(rbind(c(0, 1, 0), c(0, 0, 1)),
+#'                rbind(c(0.05, 0.95, 0), c(0.1, 0.8, 0.1)))
+#' m$result()
+#' # 1.1769392
+#' ```
+#'
+#' ```{r}
+#' m$reset_state()
+#' m$update_state(rbind(c(0, 1, 0), c(0, 0, 1)),
+#'                rbind(c(0.05, 0.95, 0), c(0.1, 0.8, 0.1)),
+#'                sample_weight = c(0.3, 0.7))
+#' m$result()
+#' ```
+#'
+#' Usage with `compile()` API:
+#'
+#' ```{r, eval = FALSE}
+#' model %>% compile(
+#'   optimizer = 'sgd',
+#'   loss = 'mse',
+#'   metrics = list(metric_categorical_crossentropy()))
+#' ```
+#'
+#' @param name
+#' (Optional) string name of the metric instance.
+#'
+#' @param dtype
+#' (Optional) data type of the metric result.
+#'
+#' @param from_logits
+#' (Optional) Whether output is expected to be
+#' a logits tensor. By default, we consider that output
+#' encodes a probability distribution.
+#'
+#' @param label_smoothing
+#' (Optional) Float in `[0, 1]`.
+#' When > 0, label values are smoothed, meaning the confidence
+#' on label values is relaxed. e.g. `label_smoothing=0.2` means
+#' that we will use a value of 0.1 for label
+#' "0" and 0.9 for label "1".
+#'
+#' @param axis
+#' (Optional) Defaults to `-1`.
+#' The dimension along which entropy is computed.
+#'
+#' @param y_true
+#' Tensor of one-hot true targets.
+#'
+#' @param y_pred
+#' Tensor of predicted targets.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit metric_binary_accuracy return
+#' @export
+#' @family losses
+#' @family metrics
+#' @family probabilistic metrics
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.metrics.CategoricalCrossentropy
+metric_categorical_crossentropy <-
+function (y_true, y_pred, from_logits = FALSE, label_smoothing = 0,
+    axis = -1L, ..., name = "categorical_crossentropy", dtype = NULL)
+{
+    args <- capture_args(list(axis = as_axis, y_true = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x), y_pred = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x)))
+    callable <- if (missing(y_true) && missing(y_pred))
+        keras$metrics$CategoricalCrossentropy
+    else keras$metrics$categorical_crossentropy
+    do.call(callable, args)
+}
+
+
+#' Computes the Kullback-Leibler divergence metric between `y_true` and `y_pred`.
+#'
+#' @description
+#' Formula:
+#'
+#' ```{r, eval = FALSE}
+#' loss <- y_true * log(y_true / y_pred)
+#' ```
+#'
+#' `y_true` and `y_pred` are expected to be probability
+#' distributions, with values between 0 and 1. They will get
+#' clipped to the `[0, 1]` range.
+#'
+#' # Usage
+#' Standalone usage:
+#'
+#' ```{r}
+#' m <- metric_kl_divergence()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(0.6, 0.4), c(0.4, 0.6)))
+#' m$result()
+#' ```
+#'
+#' ```{r}
+#' m$reset_state()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(0.6, 0.4), c(0.4, 0.6)),
+#'                sample_weight = c(1, 0))
+#' m$result()
+#' ```
+#'
+#' Usage with `compile()` API:
+#'
+#' ```{r, eval = FALSE}
+#' model %>% compile(optimizer = 'sgd',
+#'                   loss = 'mse',
+#'                   metrics = list(metric_kl_divergence()))
+#' ```
+#'
+#' @param name
+#' (Optional) string name of the metric instance.
+#'
+#' @param dtype
+#' (Optional) data type of the metric result.
+#'
+#' @param y_true
+#' Tensor of true targets.
+#'
+#' @param y_pred
+#' Tensor of predicted targets.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @export
+#' @family losses
+#' @family metrics
+#' @family probabilistic metrics
+#' @inherit metric_binary_accuracy return
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.metrics.KLDivergence
+metric_kl_divergence <-
+function (y_true, y_pred, ..., name = "kl_divergence",
+    dtype = NULL)
+{
+    args <- capture_args(list(y_true = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x), y_pred = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x)))
+    callable <- if (missing(y_true) && missing(y_pred))
+        keras$metrics$KLDivergence
+    else keras$metrics$kl_divergence
+    do.call(callable, args)
+}
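As a sanity check of the KL formula and the clipping behavior documented above, here is a hedged base-R sketch (not the keras implementation) that reproduces the first standalone-usage result:

```r
eps <- 1e-7
y_true <- rbind(c(0, 1), c(0, 0))
y_pred <- rbind(c(0.6, 0.4), c(0.4, 0.6))
y <- pmin(pmax(y_true, eps), 1)  # clip both inputs to [eps, 1]
p <- pmin(pmax(y_pred, eps), 1)
mean(rowSums(y * log(y / p)))    # ~0.458, matching m$result() above
```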
+
+
+#' Computes the Poisson metric between `y_true` and `y_pred`.
+#'
+#' @description
+#'
+#' Formula:
+#'
+#' ```{r, eval = FALSE}
+#' metric <- y_pred - y_true * log(y_pred)
+#' ```
+#'
+#' # Examples
+#' Standalone usage:
+#'
+#' ```{r}
+#' m <- metric_poisson()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(1, 1), c(0, 0)))
+#' m$result()
+#' ```
+#'
+#' ```{r}
+#' m$reset_state()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(1, 1), c(0, 0)),
+#'                sample_weight = c(1, 0))
+#' m$result()
+#' ```
+#'
+#' Usage with `compile()` API:
+#'
+#' ```{r, eval = FALSE}
+#' model %>% compile(
+#'   optimizer = 'sgd',
+#'   loss = 'mse',
+#'   metrics = list(metric_poisson())
+#' )
+#' ```
+#'
+#' @param name
+#' (Optional) string name of the metric instance.
+#'
+#' @param dtype
+#' (Optional) data type of the metric result.
+#'
+#' @param y_true
+#' Ground truth values. shape = `[batch_size, d0, .. dN]`.
+#'
+#' @param y_pred
+#' The predicted values. shape = `[batch_size, d0, .. dN]`.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @export
+#' @family losses
+#' @family metrics
+#' @inherit metric_binary_accuracy return
+#' @family probabilistic metrics
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.metrics.Poisson
+metric_poisson <-
+function (y_true, y_pred, ..., name = "poisson", dtype = NULL)
+{
+    args <- capture_args(list(y_true = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x), y_pred = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x)))
+    callable <- if (missing(y_true) && missing(y_pred))
+        keras$metrics$Poisson
+    else keras$metrics$poisson
+    do.call(callable, args)
+}
+
+
+#' Computes the crossentropy metric between the labels and predictions.
+#'
+#' @description
+#' Use this crossentropy metric when there are two or more label classes.
+#' It expects labels to be provided as integers. If you want to provide labels
+#' that are one-hot encoded, please use the `metric_categorical_crossentropy()`
+#' metric instead.
+#'
+#' There should be `num_classes` floating point values per feature for `y_pred`
+#' and a single floating point value per feature for `y_true`.
+#'
+#' # Examples
+#' Standalone usage:
+#'
+#' ```{r}
+#' m <- metric_sparse_categorical_crossentropy()
+#' m$update_state(c(1, 2),
+#'                rbind(c(0.05, 0.95, 0), c(0.1, 0.8, 0.1)))
+#' m$result()
+#' ```
+#'
+#' ```{r}
+#' m$reset_state()
+#' m$update_state(c(1, 2),
+#'                rbind(c(0.05, 0.95, 0), c(0.1, 0.8, 0.1)),
+#'                sample_weight = c(0.3, 0.7))
+#' m$result()
+#' # 1.6271976
+#' ```
+#'
+#' Usage with `compile()` API:
+#'
+#' ```{r, eval = FALSE}
+#' model %>% compile(
+#'   optimizer = 'sgd',
+#'   loss = 'mse',
+#'   metrics = list(metric_sparse_categorical_crossentropy()))
+#' ```
+#'
+#' @param name
+#' (Optional) string name of the metric instance.
+#'
+#' @param dtype
+#' (Optional) data type of the metric result.
+#'
+#' @param from_logits
+#' (Optional) Whether output is expected
+#' to be a logits tensor. By default, we consider that output
+#' encodes a probability distribution.
+#'
+#' @param axis
+#' (Optional) Defaults to `-1`.
+#' The dimension along which entropy is computed.
+#'
+#' @param y_true
+#' Ground truth values.
+#'
+#' @param y_pred
+#' The predicted values.
+#'
+#' @param ignore_class
+#' Optional integer. The ID of a class to be ignored during
+#' loss computation. This is useful, for example, in segmentation
+#' problems featuring a "void" class (commonly -1 or 255) in
+#' segmentation maps. By default (`ignore_class=NULL`), all classes are
+#' considered.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @export
+#' @family losses
+#' @inherit metric_binary_accuracy return
+#' @family metrics
+#' @family probabilistic metrics
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.metrics.SparseCategoricalCrossentropy
+metric_sparse_categorical_crossentropy <-
+function (y_true, y_pred, from_logits = FALSE, ignore_class = NULL,
+    axis = -1L, ..., name = "sparse_categorical_crossentropy",
+    dtype = NULL)
+{
+    args <- capture_args(list(axis = as_axis, y_true = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x), y_pred = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x), ignore_class = as_integer))
+    callable <- if (missing(y_true) && missing(y_pred))
+        keras$metrics$SparseCategoricalCrossentropy
+    else keras$metrics$sparse_categorical_crossentropy
+    do.call(callable, args)
+}
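To make the integer-label indexing concrete, here is a hedged base-R sketch (not the keras implementation) that reproduces both results from the sparse categorical crossentropy example above; note the `+ 1L` converting 0-based class ids to R's 1-based column indexing:

```r
y_true <- c(1, 2)  # 0-based class ids, as in the example above
y_pred <- rbind(c(0.05, 0.95, 0), c(0.1, 0.8, 0.1))
eps <- 1e-7
p <- pmin(pmax(y_pred, eps), 1 - eps)           # keras-style clipping
xent <- -log(p[cbind(seq_along(y_true), y_true + 1L)])
mean(xent)                        # ~1.177, the unweighted result
weighted.mean(xent, c(0.3, 0.7))  # ~1.627, the sample-weighted result
```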
+
+
+#' Computes the (weighted) mean of the given values.
+#'
+#' @description
+#' For example, if `values` is `c(1, 3, 5, 7)` then the mean is 4.
+#' If `sample_weight` was specified as `c(1, 1, 0, 0)` then the mean would be 2.
+#'
+#' This metric creates two variables, `total` and `count`.
+#' The mean value returned is simply `total` divided by `count`.
+#'
+#' # Examples
+#' ```{r}
+#' m <- metric_mean()
+#' m$update_state(c(1, 3, 5, 7))
+#' m$result()
+#' ```
+#'
+#' ```{r}
+#' m$reset_state()
+#' m$update_state(c(1, 3, 5, 7), sample_weight = c(1, 1, 0, 0))
+#' m$result()
+#' ```
+#'
+#' @param name
+#' (Optional) string name of the metric instance.
+#'
+#' @param dtype
+#' (Optional) data type of the metric result.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit metric_auc return
+#' @export
+#' @family reduction metrics
+#' @family metrics
+# @seealso
+# +
+#'
+#' @tether keras.metrics.Mean
+metric_mean <-
+function (..., name = "mean", dtype = NULL)
+{
+    args <- capture_args()
+    do.call(keras$metrics$Mean, args)
+}
+
+
+#' Wrap a stateless metric function with the `Mean` metric.
+#'
+#' @description
+#' You could use this class to quickly build a mean metric from a function. The
+#' function needs to have the signature `fn(y_true, y_pred)` and return a
+#' per-sample loss array. The wrapped metric's `result()` will return
+#' the average metric value across all samples seen so far.
+#'
+#' For example:
+#'
+#' ```{r}
+#' mse <- function(y_true, y_pred) {
+#'   (y_true - y_pred)^2
+#' }
+#'
+#' mse_metric <- metric_mean_wrapper(fn = mse)
+#' mse_metric$update_state(c(0, 1), c(1, 1))
+#' mse_metric$result()
+#' ```
+#'
+#' @param fn
+#' The metric function to wrap, with signature
+#' `fn(y_true, y_pred)`.
+#'
+#' @param name
+#' (Optional) string name of the metric instance.
+#'
+#' @param dtype
+#' (Optional) data type of the metric result.
+#'
+#' @param ...
+#' Keyword arguments to pass on to `fn`.
+#'
+#' @inherit metric_auc return
+#' @export
+#' @family reduction metrics
+#' @family metrics
+# @seealso
+# +
+#'
+#' @tether keras.metrics.MeanMetricWrapper
+metric_mean_wrapper <-
+function (..., fn, name = NULL, dtype = NULL)
+{
+    args <- capture_args(list(fn = function(x) as_py_function(
+        x, default_name =
+            if (is.null(name)) {
+                if (is.symbol(fn_expr <- substitute(fn)))
+                    deparse(fn_expr)
+                else
+                    "custom_metric"
+            } else
+                paste0(name, "_fn"))))
+    do.call(keras$metrics$MeanMetricWrapper, args)
+}
+
+
+#' Computes the (weighted) sum of the given values.
+#'
+#' @description
+#' For example, if `values` is `c(1, 3, 5, 7)` then their sum is 16.
+#' If `sample_weight` was specified as `c(1, 1, 0, 0)` then the sum would be 4.
+#'
+#' This metric creates one variable, `total`.
+#' This is ultimately returned as the sum value.
+#'
+#' # Examples
+#' ```{r}
+#' m <- metric_sum()
+#' m$update_state(c(1, 3, 5, 7))
+#' m$result()
+#' ```
+#'
+#' ```{r}
+#' m <- metric_sum()
+#' m$update_state(c(1, 3, 5, 7), sample_weight = c(1, 1, 0, 0))
+#' m$result()
+#' ```
 #'
-#' If `sample_weight` is `NULL`, weights default to 1.
-#' Use `sample_weight` of 0 to mask values.
+#' @param name
+#' (Optional) string name of the metric instance.
 #'
-#' @param thresholds (Optional) Defaults to 0.5. A float value or a
-#' list of float threshold values in `[0, 1]`. A threshold is compared
-#' with prediction values to determine the truth value of predictions
-#' (i.e., above the threshold is `true`, below is `false`). One metric
-#' value is generated for each threshold value.
+#'
+#' @param dtype
+#' (Optional) data type of the metric result.
 #'
+#' @param ...
+#' For forward/backward compatibility.
 #'
-#' @inheritParams metric-or-Metric
-#' @inherit Metric return
-#' @family metrics
+#' @inherit metric_auc return
 #' @export
-metric_true_negatives <- py_metric_wrapper(
-  NULL, TrueNegatives,
-  alist(thresholds = NULL))
-
+#' @family reduction metrics
+#' @family metrics
+# @seealso
+# +
+#'
+#' @tether keras.metrics.Sum
+metric_sum <-
+function (..., name = "sum", dtype = NULL)
+{
+    args <- capture_args()
+    do.call(keras$metrics$Sum, args)
+}
-#' Calculates the number of true positives
+#' Computes the cosine similarity between the labels and predictions.
 #'
-#' @details
-#' If `sample_weight` is given, calculates the sum of the weights of
-#' true positives. This metric creates one local variable, `true_positives`
-#' that is used to keep track of the number of true positives.
+#' @description
+#' Formula:
 #'
-#' If `sample_weight` is `NULL`, weights default to 1.
-#' Use `sample_weight` of 0 to mask values.
+#' ```{r, eval=FALSE}
+#' loss <- sum(l2_norm(y_true) * l2_norm(y_pred))
+#' ```
+#' See: [Cosine Similarity](https://en.wikipedia.org/wiki/Cosine_similarity).
+#' This metric keeps the average cosine similarity between `predictions` and
+#' `labels` over a stream of data.
 #'
-#' @param thresholds (Optional) Defaults to 0.5. A float value or a
-#' list of float threshold values in `[0, 1]`. A threshold is compared
-#' with prediction values to determine the truth value of predictions
-#' (i.e., above the threshold is `true`, below is `false`). One metric
-#' value is generated for each threshold value.
+#' # Examples
+#' Standalone usage:
 #'
+#' ```{r}
+#' m <- metric_cosine_similarity(axis = 2)
+#' m$update_state(rbind(c(0., 1.), c(1., 1.)), rbind(c(1., 0.), c(1., 1.)))
+#' m$result()
 #'
-#' @inheritParams metric-or-Metric
-#' @inherit Metric return
-#' @family metrics
-#' @export
-metric_true_positives <- py_metric_wrapper(
-  NULL, TruePositives,
-  alist(thresholds = NULL))
-
-
-
-#' Computes the hinge metric between `y_true` and `y_pred`
+#' m$reset_state()
+#' m$update_state(rbind(c(0., 1.), c(1., 1.)), rbind(c(1., 0.), c(1., 1.)),
+#'                sample_weight = c(0.3, 0.7))
+#' m$result()
+#' ```
 #'
-#' `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
-#' provided we will convert them to -1 or 1.
+#' Usage with `compile()` API:
 #'
-#' ```
-#' loss = tf$reduce_mean(tf$maximum(1 - y_true * y_pred, 0L), axis=-1L)
+#' ```{r, eval = FALSE}
+#' model %>% compile(
+#'   optimizer = 'sgd',
+#'   loss = 'mse',
+#'   metrics = list(metric_cosine_similarity(axis = 2)))
 #' ```
 #'
-#' @inheritParams metric-or-Metric
-#' @inherit metric-or-Metric return
-#' @family metrics
+#' @param name
+#' (Optional) string name of the metric instance.
+#'
+#' @param dtype
+#' (Optional) data type of the metric result.
+#'
+#' @param axis
+#' (Optional) Defaults to `-1`. The dimension along which the cosine
+#' similarity is computed.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit metric_auc return
 #' @export
-metric_hinge <- py_metric_wrapper(hinge, Hinge)
+#' @family regression metrics
+#' @family metrics
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.metrics.CosineSimilarity
+metric_cosine_similarity <-
+function (..., name = "cosine_similarity", dtype = NULL, axis = -1L)
+{
+    args <- capture_args(list(axis = as_axis))
+    do.call(keras$metrics$CosineSimilarity, args)
+}
-#' Computes Kullback-Leibler divergence
+#' Computes the logarithm of the hyperbolic cosine of the prediction error.
 #'
-#' @details
+#' @description
+#' Formula:
+#'
+#' ```{r, eval = FALSE}
+#' error <- y_pred - y_true
+#' logcosh <- mean(log((exp(error) + exp(-error))/2), axis=-1)
 #' ```
-#' metric = y_true * log(y_true / y_pred)
 #' ```
 #'
-#' See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
+#'
+#' # Examples
+#' Standalone usage:
+#'
+#' ```{r}
+#' m <- metric_log_cosh_error()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(1, 1), c(0, 0)))
+#' m$result()
+#'
+#' m$reset_state()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(1, 1), c(0, 0)),
+#'                sample_weight = c(1, 0))
+#' m$result()
+#' ```
 #'
-#' @inheritParams metric-or-Metric
-#' @inherit metric-or-Metric return
-#' @family metrics
-#' @export
-metric_kullback_leibler_divergence <- py_metric_wrapper(
-  kullback_leibler_divergence, KLDivergence
-)
-
-
-
-#' Computes the logarithm of the hyperbolic cosine of the prediction error
+#' Usage with `compile()` API:
 #'
-#' `logcosh = log((exp(x) + exp(-x))/2)`, where x is the error (`y_pred - y_true`)
+#' ```{r, eval = FALSE}
+#' model %>% compile(optimizer = 'sgd',
+#'                   loss = 'mse',
+#'                   metrics = list(metric_log_cosh_error()))
+#' ```
 #'
-#' @inheritParams metric-or-Metric
-#' @inherit Metric return
-#' @family metrics
+#' @param name
+#' (Optional) string name of the metric instance.
 #'
+#' @param dtype
+#' (Optional) data type of the metric result.
+#'
+#' @param ...
+#' For forward/backward compatibility.
 #'
-#' @export
-metric_logcosh_error <- py_metric_wrapper(
-  NULL, LogCoshError,
-  alist(name = "logcosh")
-)
-
+#' @inherit metric_auc return
+#' @export
+#' @family regression metrics
+#' @family metrics
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.metrics.LogCoshError
+metric_log_cosh_error <-
+function (..., name = "logcosh", dtype = NULL)
+{
+    args <- capture_args()
+    do.call(keras$metrics$LogCoshError, args)
+}
-#' Computes the (weighted) mean of the given values
+#' Computes the mean absolute error between the labels and predictions.
 #'
-#' @details
-#' For example, if values is `c(1, 3, 5, 7)` then the mean is 4.
-#' If the weights were specified as `c(1, 1, 0, 0)` then the mean would be 2.
+#' @description
 #'
-#' This metric creates two variables, `total` and `count` that are used to
-#' compute the average of `values`. This average is ultimately returned as `mean`
-#' which is an idempotent operation that simply divides `total` by `count`.
+#' Formula:
 #'
-#' If `sample_weight` is `NULL`, weights default to 1.
-#' Use `sample_weight` of 0 to mask values.
+#' ```{r, eval = FALSE}
+#' loss <- mean(abs(y_true - y_pred))
+#' ```
 #'
-#' @note Unlike most other metrics, this only takes a single tensor as input to update state.
+#' # Examples
+#' Standalone usage:
 #'
-#' Example usage with `compile()`:
-#' ````
-#' model$add_metric(metric_mean(name='mean_1')(outputs))
-#' model %>% compile(optimizer='sgd', loss='mse')
-#' ````
-#' Example standalone usage:
-#' ```
-#' m <- metric_mean()
-#' m$update_state(c(1, 3, 5, 7))
+#' ```{r}
+#' m <- metric_mean_absolute_error()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(1, 1), c(0, 0)))
 #' m$result()
 #'
 #' m$reset_state()
-#' m$update_state(c(1, 3, 5, 7), sample_weight=c(1, 1, 0, 0))
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(1, 1), c(0, 0)),
+#'                sample_weight = c(1, 0))
 #' m$result()
-#' as.numeric(m$result())
 #' ```
 #'
+#' Usage with `compile()` API:
 #'
-#' @inheritParams metric-or-Metric
-#' @inherit Metric return
-#' @family metrics
-#' @export
-metric_mean <- py_metric_wrapper(
-  NULL, Mean,
-  alist(name = "mean")
-)
-
-
-
-#' Computes the mean absolute error between the labels and predictions
+#' ```{r, eval = FALSE}
+#' model %>% compile(
+#'   optimizer = 'sgd',
+#'   loss = 'mse',
+#'   metrics = list(metric_mean_absolute_error()))
+#' ```
 #'
-#' @details
-#' `loss = mean(abs(y_true - y_pred), axis=-1)`
+#' @param name
+#' (Optional) string name of the metric instance.
 #'
-#' @inheritParams metric-or-Metric
-#' @inherit metric-or-Metric return
-#' @family metrics
-#' @export
-metric_mean_absolute_error <- py_metric_wrapper(
-  mean_absolute_error, MeanAbsoluteError
-)
-
-
-
-#' Computes the mean absolute percentage error between `y_true` and `y_pred`
+#' @param dtype
+#' (Optional) data type of the metric result.
 #'
-#' @details
-#' `loss = 100 * mean(abs((y_true - y_pred) / y_true), axis=-1)`
+#' @param y_true
+#' Ground truth values with shape = `[batch_size, d0, .. dN]`.
+#'
+#' @param y_pred
+#' The predicted values with shape = `[batch_size, d0, .. dN]`.
+#'
+#' @param ...
+#' For forward/backward compatibility.
 #'
-#' @inheritParams metric-or-Metric
-#' @inherit metric-or-Metric return
-#' @family metrics
 #' @export
-metric_mean_absolute_percentage_error <- py_metric_wrapper(
-  mean_absolute_percentage_error, MeanAbsolutePercentageError
-)
-
+#' @inherit metric_binary_accuracy return
+#' @family losses
+#' @family metrics
+#' @family regression metrics
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.metrics.MeanAbsoluteError
+metric_mean_absolute_error <-
+function (y_true, y_pred, ..., name = "mean_absolute_error",
+    dtype = NULL)
+{
+    args <- capture_args(list(y_true = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x), y_pred = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x)))
+    callable <- if (missing(y_true) && missing(y_pred))
+        keras$metrics$MeanAbsoluteError
+    else keras$metrics$mean_absolute_error
+    do.call(callable, args)
+}
-#' Computes the mean Intersection-Over-Union metric
+#' Computes mean absolute percentage error between `y_true` and `y_pred`.
 #'
-#' @details
-#' Mean Intersection-Over-Union is a common evaluation metric for semantic image
-#' segmentation, which first computes the IOU for each semantic class and then
-#' computes the average over classes. IOU is defined as follows:
-#' ````
-#' IOU = true_positive / (true_positive + false_positive + false_negative)
-#' ````
-#' The predictions are accumulated in a confusion matrix, weighted by
-#' `sample_weight` and the metric is then calculated from it.
+#' @description
+#' Formula:
 #'
-#' If `sample_weight` is `NULL`, weights default to 1.
-#' Use `sample_weight` of 0 to mask values.
+#' ```{r, eval = FALSE}
+#' loss <- 100 * mean(abs((y_true - y_pred) / y_true), axis=-1)
+#' ```
 #'
-#' @param num_classes The possible number of labels the prediction task can have.
-#' This value must be provided, since a confusion matrix of `dim`
-#' `c(num_classes, num_classes)` will be allocated.
+#' Division by zero is prevented by dividing by `maximum(y_true, epsilon)`
+#' where `epsilon = keras$backend$epsilon()`
+#' (defaults to `1e-7`).
 #'
+#' # Examples
+#' Standalone usage:
 #'
-#' @inheritParams metric-or-Metric
-#' @inherit Metric return
-#' @family metrics
-#' @export
-metric_mean_iou <- py_metric_wrapper(
-  NULL, MeanIoU,
-  alist(num_classes = ),
-  list(num_classes = as.integer)
-)
-
-
-
-#' Wraps a stateless metric function with the Mean metric
+#' ```{r}
+#' m <- metric_mean_absolute_percentage_error()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(1, 1), c(0, 0)))
+#' m$result()
 #'
-#' @details
-#' You could use this class to quickly build a mean metric from a function. The
-#' function needs to have the signature `fn(y_true, y_pred)` and return a
-#' per-sample loss array. `MeanMetricWrapper$result()` will return
-#' the average metric value across all samples seen so far.
+#' m$reset_state()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(1, 1), c(0, 0)),
+#'                sample_weight = c(1, 0))
+#' m$result()
+#' ```
 #'
-#' For example:
+#' Usage with `compile()` API:
 #'
-#' ```r
-#' accuracy <- function(y_true, y_pred)
-#'   k_cast(y_true == y_pred, 'float32')
+#' ```{r, eval = FALSE}
+#' model %>% compile(
+#'   optimizer = 'sgd',
+#'   loss = 'mse',
+#'   metrics = list(metric_mean_absolute_percentage_error()))
+#' ```
 #'
-#' accuracy_metric <- metric_mean_wrapper(fn = accuracy)
+#' @param name
+#' (Optional) string name of the metric instance.
 #'
-#' model %>% compile(..., metrics=accuracy_metric)
-#' ```
+#' @param dtype
+#' (Optional) data type of the metric result.
 #'
-#' @param fn The metric function to wrap, with signature `fn(y_true, y_pred, ...)`.
+#' @param y_true
+#' Ground truth values with shape = `[batch_size, d0, .. dN]`.
 #'
-#' @param ... named arguments to pass on to `fn`.
+#' @param y_pred
+#' The predicted values with shape = `[batch_size, d0, .. dN]`.
+#'
+#' @param ...
+#' For forward/backward compatibility.
 #'
-#' @inheritParams metric-or-Metric
-#' @inherit Metric return
-#' @family metrics
+#' @inherit metric_binary_accuracy return
 #' @export
-metric_mean_wrapper <- py_metric_wrapper(
-  NULL, MeanMetricWrapper,
-  alist(fn = )
-)
-
+#' @family losses
+#' @family metrics
+#' @family regression metrics
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.metrics.MeanAbsolutePercentageError
+metric_mean_absolute_percentage_error <-
+function (y_true, y_pred, ..., name = "mean_absolute_percentage_error",
+    dtype = NULL)
+{
+    args <- capture_args(list(y_true = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x), y_pred = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x)))
+    callable <- if (missing(y_true) && missing(y_pred))
+        keras$metrics$MeanAbsolutePercentageError
+    else keras$metrics$mean_absolute_percentage_error
+    do.call(callable, args)
+}
-#' Computes the mean relative error by normalizing with the given values
+#' Computes the mean squared error between `y_true` and `y_pred`.
 #'
-#' @details
-#' This metric creates two local variables, `total` and `count` that are used to
-#' compute the mean relative error. This is weighted by `sample_weight`, and
-#' it is ultimately returned as `mean_relative_error`:
-#' an idempotent operation that simply divides `total` by `count`.
+#' @description
 #'
-#' If `sample_weight` is `NULL`, weights default to 1.
-#' Use `sample_weight` of 0 to mask values.
+#' Formula:
 #'
+#' ```{r, eval = FALSE}
+#' loss <- mean(square(y_true - y_pred))
 #' ```
-#' metric = mean(|y_pred - y_true| / normalizer)
-#' ```
-#' For example:
-#' ```
-#' m = metric_mean_relative_error(normalizer=c(1, 3, 2, 3))
-#' m$update_state(c(1, 3, 2, 3), c(2, 4, 6, 8))
-#' # result = mean(c(1, 1, 4, 5) / c(1, 3, 2, 3)) = mean(c(1, 1/3, 2, 5/3))
-#' #        = 5/4 = 1.25
+#'
+#' # Examples
+#' ```{r}
+#' m <- metric_mean_squared_error()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(1, 1), c(0, 0)))
 #' m$result()
 #' ```
 #'
-#' @param normalizer The normalizer values with same shape as predictions.
+#' @param name
+#' (Optional) string name of the metric instance.
 #'
-#' @inheritParams metric-or-Metric
-#' @inherit Metric return
-#' @family metrics
-#' @export
-metric_mean_relative_error <- py_metric_wrapper(
-  NULL, MeanRelativeError,
-  alist(normalizer = )
-)
-
-
-
-#' Computes the mean squared error between labels and predictions
+#' @param dtype
+#' (Optional) data type of the metric result.
 #'
-#' @details
-#' After computing the squared distance between the inputs, the mean value over
-#' the last dimension is returned.
+#' @param y_true
+#' Ground truth values with shape = `[batch_size, d0, .. dN]`.
 #'
-#' `loss = mean(square(y_true - y_pred), axis=-1)`
+#' @param y_pred
+#' The predicted values with shape = `[batch_size, d0, .. dN]`.
 #'
-#' @inheritParams metric-or-Metric
-#' @inherit metric-or-Metric return
-#' @family metrics
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit metric_binary_accuracy return
 #' @export
-metric_mean_squared_error <- py_metric_wrapper(
-  mean_absolute_percentage_error, MeanAbsolutePercentageError
-)
-
+#' @family losses
+#' @family metrics
+#' @family regression metrics
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.metrics.MeanSquaredError
+metric_mean_squared_error <-
+function (y_true, y_pred, ..., name = "mean_squared_error",
+    dtype = NULL)
+{
+    args <- capture_args(list(y_true = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x), y_pred = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x)))
+    callable <- if (missing(y_true) && missing(y_pred))
+        keras$metrics$MeanSquaredError
+    else keras$metrics$mean_squared_error
+    do.call(callable, args)
+}
-#' Computes the mean squared logarithmic error
+#' Computes mean squared logarithmic error between `y_true` and `y_pred`.
 #'
-#' @details
-#' `loss = mean(square(log(y_true + 1) - log(y_pred + 1)), axis=-1)`
+#' @description
+#' Formula:
 #'
-#' @inheritParams metric-or-Metric
-#' @inherit metric-or-Metric return
-#' @family metrics
-#' @export
-metric_mean_squared_logarithmic_error <- py_metric_wrapper(
-  mean_squared_logarithmic_error, MeanSquaredLogarithmicError
-)
-
-
-
-#' Computes the element-wise (weighted) mean of the given tensors
+#' ```{r, eval = FALSE}
+#' loss <- mean(square(log(y_true + 1) - log(y_pred + 1)), axis=-1)
+#' ```
 #'
-#' @details
-#' `MeanTensor` returns a tensor with the same shape of the input tensors. The
-#' mean value is updated by keeping local variables `total` and `count`. The
-#' `total` tracks the sum of the weighted values, and `count` stores the sum of
-#' the weighted counts.
+#' Note that `y_pred` and `y_true` cannot be less than or equal to 0. Negative
+#' values and 0 values will be replaced with `keras$backend$epsilon()`
+#' (defaults to `1e-7`).
 #'
-#' @param shape (Optional) A list of integers, a list of integers, or a 1-D Tensor
-#' of type int32. If not specified, the shape is inferred from the values at
-#' the first call of update_state.
+#' # Examples
+#' Standalone usage:
 #'
+#' ```{r}
+#' m <- metric_mean_squared_logarithmic_error()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(1, 1), c(0, 0)))
+#' m$result()
 #'
-#' @inheritParams metric-or-Metric
-#' @inherit Metric return
-#' @family metrics
-#' @export
-metric_mean_tensor <- py_metric_wrapper(
-  NULL, MeanTensor,
-  alist(shape = NULL)
-)
-
-
-
-#' Computes the Poisson metric between `y_true` and `y_pred`
+#' m$reset_state()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(1, 1), c(0, 0)),
+#'                sample_weight = c(1, 0))
+#' m$result()
+#' ```
 #'
-#' `metric = y_pred - y_true * log(y_pred)`
+#' Usage with `compile()` API:
 #'
+#' ```{r, eval = FALSE}
+#' model %>% compile(
+#'   optimizer = 'sgd',
+#'   loss = 'mse',
+#'   metrics = list(metric_mean_squared_logarithmic_error()))
+#' ```
 #'
-#' @inheritParams metric-or-Metric
-#' @inherit metric-or-Metric return
-#' @family metrics
-#' @export
-metric_poisson <- py_metric_wrapper(
-  poisson, Poisson
-)
-
-
-
-#' Computes the crossentropy metric between the labels and predictions
+#' @param name
+#' (Optional) string name of the metric instance.
 #'
-#' @details
-#' Use this crossentropy metric when there are two or more label classes.
-#' We expect labels to be provided as integers. If you want to provide labels
-#' using `one-hot` representation, please use `CategoricalCrossentropy` metric.
-#' There should be `# classes` floating point values per feature for `y_pred`
-#' and a single floating point value per feature for `y_true`.
+#' @param dtype
+#' (Optional) data type of the metric result.
 #'
-#' In the snippet below, there is a single floating point value per example for
-#' `y_true` and `# classes` floating pointing values per example for `y_pred`.
-#' The shape of `y_true` is `[batch_size]` and the shape of `y_pred` is
-#' `[batch_size, num_classes]`.
+#' @param y_true
+#' Ground truth values with shape = `[batch_size, d0, .. dN]`.
 #'
-#' @param from_logits (Optional) Whether output is expected to be a logits tensor.
-#' By default, we consider that output encodes a probability distribution.
+#' @param y_pred
+#' The predicted values with shape = `[batch_size, d0, .. dN]`.
 #'
+#' @param ...
+#' For forward/backward compatibility.
#'
-#' @inheritParams metric-or-Metric
-#' @inherit metric-or-Metric return
-#' @family metrics
+#' @inherit metric_binary_accuracy return
 #' @export
-metric_sparse_categorical_crossentropy <- py_metric_wrapper(
-  sparse_categorical_crossentropy, SparseCategoricalCrossentropy,
-  alist(from_logits=FALSE, axis = -1L)
-)
-
+#' @family losses
+#' @family metrics
+#' @family regression metrics
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.metrics.MeanSquaredLogarithmicError
+metric_mean_squared_logarithmic_error <-
+function (y_true, y_pred, ..., name = "mean_squared_logarithmic_error",
+    dtype = NULL)
+{
+    args <- capture_args(list(y_true = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x), y_pred = function (x)
+    if (is_py_object(x))
+        x
+    else np_array(x)))
+    callable <- if (missing(y_true) && missing(y_pred))
+        keras$metrics$MeanSquaredLogarithmicError
+    else keras$metrics$mean_squared_logarithmic_error
+    do.call(callable, args)
+}

-#' Computes the squared hinge metric
+#' Computes R2 score.
 #'
-#' `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
-#' provided we will convert them to -1 or 1.
+#' @description
+#' Formula:
 #'
+#' ```{r, eval = FALSE}
+#' sum_squares_residuals <- sum((y_true - y_pred) ** 2)
+#' sum_squares <- sum((y_true - mean(y_true)) ** 2)
+#' R2 <- 1 - sum_squares_residuals / sum_squares
+#' ```
 #'
-#' @inheritParams metric-or-Metric
-#' @inherit metric-or-Metric return
-#' @family metrics
-#' @export
-metric_squared_hinge <- py_metric_wrapper(
-  squared_hinge, SquaredHinge
-)
-
-
-
-#' Computes the categorical hinge metric between `y_true` and `y_pred`
+#' This is also called the
+#' [coefficient of determination](
+#' https://en.wikipedia.org/wiki/Coefficient_of_determination).
 #'
-#' @inheritParams metric-or-Metric
-#' @inherit Metric return
-#' @family metrics
-#' @export
-metric_categorical_hinge <- py_metric_wrapper(
-  NULL, CategoricalHinge
-)
-
-
-
-#' Calculates how often predictions match integer labels
+#' It indicates how close the fitted regression line
+#' is to ground-truth data.
 #'
-#' @details
-#' ```r
-#' acc = k_dot(sample_weight, y_true == k_argmax(y_pred, axis=2))
+#' - The highest score possible is 1.0. It indicates that the predictors
+#'   perfectly account for variation in the target.
+#' - A score of 0.0 indicates that the predictors do not
+#'   account for variation in the target.
+#' - It can also be negative if the model is worse than random.
+#'
+#' This metric can also compute the "Adjusted R2" score.
+#'
+#' # Examples
+#' ```{r}
+#' y_true <- rbind(1, 4, 3)
+#' y_pred <- rbind(2, 4, 4)
+#' metric <- metric_r2_score()
+#' metric$update_state(y_true, y_pred)
+#' metric$result()
 #' ```
 #'
-#' You can provide logits of classes as `y_pred`, since argmax of
-#' logits and probabilities are same.
+#' @param class_aggregation
+#' Specifies how to aggregate scores corresponding to
+#' different output classes (or target dimensions),
+#' i.e. different dimensions on the last axis of the predictions.
+#' Equivalent to `multioutput` argument in Scikit-Learn.
+#' Should be one of
+#' `NULL` (no aggregation), `"uniform_average"`,
+#' `"variance_weighted_average"`.
 #'
-#' This metric creates two local variables, `total` and `count` that are used to
-#' compute the frequency with which `y_pred` matches `y_true`. This frequency is
-#' ultimately returned as `sparse categorical accuracy`: an idempotent operation
-#' that simply divides `total` by `count`.
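+#'
+#' For instance, an illustrative sketch of requesting variance-weighted
+#' aggregation across output dimensions (this call is an assumption based on
+#' the accepted values above, not an example from the upstream docs):
+#'
+#' ```{r, eval = FALSE}
+#' metric <- metric_r2_score(class_aggregation = "variance_weighted_average")
+#' ```
+#'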
+#' @param num_regressors
+#' Number of independent regressors used
+#' ("Adjusted R2" score). 0 is the standard R2 score.
+#' Defaults to `0`.
 #'
-#' If `sample_weight` is `NULL`, weights default to 1.
-#' Use `sample_weight` of 0 to mask values.
+#' @param name
+#' Optional. String name of the metric instance.
 #'
+#' @param dtype
+#' Optional. Data type of the metric result.
 #'
-#' @inheritParams metric-or-Metric
-#' @inherit metric-or-Metric return
-#' @family metrics
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit metric_auc return
 #' @export
-metric_sparse_categorical_accuracy <- py_metric_wrapper(
-  sparse_categorical_accuracy, SparseCategoricalAccuracy
-)
-
+#' @family regression metrics
+#' @family metrics
+# @seealso
+# +
+#'
+#' @tether keras.metrics.R2Score
+metric_r2_score <-
+function (..., class_aggregation = "uniform_average", num_regressors = 0L,
+    name = "r2_score", dtype = NULL)
+{
+    args <- capture_args(list(num_regressors = as_integer))
+    do.call(keras$metrics$R2Score, args)
+}

-#' Computes how often targets are in the top `K` predictions
+#' Computes root mean squared error metric between `y_true` and `y_pred`.
 #'
+#' @description
+#' Formula:
 #'
-#' @param k (Optional) Number of top elements to look at for computing accuracy.
-#'   Defaults to 5.
+#' ```{r, eval = FALSE}
+#' loss <- sqrt(mean((y_pred - y_true) ^ 2))
+#' ```
 #'
+#' # Examples
+#' Standalone usage:
 #'
-#' @inheritParams metric-or-Metric
-#' @inherit metric-or-Metric return
-#' @family metrics
+#' ```{r}
+#' m <- metric_root_mean_squared_error()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(1, 1), c(0, 0)))
+#' m$result()
+#' ```
+#'
+#' ```{r}
+#' m$reset_state()
+#' m$update_state(rbind(c(0, 1), c(0, 0)), rbind(c(1, 1), c(0, 0)),
+#'                sample_weight = c(1, 0))
+#' m$result()
+#' ```
+#'
+#' Usage with `compile()` API:
+#'
+#' ```{r, eval = FALSE}
+#' model %>% compile(
+#'   optimizer = 'sgd',
+#'   loss = 'mse',
+#'   metrics = list(metric_root_mean_squared_error()))
+#' ```
+#'
+#' @param name
+#' (Optional) string name of the metric instance.
+#'
+#' @param dtype
+#' (Optional) data type of the metric result.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @inherit metric_auc return
 #' @export
-metric_top_k_categorical_accuracy <- py_metric_wrapper(
-  top_k_categorical_accuracy, TopKCategoricalAccuracy,
-  alist(k=5L),
-  list(k=as.integer)
-)
+#' @family regression metrics
+#' @family metrics
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.metrics.RootMeanSquaredError
+metric_root_mean_squared_error <-
+function (..., name = "root_mean_squared_error", dtype = NULL)
+{
+    args <- capture_args()
+    do.call(keras$metrics$RootMeanSquaredError, args)
+}

-#' Computes how often integer targets are in the top `K` predictions
-#'
-#' @param k (Optional) Number of top elements to look at for computing accuracy.
-#'   Defaults to 5.
-#'
-#' @inheritParams metric-or-Metric
-#' @inherit metric-or-Metric return
-#' @family metrics
+#' @importFrom reticulate py_to_r_wrapper
 #' @export
-metric_sparse_top_k_categorical_accuracy <- py_metric_wrapper(
-  sparse_top_k_categorical_accuracy, SparseTopKCategoricalAccuracy,
-  alist(k=5L),
-  list(k=as.integer)
-)
+#' @keywords internal
+py_to_r_wrapper.keras.src.metrics.metric.Metric <- py_to_r_wrapper.keras.src.losses.loss.Loss
+
+
+# --------------------------------------------------------------------------------
+
+
+
+# .metric_return_roxygen <- function(has_function_handle = FALSE) {
+#   if(has_function_handle) {
+#     r"---(
+#     @returns
+#     If `y_true` and `y_pred` are missing, a (subclassed) `Metric`
+#     instance is returned. The `Metric` object can be passed directly to
+#     `compile(metrics = )` or used as a standalone object. See `?`[`Metric`] for
+#     example usage.
+#
+#     Alternatively, if called with `y_true` and `y_pred` arguments, then the
+#     computed case-wise values for the mini-batch are returned directly.
+#     )---"
+#   } else {
+#     r"---(
+#     @returns
+#     A (subclassed) `Metric` instance that can be passed directly to
+#     `compile(metrics = )`, or used as a standalone object. See `?`[`Metric`] for
+#     example usage.
+#     )---"
+#   }
+# }

 #' Custom metric function
 #'
 #' @param name name used to show training progress output
-#' @param metric_fn An R function with signature `function(y_true, y_pred){}` that accepts tensors.
+#' @param metric_fn An R function with signature `function(y_true, y_pred)`
+#'   that accepts tensors.
 #'
 #' @details
 #' You can provide an arbitrary R function as a custom metric. Note that
 #' the `y_true` and `y_pred` parameters are tensors, so computations on
-#' them should use backend tensor functions.
+#' them should use `op_*` tensor functions.
 #'
 #' Use the `custom_metric()` function to define a custom metric.
-#' Note that a name ('mean_pred') is provided for the custom metric
+#' Note that a name (`'mean_pred'`) is provided for the custom metric
 #' function: this name is used within training progress output.
 #'
 #' If you want to save and load a model with custom metrics, you should
-#' also specify the metric in the call the [load_model_hdf5()]. For example:
-#' `load_model_hdf5("my_model.h5", c('mean_pred' = metric_mean_pred))`.
+#' also call [`register_keras_serializable()`], or
+#' specify the metric in the call to [load_model()]. For example:
+#' `load_model("my_model.keras", c('mean_pred' = metric_mean_pred))`.
 #'
 #' Alternatively, you can wrap all of your code in a call to
 #' [with_custom_object_scope()] which will allow you to refer to the
 #' metric by name just like you do with built in keras metrics.
 #'
-#' Documentation on the available backend tensor functions can be
-#' found at .
 #'
 #' Alternative ways of supplying custom metrics:
 #' + `custom_metric()`: Arbitrary R function.
 #' + [metric_mean_wrapper()]: Wrap an arbitrary R function in a `Metric` instance.
-#' + subclass `keras$metrics$Metric`: see `?Metric` for example.
+#' + Create a custom [`Metric()`] subclass.
 #'
+#' @returns A callable function with a `__name__` attribute.
 #' @family metrics
 #' @export
 custom_metric <- function(name, metric_fn) {
-  metric_fn <- reticulate::py_func(metric_fn)
-  reticulate::py_set_attr(metric_fn, "__name__", name)
-  metric_fn
+  py_func2(metric_fn, convert = TRUE, name = name)
 }
-
-
-#' (Deprecated) metric_cosine_proximity
-#'
-#' `metric_cosine_proximity()` is deprecated and will be removed in a future
-#' version.
Please update your code to use `metric_cosine_similarity()` if -#' possible. If you need the actual function and not a Metric object, (e.g, -#' because you are using the intermediate computed values in a custom training -#' loop before reduction), please use `loss_cosine_similarity()` or -#' `tensorflow::tf$compat$v1$keras$metrics$cosine_proximity()` -#' -#' @inheritParams metric-or-Metric -#' @keywords internal -#' @export -metric_cosine_proximity <- function(y_true, y_pred) { - warning( -"metric_cosine_proximity() is deprecated and will be removed in a future version.", -" Please update your code to use metric_cosine_similarity() if possible.", -" If you need the actual function and not a Metric object,", -" (e.g, because you are using the intermediate computed values", -" in a custom training loop before reduction), please use loss_cosine_similarity() or", -" tensorflow::tf$compat$v1$keras$metrics$cosine_proximity()") - tensorflow::tf$compat$v1$keras$metrics$cosine_proximity(y_true, y_pred) +# TODO: export ?? +# can be used w/ activations, regularizers, metrics, loss, anything else +# where it helps to have a name +custom_fn <- function(name, fn) { + py_func2(fn, TRUE, name) } -attr(metric_cosine_proximity, "py_function_name") <- "cosine_proximity" - - - - -### some interactive snippets use to autogenerate the starters for docs above. -### There is still quite a bit of manual massaging the docs needed after this. -# library(tidyverse) -# -# inspect <- reticulate::import("inspect") -# -# docstring_parser <- reticulate::import("docstring_parser") -# # reticulate::py_install("docstring_parser", pip = TRUE) -# -# get_doc <- function(py_obj) { -# doc <- docstring_parser$parse( -# inspect$getdoc(py_obj)) -# doc$object <- py_obj -# doc -# # style = docstring_parser$DocstringStyle$GOOGLE) -# # ## not all doc strings successfully parse google style, -# # ## some default to REST style -# } -# -# -# py_str.docstring_parser.common.Docstring <- function(x) { -# cat(docstring_parser$compose(x)) -# } -# -# -# cleanup_description <- function(x) { -# -# # remove leading and trailing whitespace -# x <- gsub("^\\s+|\\s+$", "", x) -# -# # convert 2+ whitespace to 1 ws -# # x <- gsub("(\\s\\s+)", " ", x) -# -# # convert literals -# x <- gsub("None", "NULL", x, fixed=TRUE) -# x <- gsub("True", "TRUE", x, fixed=TRUE) -# x <- gsub("False", "FALSE", x, fixed=TRUE) -# -# # convert tuple to list -# x <- gsub("tuple", "list", x, fixed=TRUE) -# x <- gsub("list/list", "list", x, fixed=TRUE) -# -# x -# } -# -# as_metric_fn_doc <- function(x, name = NULL) { -# con <- textConnection("r-doc", "w") -# on.exit(close(con)) -# cat <- function(..., file = con) -# base::cat(..., "\n", file = file) -# -# # first sentence is taken as title -# # 2nd paragraph is taken as @description -# # 3rd paragraph + is taken as @details -# title <- cleanup_description(x$short_description) -# # title should have no trailing '.' 
-# if (str_sub(title, -1) == ".") -# title <- str_sub(title, end = -2) -# -# # cat("@title ", title) -# cat(title) -# -# desc <- cleanup_description(x$long_description) -# cat() -# -# # avoid splitting across @description and @details, -# # so put everything in @details -# if (length(desc) != 0 && str_detect(desc, "\n")) -# cat("@details") -# cat(desc) -# -# for (p in x$params) { -# if (p$arg_name %in% c("name", "dtype")) next -# cat("\n@param", p$arg_name, cleanup_description(p$description)) -# } -# -# cat() -# -# cat("@inheritParams Metric") -# cat("@inherit Metric return") -# cat("@family metrics") -# cat("@export") -# -# x <- textConnectionValue(con) -# x <- stringr::str_flatten(x, "\n") -# x <- gsub("\n", "\n#' ", x) -# x <- str_c("#' ", x, "\n", name) -# x -# } -# -# x <- keras$metrics$AUC -# as_metric_fn_doc(get_doc(x)) %>% cat() -# -# if(!exists("scratch")) -# scrtch <- tempfile(fileext = ".R") -# keras$metrics %>% -# names() %>% -# grep("[A-Z]", ., value=TRUE) %>% -# map(~as_metric_fn_doc(get_doc(keras$metrics[[.x]]), name = .x)) %>% -# str_flatten(collapse = "\n\n\n") %>% -# cat(file = scrtch) -# -# file.edit(scratch) diff --git a/R/model-creation.R b/R/model-creation.R new file mode 100644 index 0000000000..c8da5f95af --- /dev/null +++ b/R/model-creation.R @@ -0,0 +1,348 @@ + +#' Keras Model (Functional API) +#' +#' A model is a directed acyclic graph of layers. +#' +#' @param inputs Input tensor(s) (from [`keras_input()`]) +#' @param outputs Output tensors (from calling layers with `inputs`) +#' @param ... Any additional arguments +#' +#' @details +#' +#' # Examples +#' ```{r} +#' library(keras3) +#' +#' # input tensor +#' inputs <- keras_input(shape = c(784)) +#' +#' # outputs compose input + dense layers +#' predictions <- inputs |> +#' layer_dense(units = 64, activation = 'relu') |> +#' layer_dense(units = 64, activation = 'relu') |> +#' layer_dense(units = 10, activation = 'softmax') +#' +#' # create and compile model +#' model <- keras_model(inputs = inputs, outputs = predictions) +#' model |> compile( +#' optimizer = 'rmsprop', +#' loss = 'categorical_crossentropy', +#' metrics = c('accuracy') +#' ) +#' ``` +#' +#' @returns A `Model` instance. +#' @export +#' @family model functions +#' @family model creation +#' @tether keras.Model +keras_model <- function(inputs = NULL, outputs = NULL, ...) { + keras$models$Model(inputs = inputs, outputs = outputs, ...) +} + + +#' Create a Keras tensor (Functional API input). +#' +#' @description +#' A Keras tensor is a symbolic tensor-like object, which we augment with +#' certain attributes that allow us to build a Keras model just by knowing the +#' inputs and outputs of the model. +#' +#' For instance, if `a`, `b` and `c` are Keras tensors, +#' it becomes possible to do: +#' `model <- keras_model(input = c(a, b), output = c)` +#' +#' # Examples +#' ```{r} +#' # This is a logistic regression in Keras +#' input <- layer_input(shape=c(32)) +#' output <- input |> layer_dense(16, activation='softmax') +#' model <- keras_model(input, output) +#' ``` +#' +#' @returns +#' A Keras tensor, +#' which can passed to the `inputs` argument of ([`keras_model()`]). +#' +#' @param shape +#' A shape list (list of integers or `NULL` objects), +#' not including the batch size. +#' For instance, `shape = c(32)` indicates that the expected input +#' will be batches of 32-dimensional vectors. Elements of this list +#' can be `NULL` or `NA`; `NULL`/`NA` elements represent dimensions where the shape +#' is not known and may vary (e.g. sequence length). 
+#'
+#' @param batch_size
+#' Optional static batch size (integer).
+#'
+#' @param dtype
+#' The data type expected by the input, as a string
+#' (e.g. `"float32"`, `"int32"`...)
+#'
+#' @param sparse
+#' A boolean specifying whether the expected input will be sparse
+#' tensors. Note that, if `sparse` is `FALSE`, sparse tensors can still
+#' be passed into the input - they will be densified with a default
+#' value of 0. This feature is only supported with the TensorFlow
+#' backend. Defaults to `FALSE`.
+#'
+#' @param name
+#' Optional name string for the layer.
+#' Should be unique in a model (do not reuse the same name twice).
+#' It will be autogenerated if it isn't provided.
+#'
+#' @param tensor
+#' Optional existing tensor to wrap into the `Input` layer.
+#' If set, the layer will use this tensor rather
+#' than creating a new placeholder tensor.
+#'
+#' @param batch_shape
+#' Shape, including the batch dim.
+#'
+#' @export
+#' @family model creation
+# @seealso
+# +
+#'
+#' @tether keras.layers.Input
+keras_input <-
+function (shape = NULL, batch_size = NULL, dtype = NULL, sparse = NULL,
+    batch_shape = NULL, name = NULL, tensor = NULL)
+{
+    args <- capture_args(list(shape = normalize_shape, batch_size = as_integer,
+        input_shape = normalize_shape, batch_input_shape = normalize_shape,
+        batch_shape = normalize_shape))
+    do.call(keras$Input, args)
+}
+
+
+
+
+#' Keras Model composed of a linear stack of layers
+#'
+#' @param input_shape
+#' A shape integer vector,
+#' not including the batch size.
+#' For instance, `shape = c(32)` indicates that the expected input
+#' will be batches of 32-dimensional vectors. Elements of this shape
+#' can be `NA`; `NA` elements represent dimensions where the shape
+#' is not known and may vary (e.g. sequence length).
+#'
+#' @param name Name of model
+#'
+#' @param input_batch_size Optional static batch size (integer).
+#'
+#' @param input_dtype
+#' The data type expected by the input, as a string
+#' (e.g. `"float32"`, `"int32"`...)
+#'
+#' @param input_sparse
+#' A boolean specifying whether the expected input will be sparse
+#' tensors. Note that, if `sparse` is `FALSE`, sparse tensors can still
+#' be passed into the input - they will be densified with a default
+#' value of `0`. This feature is only supported with the TensorFlow
+#' backend. Defaults to `FALSE`.
+#'
+#' @param input_batch_shape
+#' An optional way to specify `batch_size` and `input_shape` as one argument.
+#'
+#' @param input_name
+#' Optional name string for the input layer.
+#' Should be unique in a model (do not reuse the same name twice).
+#' It will be autogenerated if it isn't provided.
+#'
+#' @param input_tensor
+#' Optional existing tensor to wrap into the `InputLayer`.
+#' If set, the layer will use this tensor rather
+#' than creating a new placeholder tensor.
+#'
+#' @param ... additional arguments passed on to `keras.layers.InputLayer`.
+#'
+#' @param layers List of layers to add to the model.
+#'
+#' @param trainable Boolean, whether the model's variables should be trainable.
+#' You can also change the trainable status of a model/layer with
+#' [`freeze_weights()`] and [`unfreeze_weights()`].
+#'
+#' @note
+#'
+#' If `input_shape` is omitted, then the model layer
+#' shapes, including the final model output shape, will not be known until
+#' the model is built, either by calling the model with an input tensor/array
+#' like `model(input)` (possibly via `fit()`/`evaluate()`/`predict()`), or by
+#' explicitly calling `model$build(input_shape)`.
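+#'
+#' A sketch of that deferred-build workflow (illustrative only; `shape()` is
+#' assumed here to accept `NA` for the unknown batch dimension):
+#'
+#' ```{r, eval = FALSE}
+#' model <- keras_model_sequential() |> layer_dense(units = 10)
+#' # no shapes are known yet; build explicitly before inspecting weights:
+#' model$build(input_shape = shape(NA, 784))
+#' ```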
+#'
+#' @details
+#'
+#' # Examples
+#'
+#' ```{r}
+#' model <- keras_model_sequential(input_shape = c(784))
+#' model |>
+#'   layer_dense(units = 32) |>
+#'   layer_activation('relu') |>
+#'   layer_dense(units = 10) |>
+#'   layer_activation('softmax')
+#'
+#' model |> compile(
+#'   optimizer = 'rmsprop',
+#'   loss = 'categorical_crossentropy',
+#'   metrics = c('accuracy')
+#' )
+#'
+#' model
+#' ```
+#'
+#' @returns A `Sequential` model instance.
+#' @export
+#' @family model functions
+#' @family model creation
+#' @tether keras.Sequential
+keras_model_sequential <-
+function(input_shape = NULL, name = NULL,
+         ...,
+         input_dtype = NULL,
+         input_batch_size = NULL,
+         input_sparse = NULL,
+         input_batch_shape = NULL,
+         input_name = NULL,
+         input_tensor = NULL,
+         trainable = TRUE,
+         layers = list())
+{
+  args <- capture_args(list(layers = as_list))
+
+  Sequential_arg_names <- c("layers", "name", "trainable")
+  Sequential_args <- args[intersect(names(args), Sequential_arg_names)]
+  InputLayer_args <- args[setdiff(names(args), Sequential_arg_names)]
+
+  if (length(InputLayer_args)) {
+    # If we received `layers` for the first positional arg, throw a nicer error
+    # message. (The first positional arg used to be `layers`.)
+    if (is_layer(input_shape) ||
+        (is.list(input_shape) && any(map_lgl(input_shape, is_layer))))
+      stop("`layers` must be passed in as a named argument.")
+
+    prepend(Sequential_args$layers) <- do.call(InputLayer, InputLayer_args)
+  }
+
+  do.call(keras$models$Sequential, Sequential_args)
+}
+
+
+
+
+#' @tether keras.layers.InputLayer
+InputLayer <-
+function(input_shape = NULL,
+         ...,
+         input_batch_size = NULL,
+         input_dtype = NULL,
+         input_sparse = NULL,
+         input_batch_shape = NULL,
+         input_name = NULL,
+         input_tensor = NULL)
+{
+  args <- capture_args(list(
+    input_shape = normalize_shape,
+    shape = normalize_shape,
+
+    batch_shape = normalize_shape,
+    input_batch_shape = normalize_shape,
+    batch_input_shape = normalize_shape,
+
+    input_batch_size = as_integer,
+    batch_size = as_integer
+  ))
+
+  args <- rename(args,
+    name = "input_layer_name", # legacy
+    name = "input_name",
+
+    shape = "input_shape",
+
+    batch_shape = "batch_input_shape", # legacy
+    batch_shape = "input_batch_shape",
+
+    batch_size = "input_batch_size",
+
+    dtype = "input_dtype",
+
+    sparse = "input_sparse",
+
+    .skip_existing = TRUE)
+
+  do.call(keras$layers$InputLayer, args)
+}
+
+#' Clone a model instance.
+#'
+#' Model cloning is similar to calling a model on new inputs, except that it
+#' creates new layers (and thus new weights) instead of sharing the weights of
+#' the existing layers.
+#'
+#' @param model Instance of Keras model (could be a functional model or a
+#'   Sequential model).
+#' @param input_tensors Optional list of input tensors to build the model upon.
+#'   If not provided, placeholders will be created.
+#' @param clone_function Callable to be used to clone each layer in the target
+#'   model (except `InputLayer` instances). It takes as argument the layer
+#'   instance to be cloned, and returns the corresponding layer instance to be
+#'   used in the model copy. If unspecified, this callable defaults to the
+#'   following serialization/deserialization function:
+#'
+#'   ```function(layer) layer$`__class__`$from_config(layer$get_config())```
+#'
+#'   By passing a custom callable, you can customize your copy of the model,
+#'   e.g. by wrapping certain layers of interest (you might want to replace all
+#'   LSTM instances with equivalent `Bidirectional(LSTM(...))` instances, for
+#'   example).
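+#'
+#'   A sketch of such a custom callable (this one just mirrors the default
+#'   serialize/deserialize behaviour shown above):
+#'
+#'   ```{r, eval = FALSE}
+#'   clone <- clone_model(model, clone_function = function(layer) {
+#'     layer$`__class__`$from_config(layer$get_config())
+#'   })
+#'   ```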
+#' +#' @returns A new model instance. +#' +#' @export +clone_model <- function(model, input_tensors = NULL, clone_function = NULL) { + args <- capture_args() + do.call(keras$models$clone_model, args) +} + + +# ---- Model methods ---- + + +#' Retrieves a layer based on either its name (unique) or index. +#' +#' Indices are based on order of horizontal graph traversal (bottom-up) and are +#' 1-based. If `name` and `index` are both provided, `index` will take +#' precedence. +#' +#' @param object Keras model object +#' @param name String, name of layer. +#' @param index Integer, index of layer (1-based). Also valid are negative +#' values, which count from the end of model. +#' +#' @returns A layer instance. +#' +#' @family model functions +#' +#' @export +get_layer <- function(object, name = NULL, index = NULL) { + object$get_layer( + name = name, + index = as_layer_index(index) + ) +} + + +#' Remove the last layer in a Sequential model +#' +#' @param object Sequential keras model object +#' @returns The input `object`, invisibly. +#' +#' @family model functions +#' +#' @export +pop_layer <- function(object) { + object$pop() + invisible(object) +} diff --git a/R/model-custom.R b/R/model-custom.R deleted file mode 100644 index 694982a49f..0000000000 --- a/R/model-custom.R +++ /dev/null @@ -1,56 +0,0 @@ - -#' (Deprecated) Create a Keras custom model -#' -#' `keras_model_custom()` is soft-deprecated. Please define custom models by -#' subclassing `keras$Model` directly using [`%py_class%`] or [`R6::R6Class()`]. -#' -#' @param model_fn Function that returns an R custom model -#' @param name Optional name for model -#' -#' @return A Keras model -#' -#' @details For documentation on using custom models, see -#' . -#' -#' @keywords internal -#' @export -keras_model_custom <- function(model_fn, name = NULL) { - - # verify version - if (is_tensorflow_implementation() && keras_version() < "2.1.6") - stop("Custom models require TensorFlow v1.9 or higher") - else if (!is_tensorflow_implementation() && keras_version() < "2.2.0") - stop("Custom models require Keras v2.2 or higher") - - # create the python subclass - python_path <- system.file("python", package = "keras") - tools <- import_from_path("kerastools", path = python_path) - model <- tools$model$RModel(name = name) - - # call the R model function - r_model_call <- model_fn(model) - - # set the _r_call for delegation - model$`_r_call` <- r_model_call - - # return model - model -} - -#' @export -print.kerastools.model.RModel <- function(x, ...) { - if (!x$built) { - cat("Custom Keras model: not yet fitted") - return(invisible(x)) - } - NextMethod() -} - -#' @export -summary.kerastools.model.RModel <- function(object, ...) { - if (!object$built) { - cat("This custom model has not yet been built. 
To see a summary, compile and fit with some data.") - return(invisible(NULL)) - } - NextMethod() -} diff --git a/R/model-legacy.R b/R/model-legacy.R deleted file mode 100644 index b8387901c3..0000000000 --- a/R/model-legacy.R +++ /dev/null @@ -1,120 +0,0 @@ -fit_generator_legacy <- function(object, generator, steps_per_epoch, epochs = 1, - verbose=getOption("keras.fit_verbose", default = 1), callbacks = NULL, - view_metrics = getOption("keras.view_metrics", default = "auto"), - validation_data = NULL, validation_steps = NULL, - class_weight = NULL, max_queue_size = 10, workers = 1, initial_epoch = 0) { - - # resolve view_metrics - if (identical(view_metrics, "auto")) - view_metrics <- resolve_view_metrics(verbose, epochs, object$metrics) - - if (is.list(validation_data)) - validation_data <- do.call(reticulate::tuple, keras_array(validation_data)) - - history <- call_generator_function(object$fit_generator, list( - generator = generator, - steps_per_epoch = as.integer(steps_per_epoch), - epochs = as.integer(epochs), - verbose = as.integer(verbose), - callbacks = normalize_callbacks_with_metrics(view_metrics, initial_epoch, callbacks), - validation_data = validation_data, - validation_steps = as_nullable_integer(validation_steps), - class_weight = as_class_weight(class_weight), - max_queue_size = as.integer(max_queue_size), - workers = as.integer(workers), - initial_epoch = as.integer(initial_epoch) - )) - - # convert to a keras_training history object - history <- to_keras_training_history(history) - - # write metadata from history - write_history_metadata(history) - - # return the history invisibly - invisible(history) -} - -evaluate_generator_legacy <- function(object, generator, steps, max_queue_size = 10, workers = 1, - callbacks = NULL) { - - args <- list( - generator = generator, - steps = as.integer(steps), - max_queue_size = as.integer(max_queue_size), - workers = as.integer(workers) - ) - - args <- resolve_callbacks(args, callbacks) - - # perform evaluation - result <- call_generator_function(object$evaluate_generator, args) - - # apply names - names(result) <- object$metrics_names - - # write run data - tfruns::write_run_metadata("evaluation", result) - - # return result - result -} - -predict_generator_legacy <- function(object, generator, steps, max_queue_size = 10, workers = 1, verbose = 0, - callbacks = NULL) { - - args <- list( - generator = generator, - steps = as.integer(steps), - max_queue_size = as.integer(max_queue_size), - workers = as.integer(workers) - ) - - if (keras_version() >= "2.0.1") - args$verbose <- as.integer(verbose) - - args <- resolve_callbacks(args, callbacks) - - call_generator_function(object$predict_generator, args) -} - -call_generator_function <- function(func, args) { - - # check if any generators should run on the main thread - use_main_thread_generator <- - is_main_thread_generator(args$generator) || - is_main_thread_generator(args$validation_data) - - # handle generators - args$generator <- as_generator(args$generator) - if (!is.null(args$validation_data)) - args$validation_data <- as_generator(args$validation_data) - - # force use of thread based concurrency - if (keras_version() >= "2.0.6") { - args$use_multiprocessing <- FALSE - } else { - args$max_q_size <- args$max_queue_size - args$max_queue_size <- NULL - args$pickle_safe <- FALSE - } - - # if it's a main thread generator then force workers to correct value - if (use_main_thread_generator) { - - # error to use workers > 1 for main thread generator - if (args$workers > 1) { - stop('You may 
not specify workers > 1 for R based generator functions (R ', - 'generators must run on the main thread)', call. = FALSE) - } - - # set workers to 0 for versions of keras that support this - if (keras_version() >= "2.1.2") - args$workers = 0L - else - args$workers = 1L - } - - # call the generator - do.call(func, args) -} diff --git a/R/model-persistence.R b/R/model-persistence.R index 665bca1f3f..9e959068f4 100644 --- a/R/model-persistence.R +++ b/R/model-persistence.R @@ -1,562 +1,926 @@ -#' Save/Load models using HDF5 files +#' Saves a model as a `.keras` file. #' -#' @param object Model object to save -#' @param filepath File path -#' @param compile Whether to compile the model after loading. -#' @param overwrite Overwrite existing file if necessary -#' @param include_optimizer If `TRUE`, save optimizer's state. -#' @param custom_objects Mapping class names (or function names) of custom -#' (non-Keras) objects to class/functions (for example, custom metrics -#' or custom loss functions). This mapping can be done with the dict() -#' function of reticulate. +#' @description #' -#' @details The following components of the model are saved: +#' # Examples +#' ```{r} +#' model <- keras_model_sequential(input_shape = c(3)) |> +#' layer_dense(5) |> +#' layer_activation_softmax() #' -#' - The model architecture, allowing to re-instantiate the model. -#' - The model weights. -#' - The state of the optimizer, allowing to resume training exactly where you -#' left off. -#' This allows you to save the entirety of the state of a model -#' in a single file. +#' model |> save_model("model.keras") +#' loaded_model <- load_model("model.keras") +#' ``` +#' ```{r, results = 'hide'} +#' x <- random_uniform(c(10, 3)) +#' stopifnot(all.equal( +#' model |> predict(x), +#' loaded_model |> predict(x) +#' )) +#' ``` #' -#' Saved models can be reinstantiated via `load_model_hdf5()`. The model returned by -#' `load_model_hdf5()` is a compiled model ready to be used (unless the saved model -#' was never compiled in the first place or `compile = FALSE` is specified). +#' The saved `.keras` file contains: #' -#' As an alternative to providing the `custom_objects` argument, you can -#' execute the definition and persistence of your model using the -#' [with_custom_object_scope()] function. +#' - The model's configuration (architecture) +#' - The model's weights +#' - The model's optimizer's state (if any) #' -#' @note The [serialize_model()] function enables saving Keras models to -#' R objects that can be persisted across R sessions. +#' Thus models can be reinstantiated in the exact same state. #' -#' @family model persistence +#' ```{r} +#' zip::zip_list("model.keras")[, "filename"] +#' ``` #' -#' @export -save_model_hdf5 <- function(object, filepath, overwrite = TRUE, include_optimizer = TRUE) { - - if (!have_h5py()) - stop("The h5py Python package is required to save and load models") - - filepath <- normalize_path(filepath) - - args <- list( - model = object, - filepath = filepath, - overwrite = overwrite, - include_optimizer = include_optimizer - ) - - if (tensorflow::tf_version() >= "1.14.0" && !is_backend("plaidml")) { - args[["save_format"]] <- "h5" - } - - if (confirm_overwrite(filepath, overwrite)) { - do.call(keras$models$save_model, args) - invisible(TRUE) - } else { - invisible(FALSE) - } -} - -#' Save/Load models using SavedModel format +#' ```{r, include = FALSE} +#' unlink("model.keras") +#' ``` +#' +#' @param model a keras model. +#' +#' @param filepath +#' string, +#' Path where to save the model. 
Must end in `.keras`.
 #'
-#' @inheritParams save_model_hdf5
-#' @param signatures Signatures to save with the SavedModel. Please see the signatures
-#'   argument in `tf$saved_model$save` for details.
-#' @param options Optional `tf$saved_model$SaveOptions` object that specifies options
-#'   for saving to SavedModel
+#' @param overwrite
+#' Whether we should overwrite any existing model
+#' at the target location, or instead ask the user
+#' via an interactive prompt.
 #'
-#' @family model persistence
+#' @param ...
+#' For forward/backward compatibility.
 #'
+#' @returns If `filepath` is provided, then this function is called primarily
+#' for side effects, and `model` is returned invisibly. If `filepath` is not
+#' provided or `NULL`, then the serialized model is returned as an R raw
+#' vector.
 #' @export
-save_model_tf <- function(object, filepath, overwrite = TRUE, include_optimizer = TRUE,
-                          signatures = NULL, options = NULL) {
-
-  if (tensorflow::tf_version() < "2.0.0")
-    stop("save_model_tf only works with TF >= 2.0.0", call.=FALSE)
-
-  filepath <- normalize_path(filepath)
-
-  args <- list(
-    model = object,
-    filepath = filepath,
-    overwrite = overwrite,
-    include_optimizer = include_optimizer,
-    signatures = signatures,
-    options = options,
-    save_format = "tf"
-  )
-
-
-  if (confirm_overwrite(filepath, overwrite)) {
-    do.call(keras$models$save_model, args)
-    invisible(TRUE)
-  } else {
-    invisible(FALSE)
+#' @seealso [load_model()]
+#' @family saving and loading functions
+#' @tether keras.saving.save_model
+# @seealso
+# +
+save_model <-
+function (model, filepath = NULL, overwrite = FALSE, ...)
+{
+  if(is.null(filepath) -> return_serialized) {
+    filepath <- tempfile(pattern = "keras_model-", fileext = ".keras")
+    on.exit(unlink(filepath), add = TRUE)
   }
-}
-
-#' @rdname save_model_hdf5
-#' @export
-load_model_hdf5 <- function(filepath, custom_objects = NULL, compile = TRUE) {
-
-  if (!have_h5py())
-    stop("The h5py Python package is required to save and load models")
+  overwrite <- confirm_overwrite(filepath, overwrite)
+  keras$saving$save_model(model, filepath, overwrite = overwrite)

-  load_model(filepath, custom_objects, compile)
+  if(return_serialized)
+    readBin(filepath, what = "raw", n = file.size(filepath))
+  else
+    invisible(model)
 }

-#' @rdname save_model_tf
-#' @export
-load_model_tf <- function(filepath, custom_objects = NULL, compile = TRUE) {
-  if (tensorflow::tf_version() < "2.0.0")
-    stop("TensorFlow version >= 2.0.0 is requires to load models in the SavedModel format.",
-         call. = FALSE)
-  load_model(filepath, custom_objects, compile)
-}
-
-load_model <- function(filepath, custom_objects = NULL, compile = TRUE) {
-  # prepare custom objects
-  custom_objects <- objects_with_py_function_names(custom_objects)
-
-  # build args dynamically so we can only pass `compile` if it's supported
-  # (compile requires keras 2.0.4 / tensorflow 1.3)
-  args <- list(
-    filepath = normalize_path(filepath),
-    custom_objects = custom_objects
-  )
-  if (keras_version() >= "2.0.4")
-    args$compile <- compile
-
-  do.call(keras$models$load_model, args)
-}
-
-
-#' Save/Load model weights using HDF5 files
+#' Loads a model saved via `save_model()`.
 #'
-#' @param object Model object to save/load
-#' @param filepath Path to the file
-#' @param overwrite Whether to silently overwrite any existing
-#'   file at the target location
-#' @param by_name Whether to load weights by name or by topological order.
-#' @param skip_mismatch Logical, whether to skip loading of layers -#' where there is a mismatch in the number of weights, or a mismatch in the -#' shape of the weight (only valid when `by_name = FALSE`). -#' @param reshape Reshape weights to fit the layer when the correct number -#' of values are present but the shape does not match. +#' @description #' -#' @details The weight file has: -#' - `layer_names` (attribute), a list of strings (ordered names of model layers). -#' - For every layer, a `group` named `layer.name` -#' - For every such layer group, a group attribute `weight_names`, a list of strings -#' (ordered names of weights tensor of the layer). -#' - For every weight in the layer, a dataset storing the weight value, named after -#' the weight tensor. +#' # Examples +#' ```{r} +#' model <- keras_model_sequential(input_shape = c(3)) |> +#' layer_dense(5) |> +#' layer_activation_softmax() #' -#' For `load_model_weights()`, if `by_name` is `FALSE` (default) weights are -#' loaded based on the network's topology, meaning the architecture should be -#' the same as when the weights were saved. Note that layers that don't have -#' weights are not taken into account in the topological ordering, so adding -#' or removing layers is fine as long as they don't have weights. +#' model |> save_model("model.keras") +#' loaded_model <- load_model("model.keras") +#' ``` +#' ```{r, results = 'hide'} +#' x <- random_uniform(c(10, 3)) +#' stopifnot(all.equal( +#' model |> predict(x), +#' loaded_model |> predict(x) +#' )) +#' ``` +#' ```{r, include = FALSE} +#' unlink("model.keras") +#' ``` #' -#' If `by_name` is `TRUE`, weights are loaded into layers only if they share -#' the same name. This is useful for fine-tuning or transfer-learning models -#' where some of the layers have changed. +#' Note that the model variables may have different name values +#' (`var$name` property, e.g. `"dense_1/kernel:0"`) after being reloaded. +#' It is recommended that you use layer attributes to +#' access specific variables, e.g. `model |> get_layer("dense_1") |> _$kernel`. #' - -#' @family model persistence +#' @returns +#' A Keras model instance. If the original model was compiled, +#' and the argument `compile = TRUE` is set, then the returned model +#' will be compiled. Otherwise, the model will be left uncompiled. +#' +#' @param model +#' string, path to the saved model file, +#' or a raw vector, as returned by `save_model(filepath = NULL)` +#' +#' @param custom_objects +#' Optional named list mapping names +#' to custom classes or functions to be +#' considered during deserialization. +#' +#' @param compile +#' Boolean, whether to compile the model after loading. +#' +#' @param safe_mode +#' Boolean, whether to disallow unsafe `lambda` deserialization. +#' When `safe_mode=FALSE`, loading an object has the potential to +#' trigger arbitrary code execution. This argument is only +#' applicable to the Keras v3 model format. Defaults to `TRUE`. 
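+#'
+#' @details
+#' A model serialized to a raw vector with `save_model(filepath = NULL)` can
+#' be passed directly as `model`; a minimal round-trip sketch:
+#'
+#' ```{r, eval = FALSE}
+#' bytes <- save_model(model)    # raw vector; no file written
+#' restored <- load_model(bytes)
+#' ```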
#'
 #' @export
-save_model_weights_hdf5 <- function(object, filepath, overwrite = TRUE) {
-
-  if (!have_h5py())
-    stop("The h5py Python package is required to save and load model weights")
-  filepath <- normalize_path(filepath)
-  if (confirm_overwrite(filepath, overwrite)) {
-    object$save_weights(filepath = filepath, overwrite = overwrite,
-                        save_format = "h5")
-    invisible(TRUE)
+#' @tether keras.saving.load_model
+#' @family saving and loading functions
+#' @seealso
+#' +
+# +
+load_model <-
+function (model, custom_objects = NULL, compile = TRUE, safe_mode = TRUE)
+{
+    args <- capture_args(list(custom_objects = normalize_custom_objects),
+        ignore = "model")
+    if (is.raw(model)) {
+        serialized_model <- model
+        filepath <- tempfile(pattern = "keras_model-", fileext = ".keras")
+        on.exit(unlink(filepath), add = TRUE)
+        writeBin(serialized_model, filepath)
   } else {
-    invisible(FALSE)
+        filepath <- model
   }
+    keras$saving$load_model(filepath, !!!args)
 }

-#' Save model weights in the SavedModel format
+
+#' Saves all layer weights to a `.weights.h5` file.
 #'
-#' @inheritParams save_model_weights_hdf5
+#' @param model A keras Model object
 #'
-#' @details
-#' When saving in TensorFlow format, all objects referenced by the network
-#' are saved in the same format as `tf.train.Checkpoint`, including any Layer instances
-#' or Optimizer instances assigned to object attributes. For networks constructed from
-#' inputs and outputs using `tf.keras.Model(inputs, outputs)`, Layer instances used by
-#' the network are tracked/saved automatically. For user-defined classes which inherit
-#' from `tf.keras.Model`, Layer instances must be assigned to object attributes,
-#' typically in the constructor.
+#' @param filepath
+#' string.
+#' Path where to save the weights file. Must end in `.weights.h5`.
 #'
-#' See the documentation of `tf.train.Checkpoint` and `tf.keras.Model` for details.
+#' @param overwrite
+#' Whether we should overwrite any existing model
+#' at the target location, or instead ask the user
+#' via an interactive prompt.
 #'
+#' @returns This is called primarily for side effects. `model` is returned,
+#' invisibly, to enable usage with the pipe.
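+#'
+#' @details
+#' A minimal sketch of a weights-only round trip (assuming `model` is built,
+#' and that the architecture is unchanged between save and load):
+#'
+#' ```{r, eval = FALSE}
+#' model |> save_model_weights("model.weights.h5")
+#' model |> load_model_weights("model.weights.h5")
+#' ```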
#' @export
-save_model_weights_tf <- function(object, filepath, overwrite = TRUE) {
-
-  if (!is_tensorflow_implementation())
-    stop("Save weights to the SavedModel format requires the TensorFlow implementation.")
-
-  if (!tensorflow::tf_version() >= "2.0")
-    stop("Save weights to the SavedModel format requires TensorFlow version >= 2.0")
-
-  filepath <- normalize_path(filepath)
-  if (confirm_overwrite(filepath, overwrite)) {
-    object$save_weights(filepath = filepath, overwrite = overwrite,
-                        save_format = "tf")
-    invisible(TRUE)
-  } else {
-    invisible(FALSE)
-  }
+#' @family saving and loading functions
+#' @tether keras.Model.save_weights
+#' @seealso
+#' +
+# +
+save_model_weights <-
+function (model, filepath, overwrite = FALSE)
+{
+    overwrite <- confirm_overwrite(filepath, overwrite)
+    keras$Model$save_weights(model, filepath, overwrite = overwrite)
+    invisible(model)
 }

-#' @rdname save_model_weights_hdf5
-#' @export
-load_model_weights_hdf5 <- function(object, filepath, by_name = FALSE,
-                                    skip_mismatch = FALSE, reshape = FALSE) {
-  if (!have_h5py())
-    stop("The h5py Python package is required to save and load model weights")
-
-  args <- list(
-    filepath = normalize_path(filepath),
-    by_name = by_name
-  )
-
-  if (keras_version() >= "2.1.4" && !is_tensorflow_implementation()) {
-    args$skip_mismatch <- skip_mismatch
-    args$reshape <- reshape
-  }
-
-  do.call(object$load_weights, args)
-  invisible(object)
-}
-
-#' @inheritParams load_model_weights_hdf5
-#' @rdname save_model_weights_tf
+#' Load weights from a file saved via `save_model_weights()`.
+#'
+#' @description
+#' Weights are loaded based on the network's
+#' topology. This means the architecture should be the same as when the
+#' weights were saved. Note that layers that don't have weights are not
+#' taken into account in the topological ordering, so adding or removing
+#' layers is fine as long as they don't have weights.
+#'
+#' **Partial weight loading**
+#'
+#' If you have modified your model, for instance by adding a new layer
+#' (with weights) or by changing the shape of the weights of a layer,
+#' you can choose to ignore errors and continue loading
+#' by setting `skip_mismatch=TRUE`. In this case any layer with
+#' mismatching weights will be skipped. A warning will be displayed
+#' for each skipped layer.
+#'
+#' @param filepath
+#' String, path to the weights file to load.
+#' It can either be a `.weights.h5` file
+#' or a legacy `.h5` weights file.
+#'
+#' @param skip_mismatch
+#' Boolean, whether to skip loading of layers where
+#' there is a mismatch in the number of weights, or a mismatch in
+#' the shape of the weights.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @param model A keras model.
+#'
+#' @returns This is called primarily for side effects. `model` is returned,
+#' invisibly, to enable usage with the pipe.
 #' @export
-load_model_weights_tf <- function(object, filepath, by_name = FALSE,
-                                  skip_mismatch = FALSE, reshape = FALSE) {
-
-  args <- list(
-    filepath = normalize_path(filepath),
-    by_name = by_name
-  )
-
-  if (keras_version() >= "2.1.4" && !is_tensorflow_implementation()) {
-    args$skip_mismatch <- skip_mismatch
-    args$reshape <- reshape
-  }
-
-  do.call(object$load_weights, args)
-
-  invisible(object)
+#' @family saving and loading functions
+#' @tether keras.Model.load_weights
+#' @seealso
+#' +
+# +
+load_model_weights <-
+function (model, filepath, skip_mismatch = FALSE, ...)
+{
+    args <- capture_args(ignore = "model")
+    do.call(model$load_weights, args)
+    invisible(model)
 }

-#' Model configuration as JSON
+#' Save and load model configuration as JSON
 #'
 #' Save and re-load models configurations as JSON. Note that the representation
 #' does not include the weights, only the architecture.
 #'
-#' @param object Model object to save
-#' @param custom_objects Optional named list mapping names to custom classes or
-#'   functions to be considered during deserialization.
-#' @param json JSON with model configuration
+#' Note: `save_model_config()` serializes the model to JSON using
+#' `serialize_keras_object()`, not `get_config()`. `serialize_keras_object()`
+#' returns a superset of `get_config()`, with additional information needed to
+#' create the class object needed to restore the model. See example for how to
+#' extract the `get_config()` value from a saved model.
+#'
+#' # Example
+#'
+#' ```{r}
+#' model <- keras_model_sequential(input_shape = 10) |> layer_dense(10)
+#' file <- tempfile("model-config-", fileext = ".json")
+#' save_model_config(model, file)
+#'
+#' # load a new model instance with the same architecture but different weights
+#' model2 <- load_model_config(file)
+#'
+#' stopifnot(exprs = {
+#'   all.equal(get_config(model), get_config(model2))
 #'
-#' @family model persistence
+#'   # To extract the `get_config()` value from a saved model config:
+#'   all.equal(
+#'     get_config(model),
+#'     structure(jsonlite::read_json(file)$config,
+#'               "__class__" = keras_model_sequential()$`__class__`)
+#'   )
+#' })
+#' ```
 #'
+#' @param model Model object to save
+#' @param custom_objects Optional named list mapping names to custom classes or
+#'   functions to be considered during deserialization.
+#' @param filepath path to json file with the model config.
+#' @param overwrite
+#' Whether we should overwrite any existing model configuration json
+#' at `filepath`, or instead ask the user
+#' via an interactive prompt.
+#'
+#' @returns This is called primarily for side effects. `model` is returned,
+#' invisibly, to enable usage with the pipe.
+#' @family saving and loading functions
+#' @tether keras.Model.to_json
 #' @export
-model_to_json <- function(object) {
-  object$to_json()
+save_model_config <- function(model, filepath = NULL, overwrite = FALSE)
+{
+  confirm_overwrite(filepath, overwrite)
+  writeLines(model$to_json(), filepath)
+  invisible(model)
 }

-#' @rdname model_to_json
+#' @rdname save_model_config
 #' @export
-model_from_json <- function(json, custom_objects = NULL) {
-  keras$models$model_from_json(json, custom_objects)
+#' @tether keras.models.model_from_json
+load_model_config <- function(filepath, custom_objects = NULL)
+{
+  json <- paste0(readLines(filepath), collapse = "\n")
+  keras$models$model_from_json(json, normalize_custom_objects(custom_objects))
 }

-#' Model configuration as YAML
+#' Create a TF SavedModel artifact for inference (e.g. via TF-Serving).
 #'
-#' Save and re-load models configurations as YAML Note that the representation
-#' does not include the weights, only the architecture.
+#' @description
+#' **Note:** This can currently only be used with
+#' the TensorFlow or JAX backends.
 #'
-#' @inheritParams model_to_json
-#'
-#' @param yaml YAML with model configuration
+#' This method lets you export a model to a lightweight SavedModel artifact
+#' that contains the model's forward pass only (its `call()` method)
+#' and can be served via e.g. TF-Serving.
The forward pass is registered +#' under the name `serve()` (see example below). #' -#' @export -model_to_yaml <- function(object) { - warning("The ability to serialize models to/from yaml was removed in Tensorflow due to security risk of arbitrary code execution. Please us `model_to_json()` instead.") - - if (!have_pyyaml()) - stop("The pyyaml Python package is required to save and load models as YAML") - - object$to_yaml() +#' The original code of the model (including any custom layers you may +#' have used) is *no longer* necessary to reload the artifact -- it is +#' entirely standalone. +#' +#' # Examples +#' ```r +#' # Create the artifact +#' model |> tensorflow::export_savedmodel("path/to/location") +#' +#' # Later, in a different process / environment... +#' library(tensorflow) +#' reloaded_artifact <- tf$saved_model$load("path/to/location") +#' predictions <- reloaded_artifact$serve(input_data) +#' +#' # see tfdeploy::serve_savedmodel() for serving a model over a local web api. +#' ``` +#' +# If you would like to customize your serving endpoints, you can +# use the lower-level `import("keras").export.ExportArchive` class. The +# `export()` method relies on `ExportArchive` internally. +#' +#' @param export_dir_base +#' string, file path where to save +#' the artifact. +#' +#' @param ... For forward/backward compatability. +#' +#' @param object A keras model. +#' +#' @returns This is called primarily for the side effect of exporting `object`. +#' The first argument, `object` is also returned, invisibly, to enable usage +#' with the pipe. +#' +#' @exportS3Method tensorflow::export_savedmodel +#' @tether keras.Model.export +#' @family saving and loading functions +# @seealso +# + +export_savedmodel.keras.src.models.model.Model <- function(object, export_dir_base, ...) { + object$export(export_dir_base, ...) + invisible(object) } -#' @rdname model_to_yaml -#' @export -model_from_yaml <- function(yaml, custom_objects = NULL) { - warning("The ability to serialize models to/from yaml was removed in Tensorflow due to security risk of arbitrary code execution. Please us `model_to_json()` instead.") - if (!have_pyyaml()) - stop("The pyyaml Python package is required to save and load models as YAML") - keras$models$model_from_yaml(yaml, custom_objects) -} -#' Serialize a model to an R object +#' Reload a Keras model/layer that was saved via `export_savedmodel()`. +#' +#' @description +#' +#' # Examples +#' ```{r} +#' model <- keras_model_sequential(input_shape = c(784)) |> layer_dense(10) +#' model |> export_savedmodel("path/to/artifact") +#' reloaded_layer <- layer_tfsm(filepath = "path/to/artifact") +#' input <- random_normal(c(2, 784)) +#' output <- reloaded_layer(input) +#' stopifnot(all.equal(as.array(output), as.array(model(input)))) +#' ``` +#' ```{r, include = FALSE} +#' unlink("path", recursive = TRUE) +#' ``` +#' +#' The reloaded object can be used like a regular Keras layer, and supports +#' training/fine-tuning of its trainable weights. Note that the reloaded +#' object retains none of the internal structure or custom methods of the +#' original object -- it's a brand new layer created around the saved +#' function. #' -#' Model objects are external references to Keras objects which cannot be saved -#' and restored across R sessions. The `serialize_model()` and -#' `unserialize_model()` functions provide facilities to convert Keras models to -#' R objects for persistence within R data files. 
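+#'
+#' For instance, a sketch of composing the reloaded artifact into a new model
+#' for fine-tuning (illustrative only; it reuses the `"path/to/artifact"`
+#' export from the example above):
+#'
+#' ```{r, eval = FALSE}
+#' model2 <- keras_model_sequential(input_shape = c(784)) |>
+#'   layer_tfsm(filepath = "path/to/artifact") |>
+#'   layer_dense(10)
+#' ```
+#'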
+#' **Limitations:**
 #'
-#' @note The [save_model_hdf5()] function enables saving Keras models to
-#' external hdf5 files.
+#' * Only call endpoints with a single `inputs` tensor argument
+#' (which may optionally be a named list/list of tensors) are supported.
+#' For endpoints with multiple separate input tensor arguments, consider
+#' subclassing `layer_tfsm` and implementing a `call()` method with a
+#' custom signature.
+#' * If you need training-time behavior to differ from inference-time behavior
+#' (i.e. if you need the reloaded object to support a `training=TRUE` argument
+#' in `__call__()`), make sure that the training-time call function is
+#' saved as a standalone endpoint in the artifact, and provide its name
+#' to the `layer_tfsm` via the `call_training_endpoint` argument.
 #'
-#' @inheritParams save_model_hdf5
-#' @param model Keras model or R "raw" object containing serialized Keras model.
+#' @param filepath
+#' string, the path to the SavedModel.
 #'
-#' @return `serialize_model()` returns an R "raw" object containing an hdf5
-#'   version of the Keras model. `unserialize_model()` returns a Keras model.
+#' @param call_endpoint
+#' Name of the endpoint to use as the `call()` method
+#' of the reloaded layer. If the SavedModel was created
+#' via `export_savedmodel()`,
+#' then the default endpoint name is `'serve'`. In other cases
+#' it may be named `'serving_default'`.
 #'
-#' @family model persistence
+#' @param object
+#' Object to compose the layer with. A tensor, array, or sequential model.
 #'
+#' @param name
+#' String, name for the object
+#'
+#' @param dtype
+#' datatype (e.g., `"float32"`).
+#'
+#' @param call_training_endpoint
+#' see description
+#'
+#' @param trainable
+#' see description
+#'
+#' @inherit layer_dense return
 #' @export
-serialize_model <- function(model, include_optimizer = TRUE) {
-
-  if (!inherits(model, "keras.engine.training.Model"))
-    stop("You must pass a Keras model object to serialize_model")
+#' @family layers
+#' @family saving and loading functions
+# @seealso
+# +
+#'
+#' @tether keras.layers.TFSMLayer
+layer_tfsm <-
+function (object, filepath, call_endpoint = "serve", call_training_endpoint = NULL,
+    trainable = TRUE, name = NULL, dtype = NULL)
+{
+    args <- capture_args(list(input_shape = normalize_shape,
+        batch_size = as_integer, batch_input_shape = normalize_shape),
+        ignore = "object")
+    create_layer(keras$layers$TFSMLayer, object, args)
+}

-  # write hdf5 file to temp file
-  tmp <- tempfile(pattern = "keras_model", fileext = ".h5")
-  on.exit(unlink(tmp), add = TRUE)
-  save_model_hdf5(model, tmp, include_optimizer = include_optimizer)

-  # read it back into a raw vector
-  readBin(tmp, what = "raw", n = file.size(tmp))
-}

-#' @rdname serialize_model
+#' Registers a custom object with the Keras serialization framework.
+#'
+#' @description
+#' This function registers a custom class or function with the Keras custom
+#' object registry, so that it can be serialized and deserialized without
+#' needing an entry in the user-provided `custom_objects` argument. It also injects a
+#' function that Keras will call to get the object's serializable string key.
+#'
+#' Note that to be serialized and deserialized, classes must implement the
+#' `get_config()` method. Functions do not have this requirement.
+#'
+#' The object will be registered under the key `'package>name'` where `name`
+#' defaults to the object name if not passed.
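+#'
+#' For instance, a plain function (no `get_config()` needed) could be
+#' registered under the key `'my_package>my_activation'`; a minimal sketch
+#' (the activation itself is illustrative):
+#'
+#' ```{r, eval = FALSE}
+#' my_activation <- function(x) x * op_sigmoid(x)
+#' register_keras_serializable(my_activation, package = "my_package")
+#' ```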
+#' +#' # Examples +#' ```{r} +#' # Note that `'my_package'` is used as the `package` argument here, and since +#' # the `name` argument is not provided, `'MyDense'` is used as the `name`. +#' layer_my_dense <- Layer("MyDense") +#' register_keras_serializable(layer_my_dense, package = "my_package") +#' +#' MyDense <- environment(layer_my_dense)$`__class__` # the python class obj +#' stopifnot(exprs = { +#' get_registered_object('my_package>MyDense') == MyDense +#' get_registered_name(MyDense) == 'my_package>MyDense' +#' }) +#' ``` +#' +#' @param package +#' The package that this class belongs to. This is used for the +#' `key` (which is `"package>name"`) to identify the class. +#' Defaults to the current package name, or `"Custom"` outside of a package. +#' +#' @param name +#' The name to serialize this class under in this package. +#' +#' @param object +#' A keras object. +#' +#' @returns `object` is returned invisibly, for convenient piping. This is +#' primarily called for side effects. #' @export -unserialize_model <- function(model, custom_objects = NULL, compile = TRUE) { +#' @family saving and loading functions +#' @family serialization utilities +#' @tether keras.saving.register_keras_serializable +register_keras_serializable <- +function (object, name = NULL, package = NULL) +{ + + py_object <- resolve_py_obj( + object, + default_name = name %||% deparse1(substitute(object)) + ) - # write raw hdf5 bytes to temp file - tmp <- tempfile(pattern = "keras_model", fileext = ".h5") - on.exit(unlink(tmp), add = TRUE) - writeBin(model, tmp) + package <- package %||% + replace_val(environmentName(topenv(parent.frame())), + c("", "base", "R_GlobalEnv"), "Custom") - # read in from hdf5 - load_model_hdf5(tmp, custom_objects = custom_objects, compile = compile) + keras$saving$register_keras_serializable(package, name)(py_object) + invisible(object) } -reload_model <- function(object) { - old_config <- get_config(object) - old_weights <- get_weights(object) - new_model <- from_config(old_config) - set_weights(new_model, old_weights) - - new_model -} -#' Export a Saved Model +#' Get/set the currently registered custom objects. #' -#' Serialize a model to disk. +#' @description +#' Custom objects set using `custom_object_scope()` are not added to the +#' global list of custom objects, and will not appear in the returned +#' list. #' -#' @param object An \R object. -#' @param export_dir_base A string containing a directory in which to export the -#' SavedModel. -#' @param overwrite Should the \code{export_dir_base} directory be overwritten? -#' @param versioned Should the model be exported under a versioned subdirectory? -#' @param remove_learning_phase Should the learning phase be removed by saving -#' and reloading the model? Defaults to \code{TRUE}. -#' @param as_text Whether to write the SavedModel in text format. -#' @param ... Other arguments passed to tf.saved_model.save. (Used only if -#' TensorFlow version >= 2.0) +#' # Examples +#' ```{r, eval = FALSE} +#' get_custom_objects() +#' ``` #' -#' @return The path to the exported directory, as a string. +#' You can use `set_custom_objects()` to restore a previous registry state. +#' ```r +#' # within a function, if you want to temporarily modify the registry, +#' function() { +#' orig_objects <- set_custom_objects(clear = TRUE) +#' on.exit(set_custom_objects(orig_objects)) +#' +#' ## temporarily modify the global registry +#' # register_keras_serializable(....) +#' # .... +#' # on.exit(), the previous registry state is restored. 
+#' }
+#' ```
+#'
+#' @note
+#' `register_keras_serializable()` is preferred over `set_custom_objects()` for
+#' registering new objects.
+#'
+#' @returns
+#' An R named list mapping registered names to registered objects.
+#' `set_custom_objects()` returns the registry values before updating, invisibly.
+#'
 #' @export
-export_savedmodel.keras.engine.training.Model <- function(
-  object,
-  export_dir_base,
-  overwrite = TRUE,
-  versioned = !overwrite,
-  remove_learning_phase = TRUE,
-  as_text = FALSE,
-  ...) {
-
-  export_dir_base <- normalize_path(export_dir_base)
-
-  if (!is_backend("tensorflow"))
-    stop("'export_savedmodel' is only supported in the TensorFlow backend.")
-
-  if (versioned) {
-    export_dir_base <- file.path(export_dir_base, format(Sys.time(), "%Y%m%d%H%M%OS", tz = "GMT"))
-  }
+#' @family serialization utilities
+# @seealso
+# +
+#' @tether keras.saving.get_custom_objects
+get_custom_objects <-
+function ()
+{
+    keras$saving$get_custom_objects()
+}
-  if (identical(remove_learning_phase, TRUE)) {
-    k_set_learning_phase(0)
-    message("Keras learning phase set to 0 for export (restart R session before doing additional training)")
-    object <- reload_model(object)
+#' @rdname get_custom_objects
+#' @param objects A named list of custom objects, as returned by
+#' `get_custom_objects()` and `set_custom_objects()`.
+#' @param clear bool, whether to clear the custom object registry before
+#' populating it with `objects`.
+#' @export
+set_custom_objects <- function(objects = named_list(), clear = TRUE) {
+  # This doesn't use `get_custom_objects.update()` directly because there is a
+  # bug upstream: modifying the global custom objects dict does not update the
+  # global custom names dict, and there are no consistency checks between the
+  # two dicts. They can get out-of-sync if you modify the global custom objects
+  # dict directly without updating the custom names dict. The only safe way to
+  # modify the global dict using the official (exported) api is to call
+  # register_keras_serializable().
+  # o <- py_call(r_to_py(keras$saving$get_custom_objects)); o$clear()
+  m <- import(keras$saving$get_custom_objects$`__module__`, convert = FALSE)
+  # capture the current registry state; returned invisibly below, as documented
+  out <- py_to_r(m$GLOBAL_CUSTOM_OBJECTS)
+
+  if(clear) {
+    m$GLOBAL_CUSTOM_NAMES$clear()
+    m$GLOBAL_CUSTOM_OBJECTS$clear()
  }
-  if (tensorflow::tf_version() >= "1.14") {
-
-    if (overwrite && file.exists(export_dir_base))
-      unlink(export_dir_base, recursive = TRUE)
-
-    if (as_text)
-      warning("as_text is ignored in TensorFlow 1.14")
-
-    tensorflow::tf$saved_model$save(
-      obj = object,
-      export_dir = export_dir_base,
-      ...
-    )
-
-  } else {
+  if(length(objects)) {
+    objects <- normalize_custom_objects(objects)
+    m$GLOBAL_CUSTOM_OBJECTS$update(objects)
+    m$GLOBAL_CUSTOM_NAMES$clear()
+    py_eval("lambda m: m.GLOBAL_CUSTOM_NAMES.update(
+      {obj: name for name, obj in m.GLOBAL_CUSTOM_OBJECTS.items()})")(m)
+  }
-    sess <- backend()$get_session()
+  invisible(out)
+}
-    input_info <- lapply(object$inputs, function(e) {
-      tensorflow::tf$saved_model$utils$build_tensor_info(e)
-    })
-    output_info <- lapply(object$outputs, function(e) {
-      tensorflow::tf$saved_model$utils$build_tensor_info(e)
-    })
+#' Returns the name registered to an object within the Keras framework.
+#'
+#' @description
+#' This function is part of the Keras serialization and deserialization
+#' framework. It maps objects to the string names associated with those objects
+#' for serialization/deserialization.
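+#'
+#' # Examples
+#' A sketch, assuming a custom object was registered as in the
+#' `register_keras_serializable()` example (`MyDense` is illustrative):
+#' ```r
+#' # get_registered_name(MyDense)  # "my_package>MyDense"
+#' ```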
+#' +#' @returns +#' The name associated with the object, or the default name if the +#' object is not registered. +#' +#' @param obj +#' The object to look up. +#' +#' @export +#' @family serialization utilities +# @seealso +# + +#' @tether keras.saving.get_registered_name +get_registered_name <- +function (obj) +{ + py_obj <- resolve_py_obj(obj, default_name = stop("Object must have a `name` attribute")) + keras$saving$get_registered_name(py_obj) +} - names(input_info) <- lapply(object$input_names, function(e) e) - names(output_info) <- lapply(object$output_names, function(e) e) - if (overwrite && file.exists(export_dir_base)) - unlink(export_dir_base, recursive = TRUE) +#' Returns the class associated with `name` if it is registered with Keras. +#' +#' @description +#' This function is part of the Keras serialization and deserialization +#' framework. It maps strings to the objects associated with them for +#' serialization/deserialization. +#' +#' # Examples +#' ```r +#' from_config <- function(cls, config, custom_objects = NULL) { +#' if ('my_custom_object_name' \%in\% names(config)) { +#' config$hidden_cls <- get_registered_object( +#' config$my_custom_object_name, +#' custom_objects = custom_objects) +#' } +#' } +#' ``` +#' +#' @returns +#' An instantiable class associated with `name`, or `NULL` if no such class +#' exists. +#' +#' @param name +#' The name to look up. +#' +#' @param custom_objects +#' A named list of custom objects to look the name up in. +#' Generally, custom_objects is provided by the user. +#' +#' @param module_objects +#' A named list of custom objects to look the name up in. +#' Generally, `module_objects` is provided by midlevel library +#' implementers. +#' +#' @export +#' @family serialization utilities +# @seealso +# + +#' @tether keras.saving.get_registered_object +get_registered_object <- +function (name, custom_objects = NULL, module_objects = NULL) +{ + args <- capture_args(list( + custom_objects = normalize_custom_objects, + module_objects = normalize_custom_objects + )) + obj <- do.call(keras$saving$get_registered_object, args) + # if(inherits(obj, keras$layers$Layer)) + # obj <- create_layer_wrapper(obj) + obj +} - builder <- tensorflow::tf$saved_model$builder$SavedModelBuilder(export_dir_base) - builder$add_meta_graph_and_variables( - sess, - list( - tensorflow::tf$python$saved_model$tag_constants$SERVING - ), - signature_def_map = list( - serving_default = tensorflow::tf$saved_model$signature_def_utils$build_signature_def( - inputs = input_info, - outputs = output_info, - method_name = tensorflow::tf$saved_model$signature_constants$PREDICT_METHOD_NAME - ) - ) - ) - builder$save(as_text = as_text) +#' Retrieve the full config by serializing the Keras object. +#' +#' @description +#' `serialize_keras_object()` serializes a Keras object to a named list +#' that represents the object, and is a reciprocal function of +#' `deserialize_keras_object()`. See `deserialize_keras_object()` for more +#' information about the full config format. +#' +#' @returns +#' A named list that represents the object config. +#' The config is expected to contain simple types only, and +#' can be saved as json. +#' The object can be +#' deserialized from the config via `deserialize_keras_object()`. +#' +#' @param obj +#' the Keras object to serialize. 
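+#'
+#' @details
+#' A minimal sketch of the round trip (assuming the keras3 package is
+#' attached and a backend is available):
+#'
+#' ```r
+#' # serialize a built-in loss instance to a plain named list ...
+#' config <- serialize_keras_object(loss_mean_squared_error())
+#' # ... and recover an equivalent object from it
+#' loss <- deserialize_keras_object(config)
+#' ```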
+#' +#' @export +#' @family serialization utilities +#' @seealso +#' + +# + +serialize_keras_object <- +function (obj) +{ + keras$saving$serialize_keras_object(obj) +} - } - invisible(export_dir_base) +#' Retrieve the object by deserializing the config dict. +#' +#' @description +#' The config dict is a Python dictionary that consists of a set of key-value +#' pairs, and represents a Keras object, such as an `Optimizer`, `Layer`, +#' `Metrics`, etc. The saving and loading library uses the following keys to +#' record information of a Keras object: +#' +#' - `class_name`: String. This is the name of the class, +#' as exactly defined in the source +#' code, such as "LossesContainer". +#' - `config`: Named List. Library-defined or user-defined key-value pairs that store +#' the configuration of the object, as obtained by `object$get_config()`. +#' - `module`: String. The path of the python module. Built-in Keras classes +#' expect to have prefix `keras`. +#' - `registered_name`: String. The key the class is registered under via +#' `register_keras_serializable(package, name)` API. The +#' key has the format of `'{package}>{name}'`, where `package` and `name` are +#' the arguments passed to `register_keras_serializable()`. If `name` is not +#' provided, it uses the class name. If `registered_name` successfully +#' resolves to a class (that was registered), the `class_name` and `config` +#' values in the config dict will not be used. `registered_name` is only used for +#' non-built-in classes. +#' +#' For example, the following config list represents the built-in Adam optimizer +#' with the relevant config: +#' +#' ```{r} +#' config <- list( +#' class_name = "Adam", +#' config = list( +#' amsgrad = FALSE, +#' beta_1 = 0.8999999761581421, +#' beta_2 = 0.9990000128746033, +#' epsilon = 1e-07, +#' learning_rate = 0.0010000000474974513, +#' name = "Adam" +#' ), +#' module = "keras.optimizers", +#' registered_name = NULL +#' ) +#' # Returns an `Adam` instance identical to the original one. +#' deserialize_keras_object(config) +#' ``` +#' +#' If the class does not have an exported Keras namespace, the library tracks +#' it by its `module` and `class_name`. For example: +#' +#' ```r +#' config <- list( +#' class_name = "MetricsList", +#' config = list( +#' ... +#' ), +#' module = "keras.trainers.compile_utils", +#' registered_name = "MetricsList" +#' ) +#' +#' # Returns a `MetricsList` instance identical to the original one. 
+#' deserialize_keras_object(config)
+#' ```
+#'
+#' And the following config represents a user-customized `MeanSquaredError`
+#' loss:
+#'
+#' ```{r, include = FALSE}
+#' # setup for example
+#' o_registered <- set_custom_objects(clear = TRUE)
+#' ```
+#' ```{r}
+#' # define a custom object
+#' loss_modified_mse <- Loss(
+#'   "ModifiedMeanSquaredError",
+#'   inherit = loss_mean_squared_error)
+#'
+#' # register the custom object
+#' register_keras_serializable(loss_modified_mse)
+#'
+#' # confirm object is registered
+#' get_custom_objects()
+#' get_registered_name(loss_modified_mse)
+#'
+#' # now custom object instances can be serialized
+#' full_config <- serialize_keras_object(loss_modified_mse())
+#'
+#' # the `config` arguments will be passed to loss_modified_mse()
+#' str(full_config)
+#'
+#' # and custom object instances can be deserialized
+#' deserialize_keras_object(full_config)
+#' # Returns the `ModifiedMeanSquaredError` object
+#' ```
+#' ```{r, include = FALSE}
+#' # cleanup from example
+#' set_custom_objects(o_registered, clear = TRUE)
+#' ```
+#'
+#' @returns
+#' The object described by the `config` dictionary.
+#'
+#' @param config
+#' Named list describing the object.
+#'
+#' @param custom_objects
+#' Named list containing a mapping between custom
+#' object names and the corresponding classes or functions.
+#'
+#' @param safe_mode
+#' Boolean, whether to disallow unsafe `lambda` deserialization.
+#' When `safe_mode=FALSE`, loading an object has the potential to
+#' trigger arbitrary code execution. This argument is only
+#' applicable to the Keras v3 model format. Defaults to `TRUE`.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @export
+#' @family serialization utilities
+#' @seealso
+#' +
+# +
+deserialize_keras_object <-
+function (config, custom_objects = NULL, safe_mode = TRUE, ...)
+{
+    args <- capture_args(list(custom_objects = normalize_custom_objects))
+    do.call(keras$saving$deserialize_keras_object, args)
 }
-#' (Deprecated) Export to Saved Model format
-#'
-#' @param model A Keras model to be saved. If the model is subclassed, the flag
-#' `serving_only` must be set to `TRUE`.
-#' @param saved_model_path a string specifying the path to the SavedModel directory.
-#' @param custom_objects Optional dictionary mapping string names to custom classes
-#' or functions (e.g. custom loss functions).
-#' @param as_text bool, `FALSE` by default. Whether to write the SavedModel proto in text
-#' format. Currently unavailable in serving-only mode.
-#' @param input_signature A possibly nested sequence of `tf.TensorSpec` objects, used to
-#' specify the expected model inputs. See tf.function for more details.
-#' @param serving_only bool, `FALSE` by default. When this is true, only the
-#' prediction graph is saved.
-#'
-#' @note This functionality is experimental and only works with TensorFlow
-#' version >= "2.0".
+
+#' Provide a scope with mappings of names to custom objects
 #'
-#' @return Invisibly returns the `saved_model_path`.
-#' @family saved_model
 #'
+#' @param objects Named list of objects
+#' @param expr Expression to evaluate
 #'
-#' @keywords internal
+#' @details
+#' There are many elements of Keras models that can be customized with
+#' user objects (e.g. losses, metrics, regularizers, etc.). When
+#' loading saved models that use these functions, you typically
+#' need to explicitly map names to user objects via the `custom_objects`
+#' parameter.
+#'
+#' The `with_custom_object_scope()` function provides an alternative that
+#' lets you create a named alias for a user object that applies to an entire
+#' block of code, and is automatically recognized when loading saved models.
+#'
+#' # Examples
+#' ```r
+#' # define custom metric
+#' metric_top_3_categorical_accuracy <-
+#'   custom_metric("top_3_categorical_accuracy", function(y_true, y_pred) {
+#'     metric_top_k_categorical_accuracy(y_true, y_pred, k = 3)
+#'   })
+#'
+#' with_custom_object_scope(c(top_k_acc = metric_top_3_categorical_accuracy), {
+#'
+#'   # ...define model...
+#'
+#'   # compile model (refer to "top_k_acc" by name)
+#'   model |> compile(
+#'     loss = "binary_crossentropy",
+#'     optimizer = optimizer_nadam(),
+#'     metrics = c("top_k_acc")
+#'   )
+#'
+#'   # save the model
+#'   model |> save_model("my_model.keras")
+#'
+#'   # loading the model within the custom object scope doesn't
+#'   # require explicitly providing the custom_object
+#'   reloaded_model <- load_model("my_model.keras")
+#' })
+#' ```
+#' @returns The result from evaluating `expr` within the custom object scope.
+#' @family saving and loading functions
+#' @family serialization utilities
 #' @export
-model_to_saved_model <- function(model, saved_model_path, custom_objects = NULL,
-                                 as_text = FALSE, input_signature = NULL,
-                                 serving_only = FALSE) {
+with_custom_object_scope <- function(objects, expr) {
+  objects <- normalize_custom_objects(objects)
+  with(keras$saving$CustomObjectScope(objects), expr)
+}
-  if (!is_tensorflow_implementation())
-    stop("TensorFlow implementation is required.")
+# ---- internal utilities ----
-  if (tensorflow::tf_version() > "2.0")
-    stop("This function is deprecated as of TF version 2.1")
+normalize_custom_objects <- function(objects) {
-  if (tensorflow::tf_version() > "1.14")
-    warning("This function is experimental and will be deprecated in TF 2.1")
+  objects <- as_list(objects)
+  if(!length(objects))
+    return(NULL)
-  if (!tensorflow::tf_version() >= "1.14")
-    stop("TensorFlow version >= 1.14 is required. Use export_savedmodel ",
-         "if you need to export to saved model format in older versions.")
+  objects <- do.call(c, .mapply(function(object, name) {
+    # unwrap or convert as needed to get the python object
+    # try to infer correct names or raise an error
+    # return a named list (to convert to a dict), or NULL
+    if (inherits(object, "R6ClassGenerator"))
+      object <- r_to_py.R6ClassGenerator(object)
-  saved_model_path <- path.expand(saved_model_path)
+    object <- resolve_py_obj(
+      object, default_name = name %""%
+        stop("object name could not be inferred; please supply a named list"))
-  tensorflow::tf$keras$experimental$export_saved_model(
-    model = model,
-    saved_model_path = saved_model_path,
-    custom_objects = custom_objects,
-    as_text = as_text,
-    input_signature = input_signature,
-    serving_only = serving_only
-  )
+    out <- list(object)
+    names(out) <- as_r_value(name %""% object$`__name__`)
+    out
+  }, list(objects, rlang::names2(objects)), NULL))
-  invisible(saved_model_path)
+  objects
 }
-#' Load a Keras model from the Saved Model format
-#'
-#' @inheritParams model_to_saved_model
-#'
-#' @return a Keras model.
-#' @family saved_model
-#'
-#' @note This functionality is experimental and only works with TensorFlow
-#' version >= "2.0".
-#' -#' @export -model_from_saved_model <- function(saved_model_path, custom_objects = NULL) { - - if (!is_tensorflow_implementation()) - stop("TensorFlow implementation is required.") - if (tensorflow::tf_version() > "2.0") - stop("This function is deprecated as of TF version 2.1") +confirm_overwrite <- function(filepath, overwrite) { + if (isTRUE(overwrite)) + return(TRUE) - if (tensorflow::tf_version() > "1.14") - warning("This function is experimental and will be deprecated in TF 2.1") + if (!file.exists(filepath)) + return(overwrite) - if (!tensorflow::tf_version() >= "1.14") - stop("TensorFlow version >= 1.14 is required. Use export_savedmodel ", - "if you need to export to saved model format in older versions.") + if (interactive()) + overwrite <- utils::askYesNo( + sprintf("File '%s' already exists - overwrite?", filepath), + default = FALSE) + if (!isTRUE(overwrite)) + stop("File '", filepath, "' already exists (pass overwrite = TRUE to force save).", + call. = FALSE) - saved_model_path <- path.expand(saved_model_path) - tensorflow::tf$keras$experimental$load_from_saved_model( - saved_model_path = saved_model_path, - custom_objects = custom_objects - ) + TRUE } diff --git a/R/model-training.R b/R/model-training.R new file mode 100644 index 0000000000..e0c96c4928 --- /dev/null +++ b/R/model-training.R @@ -0,0 +1,1145 @@ + +# ---- compile ---- +#' Configure a model for training. +#' +#' @description +#' +#' # Examples +#' ```r +#' model |> compile( +#' optimizer = optimizer_adam(learning_rate = 1e-3), +#' loss = loss_binary_crossentropy(), +#' metrics = c(metric_binary_accuracy(), +#' metric_false_negatives()) +#' ) +#' ``` +#' +#' @param object A Keras model. +#' +#' @param optimizer +#' String (name of optimizer) or optimizer instance. See +#' `optimizer_*` family. +#' +#' @param loss +#' Loss function. May be: +#' - a string (name of builtin loss function), +#' - a custom function, or +#' - a [`Loss`] instance (returned by the `loss_*` family of functions). +#' +#' A loss function is any callable with the signature +#' `loss = fn(y_true, y_pred)`, where `y_true` are the ground truth +#' values, and `y_pred` are the model's predictions. +#' `y_true` should have shape `(batch_size, d1, .. dN)` +#' (except in the case of sparse loss functions such as +#' sparse categorical crossentropy which expects integer arrays of +#' shape `(batch_size, d1, .. dN-1)`). +#' `y_pred` should have shape `(batch_size, d1, .. dN)`. +#' The loss function should return a float tensor. +#' +#' @param loss_weights +#' Optional list (named or unnamed) specifying scalar +#' coefficients (R numerics) to weight the loss contributions of +#' different model outputs. The loss value that will be minimized +#' by the model will then be the *weighted sum* of all individual +#' losses, weighted by the `loss_weights` coefficients. If an unnamed list, +#' it is expected to have a 1:1 mapping to the model's outputs. If +#' a named list, it is expected to map output names (strings) to scalar +#' coefficients. +#' +#' @param metrics +#' List of metrics to be evaluated by the model during +#' training and testing. Each of these can be: +#' - a string (name of a +#' built-in function), +#' - a function, optionally with a `"name"` attribute or +#' - a [`Metric()`] +#' instance. See the `metric_*` family of functions. +#' +#' Typically you will use +#' `metrics = c('accuracy')`. A function is any callable with the +#' signature `result = fn(y_true, y_pred)`. 
+#' To specify different
+#' metrics for different outputs of a multi-output model, you could
+#' also pass a named list, such as
+#' `metrics = list(a = 'accuracy', b = c('accuracy', 'mse'))`.
+#' You can also pass a list to specify a metric or a list of
+#' metrics for each output, such as
+#' `metrics = list(c('accuracy'), c('accuracy', 'mse'))`
+#' or `metrics = list('accuracy', c('accuracy', 'mse'))`. When you pass
+#' the strings `'accuracy'` or `'acc'`, we convert this to one of
+#' `metric_binary_accuracy()`,
+#' `metric_categorical_accuracy()`,
+#' `metric_sparse_categorical_accuracy()` based on the
+#' shapes of the targets and of the model output. A similar
+#' conversion is done for the strings `"crossentropy"`
+#' and `"ce"` as well.
+#' The metrics passed here are evaluated without sample weighting;
+#' if you would like sample weighting to apply, you can specify
+#' your metrics via the `weighted_metrics` argument instead.
+#'
+#' If providing an anonymous R function, you can customize the printed name
+#' during training by assigning `attr(<fn>, "name") <- "my_custom_metric_name"`,
+#' or by calling [`custom_metric("my_custom_metric_name", <fn>)`][`custom_metric()`]
+#' (see the sketch under Details).
+#'
+#' @param weighted_metrics
+#' List of metrics to be evaluated and weighted by
+#' `sample_weight` or `class_weight` during training and testing.
+#'
+#' @param run_eagerly
+#' Bool. If `TRUE`, this model's forward pass
+#' will never be compiled. It is recommended to leave this
+#' as `FALSE` when training (for best performance),
+#' and to set it to `TRUE` when debugging.
+#'
+#' @param steps_per_execution
+#' Int. The number of batches to run
+#' during a single compiled function call. Running multiple
+#' batches inside a single compiled function call can
+#' greatly improve performance on TPUs or small models with a large
+#' R/Python overhead. At most, one full epoch will be run each
+#' execution. If a number larger than the size of the epoch is
+#' passed, the execution will be truncated to the size of the
+#' epoch. Note that if `steps_per_execution` is set to `N`,
+#' `Callback$on_batch_begin` and `Callback$on_batch_end` methods
+#' will only be called every `N` batches (i.e. before/after
+#' each compiled function execution).
+#' Not supported with the PyTorch backend.
+#'
+#' @param jit_compile
+#' Bool or `"auto"`. Whether to use XLA compilation when
+#' compiling a model. For `jax` and `tensorflow` backends,
+#' `jit_compile="auto"` enables XLA compilation if the model
+#' supports it, and disables it otherwise.
+#' For the `torch` backend, `"auto"` will default to eager
+#' execution and `jit_compile = TRUE` will run with `torch.compile`
+#' using the `"inductor"` backend.
+#'
+#' @param auto_scale_loss
+#' Bool. If `TRUE` and the model dtype policy is
+#' `"mixed_float16"`, the passed optimizer will be automatically
+#' wrapped in a `LossScaleOptimizer`, which will dynamically
+#' scale the loss to prevent underflow.
+#'
+#' @returns This is called primarily for the side effect of modifying `object`
+#' in-place. The first argument `object` is also returned, invisibly, to
+#' enable usage with the pipe.
+#'
+#' @param ... Additional arguments passed on to the `compile()` model method.
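+#'
+#' @details
+#' A sketch of the custom-metric naming described under `metrics`
+#' (`my_mae` and its display name are illustrative):
+#'
+#' ```r
+#' # an anonymous metric function, given a printed name via attr()
+#' my_mae <- function(y_true, y_pred) {
+#'   op_mean(op_abs(y_pred - y_true), axis = -1L)
+#' }
+#' attr(my_mae, "name") <- "my_mae"
+#'
+#' model |> compile(
+#'   optimizer = "adam",
+#'   loss = "mse",
+#'   metrics = list(my_mae)
+#' )
+#' ```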
+#' @export
+#' @tether keras.Model.compile
+#' @family model training
+#' @seealso
+#' +
+# +
+compile.keras.src.models.model.Model <-
+function (object, optimizer = "rmsprop", loss = NULL, metrics = NULL,
+    ..., loss_weights = NULL, weighted_metrics = NULL,
+    run_eagerly = FALSE,
+    steps_per_execution = 1L,
+    jit_compile = "auto",
+    auto_scale_loss = TRUE)
+{
+    args <- capture_args(list(
+        steps_per_execution = as_integer,
+        loss = as_loss,
+        metrics = as_metrics,
+        weighted_metrics = as_list,
+        loss_weights = as.list
+    ),
+    ignore = "object")
+
+    do.call(object$compile, args)
+
+    # return model invisibly (convenience for chaining)
+    invisible(object)
+}
+
+as_loss <- function(x, default_name = "custom_loss") {
+  if (is.null(x) || is_string(x))
+    return(x)
+  if (is.character(x)) # failed is_string(x), length(x) != 1
+    return(as.list(x))
+  if (is.list(x)) # recurse for multi-output models
+    return(imap(x, function(el, i) {
+      as_loss(el, default_name = paste(default_name, i, sep = "_"))
+    }))
+  resolve_py_obj(x, default_name = default_name, prefer_class = FALSE)
+}
+
+as_metrics <- function(x) as_list(as_loss(x, default_name = "custom_metric"))
+
+
+# ---- evaluate ----
+
+
+#' Evaluate a Keras Model
+#'
+#' @description
+#' This function returns the loss value and metrics values for the model in
+#' test mode.
+#' Computation is done in batches (see the `batch_size` arg).
+#'
+#' @returns
+#' Scalar test loss (if the model has a single output and no metrics)
+#' or list of scalars (if the model has multiple outputs
+#' and/or metrics). The attribute `model$metrics_names` will give you
+#' the display labels for the scalar outputs.
+#'
+#' @param x
+#' Input data. It could be:
+#' - An R array (or array-like), or a list of arrays
+#' (in case the model has multiple inputs).
+#' - A tensor, or a list of tensors
+#' (in case the model has multiple inputs).
+#' - A named list mapping input names to the corresponding array/tensors,
+#' if the model has named inputs.
+#' - A `tf.data.Dataset`. Should return a tuple
+#' of either `(inputs, targets)` or
+#' `(inputs, targets, sample_weights)`.
+#' - A generator returning
+#' `(inputs, targets)` or `(inputs, targets, sample_weights)`.
+#'
+#' @param y
+#' Target data. Like the input data `x`, it could be either R
+#' array(s) or backend-native tensor(s).
+#' If `x` is a `tf.data.Dataset` or generator function,
+#' `y` should not be specified
+#' (since targets will be obtained from the iterator/dataset).
+#'
+#' @param batch_size
+#' Integer or `NULL`. Number of samples per batch of
+#' computation. If unspecified, `batch_size` will default to `32`. Do
+#' not specify the `batch_size` if your data is in the form of a
+#' TF Dataset or generator
+#' (since they generate batches).
+#'
+#' @param verbose
+#' `"auto"`, `0`, `1`, or `2`. Verbosity mode.
+#' `0` = silent, `1` = progress bar, `2` = single line.
+#' `"auto"` becomes `1` for most cases,
+#' `2` if in a knitr render or running on a distributed training server.
+#' Note that the progress bar is not
+#' particularly useful when logged to a file, so `verbose=2` is
+#' recommended when not running interactively
+#' (e.g. in a production environment). Defaults to `"auto"`.
+#'
+#' @param sample_weight
+#' Optional array of weights for the test samples,
+#' used for weighting the loss function.
+#' You can either pass a flat
+#' (1D) R array with the same length as the input samples
+#' (1:1 mapping between weights and samples), or in the case of
+#' temporal data, you can pass a 2D array with shape `(samples,
+#' sequence_length)`, to apply a different weight to every
+#' timestep of every sample. This argument is not supported when
+#' `x` is a TF Dataset; instead, pass sample weights as the third
+#' element of `x`.
+#'
+#' @param steps
+#' Integer or `NULL`. Total number of steps (batches of samples)
+#' before declaring the evaluation round finished. Ignored with the
+#' default value of `NULL`. If `x` is a `tf.data.Dataset` and
+#' `steps` is `NULL`, evaluation will run until the dataset
+#' is exhausted.
+#'
+#' @param callbacks
+#' List of `Callback` instances.
+#' List of callbacks to apply during evaluation.
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @param object Keras model object
+#'
+#' @export
+#' @tether keras.Model.evaluate
+#' @family model training
+#' @seealso
+#' +
+# +
+evaluate.keras.src.models.model.Model <-
+function (object, x = NULL, y = NULL, ..., batch_size = NULL,
+    verbose = getOption("keras.verbose", default = "auto"),
+    sample_weight = NULL, steps = NULL, callbacks = NULL)
+{
+    normalize_input_data <- input_data_normalizer(object)
+    args <- capture_args(list(x = normalize_input_data,
+        y = normalize_input_data,
+        sample_weight = normalize_input_data,
+        batch_size = as_integer, steps = as_integer,
+        verbose = as_model_verbose_arg),
+        ignore = "object",
+        force = "verbose")
+    args[["return_dict"]] <- TRUE
+
+    if(inherits(args$x, "tensorflow.python.data.ops.dataset_ops.DatasetV2") &&
+       !is.null(args$batch_size))
+      stop("batch_size cannot be specified with a TF Dataset")
+
+    result <- do.call(object$evaluate, args)
+
+    tfruns::write_run_metadata("evaluation", unlist(result))
+
+    result
+}
+
+
+# ---- fit ----
+#' Train a model for a fixed number of epochs (dataset iterations).
+#'
+#' @details
+#' Unpacking behavior for iterator-like inputs:
+#'
+#' A common pattern is to pass an iterator-like object such as a
+#' `tf.data.Dataset` or a generator to `fit()`,
+#' which will in fact yield not only features (`x`)
+#' but optionally targets (`y`) and sample weights (`sample_weight`).
+#' Keras requires that the output of such iterator-likes be
+#' unambiguous. The iterator should return a `tuple()`
+#' of length 1, 2, or 3, where the optional second and third elements
+#' will be used for `y` and `sample_weight` respectively.
+#' Any other type provided will be wrapped in
+#' a length-one `tuple()`, effectively treating everything as `x`. When
+#' yielding named lists, they should still adhere to the top-level tuple
+#' structure,
+#' e.g. `tuple(list(x0 = x0, x1 = x1), y)`. Keras will not attempt to separate
+#' features, targets, and weights from the keys of a single dict.
+#'
+#' @returns
+#' A `keras_training_history` object, which is a named list:
+#' `list(params = <params>, metrics = <metrics>)`, with S3 methods
+#' `print()`, `plot()`, and `as.data.frame()`. The `metrics` field is
+#' a record of training loss values and metrics values
+#' at successive epochs, as well as validation loss values
+#' and validation metrics values (if applicable).
+#'
+#' @param x
+#' Input data. It could be:
+#' - An array (or array-like), or a list of arrays
+#' (in case the model has multiple inputs).
+#' - A tensor, or a list of tensors
+#' (in case the model has multiple inputs).
+#' - A named list mapping input names to the corresponding array/tensors,
+#' if the model has named inputs.
+#' - A `tf.data.Dataset`. Should return a tuple
+#' of either `(inputs, targets)` or
+#' `(inputs, targets, sample_weights)`.
+#' - A generator returning `(inputs,
+#' targets)` or `(inputs, targets, sample_weights)`.
+#'
+#' @param y
+#' Target data. Like the input data `x`,
+#' it could be either array(s) or backend-native tensor(s).
+#' If `x` is a TF Dataset or generator,
+#' `y` should
+#' not be specified (since targets will be obtained from `x`).
+#'
+#' @param batch_size
+#' Integer or `NULL`.
+#' Number of samples per gradient update.
+#' If unspecified, `batch_size` will default to `32`.
+#' Do not specify the `batch_size` if your data is in the
+#' form of TF Datasets or generators
+#' (since they generate batches).
+#'
+#' @param epochs
+#' Integer. Number of epochs to train the model.
+#' An epoch is an iteration over the entire `x` and `y`
+#' data provided
+#' (unless the `steps_per_epoch` flag is set to
+#' something other than `NULL`).
+#' Note that in conjunction with `initial_epoch`,
+#' `epochs` is to be understood as "final epoch".
+#' The model is not trained for a number of iterations
+#' given by `epochs`, but merely until the epoch
+#' of index `epochs` is reached.
+#'
+#' @param verbose
+#' `"auto"`, `0`, `1`, or `2`. Verbosity mode.
+#' `0` = silent, `1` = progress bar, `2` = one line per epoch.
+#' `"auto"` becomes `1` for most cases,
+#' `2` if in a knitr render or running on a distributed training server.
+#' Note that the progress bar is not
+#' particularly useful when logged to a file,
+#' so `verbose=2` is recommended when not running interactively
+#' (e.g., in a production environment). Defaults to `"auto"`.
+#'
+#' @param callbacks
+#' List of `Callback()` instances.
+#' List of callbacks to apply during training.
+#' See `callback_*`.
+#'
+#' @param validation_split
+#' Float between 0 and 1.
+#' Fraction of the training data to be used as validation data.
+#' The model will set apart this fraction of the training data,
+#' will not train on it, and will evaluate
+#' the loss and any model metrics
+#' on this data at the end of each epoch.
+#' The validation data is selected from the last samples
+#' in the `x` and `y` data provided, before shuffling. This
+#' argument is not supported when `x` is a TF Dataset or generator.
+#' If both `validation_data` and `validation_split` are provided,
+#' `validation_data` will override `validation_split`.
+#'
+#' @param validation_data
+#' Data on which to evaluate
+#' the loss and any model metrics at the end of each epoch.
+#' The model will not be trained on this data. Note that the
+#' validation loss of data provided using
+#' `validation_split` or `validation_data` is not affected by
+#' regularization layers like noise and dropout.
+#' `validation_data` will override `validation_split`.
+#' It could be:
+#' - A tuple `(x_val, y_val)` of arrays or tensors.
+#' - A tuple `(x_val, y_val, val_sample_weights)` of
+#' arrays.
+#' - A generator returning
+#' `(inputs, targets)` or `(inputs, targets, sample_weights)`.
+#'
+#' @param shuffle
+#' Boolean, whether to shuffle the training data
+#' before each epoch. This argument is
+#' ignored when `x` is a generator or a TF Dataset.
+#'
+#' @param class_weight
+#' Optional named list mapping class indices (integers, 0-based)
+#' to a weight (float) value, used for weighting the loss function
+#' (during training only).
+#' This can be useful to tell the model to
+#' "pay more attention" to samples from
+#' an under-represented class. When `class_weight` is specified
+#' and targets have a rank of 2 or greater, either `y` must be
+#' one-hot encoded, or an explicit final dimension of `1` must
+#' be included for sparse class labels.
+#'
+# @param class_names
+#'
+#' @param sample_weight
+#' Optional array of weights for
+#' the training samples, used for weighting the loss function
+#' (during training only). You can either pass a flat (1D)
+#' array/vector with the same length as the input samples
+#' (1:1 mapping between weights and samples),
+#' or in the case of temporal data,
+#' you can pass a 2D array (matrix) with shape
+#' `(samples, sequence_length)`,
+#' to apply a different weight to every timestep of every sample.
+#' This argument is not supported when `x` is a TF Dataset or generator;
+#' instead, provide the
+#' sample_weights as the third element of `x`.
+#' Note that sample weighting does not apply to metrics specified
+#' via the `metrics` argument in `compile()`. To apply sample
+#' weighting to your metrics, you can specify them via the
+#' `weighted_metrics` in `compile()` instead.
+#'
+#' @param initial_epoch
+#' Integer.
+#' Epoch at which to start training
+#' (useful for resuming a previous training run).
+#'
+#' @param steps_per_epoch
+#' Integer or `NULL`.
+#' Total number of steps (batches of samples)
+#' before declaring one epoch finished and starting the
+#' next epoch. When training with input tensors such as
+#' backend-native tensors, the default `NULL` is equal to
+#' the number of samples in your dataset divided by
+#' the batch size, or `1` if that cannot be determined. If `x` is a
+#' TF Dataset, and `steps_per_epoch`
+#' is `NULL`, the epoch will run until the input dataset is
+#' exhausted. When passing an infinitely repeating dataset, you
+#' must specify the `steps_per_epoch` argument. If
+#' `steps_per_epoch = -1` the training will run indefinitely with an
+#' infinitely repeating dataset.
+#'
+#' @param validation_steps
+#' Only relevant if `validation_data` is provided.
+#' Total number of steps (batches of
+#' samples) to draw before stopping when performing validation
+#' at the end of every epoch. If `validation_steps` is `NULL`,
+#' validation will run until the `validation_data` dataset is
+#' exhausted. In the case of an infinitely repeated dataset, it
+#' will run into an infinite loop. If `validation_steps` is
+#' specified and only part of the dataset is consumed, the
+#' evaluation will start from the beginning of the dataset at each
+#' epoch. This ensures that the same validation samples are used
+#' every time.
+#'
+#' @param validation_batch_size
+#' Integer or `NULL`.
+#' Number of samples per validation batch.
+#' If unspecified, will default to `batch_size`.
+#' Do not specify the `validation_batch_size` if your data is in
+#' the form of TF Datasets or generator
+#' instances (since they generate batches).
+#'
+#' @param validation_freq
+#' Only relevant if validation data is provided.
+#' Specifies how many training epochs to run
+#' before a new validation run is performed,
+#' e.g. `validation_freq=2` runs validation every 2 epochs.
+#'
+#' @param object Keras model object
+#'
+#' @param view_metrics View realtime plot of training metrics (by epoch). The
+#' default (`"auto"`) will display the plot when running within RStudio,
+#' provided `metrics` were specified during model [compile()], `epochs > 1`,
+#' and `verbose > 0`.
+#' Set the global `options(keras.view_metrics = )` option to
+#' establish a different default.
+#'
+#' @param ... Additional arguments passed on to the model `fit()` method.
+#'
+#' @export
+#' @tether keras.Model.fit
+#' @seealso
+#' +
+# +
+fit.keras.src.models.model.Model <-
+function(object,
+         x = NULL,
+         y = NULL,
+         ...,
+         batch_size = NULL,
+         epochs = 1L,
+         callbacks = NULL,
+         validation_split = 0,
+         validation_data = NULL,
+         shuffle = TRUE,
+         class_weight = NULL,
+         # class_names = names(class_weight),
+         sample_weight = NULL,
+         initial_epoch = 1L,
+         steps_per_epoch = NULL,
+         validation_steps = NULL,
+         validation_batch_size = NULL,
+         validation_freq = 1L,
+         verbose = getOption("keras.verbose", default = "auto"),
+         view_metrics = getOption("keras.view_metrics", default = "auto"))
+{
+  normalize_input_data <- input_data_normalizer(object)
+  args <- capture_args(
+    list(
+      x = normalize_input_data,
+      y = normalize_input_data,
+      sample_weight = normalize_input_data,
+      validation_data = normalize_input_data,
+
+      batch_size = as_integer,
+      validation_batch_size = as_integer,
+      epochs = as_integer,
+      initial_epoch = as_index,
+      steps_per_epoch = as_integer,
+      validation_freq = as_integer,
+      validation_steps = as_integer,
+      sample_weight = as_array,
+      class_weight = as_class_weight,
+      verbose = as_model_verbose_arg
+    ),
+    ignore = c("object", "class_names", "view_metrics"),
+    force = "verbose"
+  )
+
+  if (identical(view_metrics, "auto"))
+    view_metrics <- resolve_view_metrics(
+      args$verbose %||% as_model_verbose_arg(verbose),
+      args$epochs %||% epochs,
+      object$metrics)
+
+  args$callbacks <- normalize_callbacks_with_metrics(
+    view_metrics,
+    (args$initial_epoch %||% initial_epoch),
+    args$callbacks
+  )
+
+  # nameOfClass(tensorflow::tf$data$Dataset)
+  if(inherits(args$x, "tensorflow.python.data.ops.dataset_ops.DatasetV2") &&
+     !is.null(args$batch_size))
+    stop("batch_size cannot be specified with a TF Dataset")
+
+  history <- do.call(object$fit, args)
+
+  # convert to a keras_training history object
+  history <- to_keras_training_history(history)
+
+  # write metadata contained in history
+  write_history_metadata(history)
+
+  # return the history invisibly
+  invisible(history)
+}
+
+
+input_data_normalizer <- function(model) {
+  force(model)
+  delayedAssign("dtype",
+    as_r_value(py_get_attr(model, "input_dtype", silent = TRUE)) %||%
+      keras$config$floatx()
+  )
+  .normalize <- function(x) {
+    if (is.null(x) || is_py_object(x))
+      return(x)
+    if (is.list(x))
+      return(lapply(x, .normalize))
+    if (is.function(x))
+      return(as_data_generator(x, dtype))
+
+    if (inherits(x, "factor"))
+      x <- array(as.integer(x) - 1L,
+                 dim = dim(x) %||% length(x))
+
+    # only autocast to different sizes of the same type,
+    # don't auto convert floats to ints, or ints to floats
+    if (!(
+      ( is.double(x) && grepl("float", dtype) ) ||
+      ( is.integer(x) && grepl("int", dtype) )
+    ))
+      dtype <- NULL
+
+    np_array(x, dtype)
+  }
+}
+
+
+
+# ---- predict ----
+#' Generates output predictions for the input samples.
+#'
+#' @details
+#' Computation is done in batches. This method is designed for batch
+#' processing of large numbers of inputs. It is not intended for use inside
+#' loops that iterate over your data and process small numbers of inputs
+#' at a time.
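+#'
+#' A minimal sketch (assuming a trained `model` and an input array `x`):
+#'
+#' ```r
+#' preds <- model |> predict(x, batch_size = 128)
+#' ```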
+#'
+#' For small numbers of inputs that fit in one batch,
+#' call the model directly for faster execution, e.g.
+#' `model(x)`, or `model(x, training = FALSE)` if you have layers such as
+#' `BatchNormalization` that behave differently during
+#' inference.
+#'
+#' # Note
+#' See [this FAQ entry](
+#' https://keras.io/getting_started/faq/#whats-the-difference-between-model-methods-predict-and-call)
+#' for more details about the difference between `Model` methods
+#' `predict()` and `call()`.
+#'
+#' @returns
+#' R array(s) of predictions.
+#'
+#' @param x
+#' Input samples. It could be:
+#' - An array (or array-like), or a list of arrays
+#' (in case the model has multiple inputs).
+#' - A tensor, or a list of tensors
+#' (in case the model has multiple inputs).
+#' - A TF Dataset.
+#'
+#' @param batch_size
+#' Integer or `NULL`.
+#' Number of samples per batch.
+#' If unspecified, `batch_size` will default to `32`.
+#' Do not specify the `batch_size` if your data is in the
+#' form of a TF Dataset or a generator
+#' (since they generate batches).
+#'
+#' @param verbose
+#' `"auto"`, `0`, `1`, or `2`. Verbosity mode.
+#' `0` = silent, `1` = progress bar, `2` = one line per epoch.
+#' `"auto"` becomes `1` for most cases,
+#' `2` if in a knitr render or running on a distributed training server.
+#' Note that the progress bar is not
+#' particularly useful when logged to a file,
+#' so `verbose=2` is recommended when not running interactively
+#' (e.g., in a production environment). Defaults to `"auto"`.
+#'
+#' @param steps
+#' Total number of steps (batches of samples)
+#' before declaring the prediction round finished.
+#' Ignored with the default value of `NULL`.
+#' If `x` is a TF Dataset and `steps` is `NULL`,
+#' `predict()` will run until the input dataset is exhausted.
+#'
+#' @param callbacks
+#' List of `Callback` instances.
+#' List of callbacks to apply during prediction.
+#'
+#' @param object Keras model object
+#' @param ... For forward/backward compatibility.
+#'
+#' @export
+#' @importFrom stats predict
+#' @tether keras.Model.predict
+#' @family model training
+#' @seealso
+#' +
+# +
+predict.keras.src.models.model.Model <-
+function (object, x, ..., batch_size = NULL,
+    verbose = getOption("keras.verbose", default = "auto"), steps = NULL,
+    callbacks = NULL)
+{
+    normalize_input_data <- input_data_normalizer(object)
+    args <- capture_args(list(x = normalize_input_data,
+        batch_size = as_integer, steps = as_integer,
+        verbose = as_model_verbose_arg),
+        ignore = "object",
+        force = "verbose")
+
+    if(inherits(args$x, "tensorflow.python.data.ops.dataset_ops.DatasetV2") &&
+       !is.null(args$batch_size))
+      stop("batch_size cannot be specified with a TF Dataset")
+
+    do.call(object$predict, args)
+}
+
+# ---- predict_on_batch ----
+#' Returns predictions for a single batch of samples.
+#'
+#' @returns
+#' Array(s) of predictions.
+#'
+#' @param object Keras model object
+#'
+#' @param x
+#' Input data. It must be array-like.
+#'
+#' @export
+#' @tether keras.Model.predict_on_batch
+#' @family model training
+#' @seealso
+#' +
+# +
+predict_on_batch <-
+function(object, x)
+{
+  object$predict_on_batch(as_array(x))
+}
+
+
+# ---- test_on_batch ----
+#' Test the model on a single batch of samples.
+#'
+#' @returns
+#' A scalar loss value (when no metrics),
+#' or a named list of loss and metric values
+#' (if there are metrics).
+#'
+#' @param x
+#' Input data. Must be array-like.
+#'
+#' @param y
+#' Target data. Must be array-like.
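+#'
+#' @details
+#' A minimal sketch (assuming a compiled `model` and arrays `x_batch` and
+#' `y_batch` holding a single batch of data):
+#'
+#' ```r
+#' res <- model |> test_on_batch(x_batch, y_batch)
+#' # either a scalar loss, or a named list of loss and metric values
+#' ```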
+#'
+#' @param sample_weight
+#' Optional array of the same length as x, containing
+#' weights to apply to the model's loss for each sample.
+#' In the case of temporal data, you can pass a 2D array
+#' with shape `(samples, sequence_length)`, to apply a different
+#' weight to every timestep of every sample.
+#'
+# @param return_dict
+# If `TRUE`, loss and metric results are returned as a
+# dict, with each key being the name of the metric. If `FALSE`,
+# they are returned as a list.
+#'
+#' @param object Keras model object
+#' @param ... For forward/backward compatibility.
+#'
+#' @export
+#' @tether keras.Model.test_on_batch
+#' @family model training
+#' @seealso
+#' +
+# +
+test_on_batch <-
+function (object, x, y = NULL, sample_weight = NULL, ...)
+{
+  result <- object$test_on_batch(as_array(x),
+                                 as_array(y),
+                                 as_array(sample_weight), ..., return_dict = TRUE)
+  if (is_scalar(result)) result[[1L]] else result
+}
+
+# ---- train_on_batch ----
+#' Runs a single gradient update on a single batch of data.
+#'
+#' @returns
+#' A scalar loss value (when no metrics),
+#' or a named list of loss and metric values
+#' (if there are metrics).
+#' The property `model$metrics_names`
+#' will give you the display labels for the scalar outputs.
+#'
+#' @param x
+#' Input data. Must be array-like.
+#'
+#' @param y
+#' Target data. Must be array-like.
+#'
+#' @param sample_weight
+#' Optional array of the same length as x, containing
+#' weights to apply to the model's loss for each sample.
+#' In the case of temporal data, you can pass a 2D array
+#' with shape `(samples, sequence_length)`, to apply a different
+#' weight to every timestep of every sample.
+#'
+#' @param class_weight
+#' Optional named list mapping class indices (integers, 0-based)
+#' to a weight (float) to apply to the model's loss for the samples
+#' from this class during training. This can be useful to tell the
+#' model to "pay more attention" to samples from an
+#' under-represented class. When `class_weight` is specified
+#' and targets have a rank of 2 or greater, either `y` must
+#' be one-hot encoded, or an explicit final dimension of 1
+#' must be included for sparse class labels.
+#'
+# @param return_dict
+# If `True`, loss and metric results are returned as a
+# dict, with each key being the name of the metric. If `False`,
+# they are returned as a list.
+#'
+#' @param object Keras model object
+#'
+#' @export
+#' @tether keras.Model.train_on_batch
+#' @family model training
+#' @seealso
+#' +
+# +
+train_on_batch <-
+function (object, x, y = NULL, sample_weight = NULL, class_weight = NULL)
+{
+  result <- object$train_on_batch(as_array(x),
+                                  as_array(y),
+                                  as_array(sample_weight),
+                                  class_weight = as_class_weight(class_weight),
+                                  return_dict = TRUE)
+  if(is_scalar(result)) result[[1L]] else result
+}
+
+
+
+
+# ---- summary ----
+#' Print a summary of a Keras Model
+#'
+#' @param line_length
+#' Total length of printed lines
+#' (e.g. set this to adapt the display to different
+#' terminal window sizes).
+#'
+#' @param positions
+#' Relative or absolute positions of log elements
+#' in each line. If not provided, becomes
+#' `c(0.3, 0.6, 0.7, 1)`. Defaults to `NULL`.
+#'
+# ' @param print_fn
+# ' Print function to use. By default, prints to `stdout`.
+# ' It will be called on each line of the summary.
+# ' You can set it to a custom function
+# ' in order to capture the string summary.
+#'
+#' @param expand_nested
+#' Whether to expand the nested models.
+#' Defaults to `FALSE`.
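+#'
+#' @details
+#' A minimal sketch (assuming a built `model`):
+#'
+#' ```r
+#' model |> summary()                       # print the summary
+#' s <- model |> format(line_length = 70)   # capture it as a string
+#' ```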
+#'
+#' @param show_trainable
+#' Whether to show if a layer is trainable.
+#' Defaults to `FALSE`.
+#'
+#' @param layer_range
+#' a list, tuple, or vector of 2 strings,
+#' which is the starting layer name and ending layer name
+#' (both inclusive) indicating the range of layers to be printed
+#' in summary. It also accepts regex patterns instead of exact
+#' names. In that case, the start predicate will be the first element
+#' that matches `layer_range[[1]]` and the end predicate will be
+#' the last element that matches `layer_range[[2]]`.
+#' By default `NULL`, which considers all layers of the model.
+#'
+#' @param object,x Keras model instance
+#' @param compact Whether to remove white-space only lines from the model
+#' summary. (Default `TRUE`)
+#' @param ... For `summary()` and `print()`, passed on to `format()`. For
+#' `format()`, passed on to `model$summary()`.
+#'
+#' @family model functions
+#'
+#' @returns `format()` returns a length 1 character vector. `print()` returns the
+#' model object invisibly. `summary()` returns the output of `format()`
+#' invisibly after printing it.
+#'
+#' @section Enabling color output in Knitr (RMarkdown, Quarto):
+#'
+#' To enable color output in a quarto or rmarkdown document with
+#' an html output format (including revealjs presentations), you will need
+#' to do the following in a setup chunk:
+#'
+#'
+#' ````
+#' ```{r setup, include = FALSE}
+#' options(cli.num_colors = 256)
+#' fansi::set_knit_hooks(knitr::knit_hooks)
+#' options(width = 75) # adjust as needed for format
+#' ```
+#' ````
+#'
+#'
+#' @export
+summary.keras.src.models.model.Model <- function(object, ...) {
+  writeLines(f <- format.keras.src.models.model.Model(object, ...))
+  # TODO: knit_print...?
+  invisible(f)
+}
+
+
+#' @rdname summary.keras.src.models.model.Model
+#' @export
+format.keras.src.models.model.Model <-
+function(x,
+         line_length = getOption("width"), # width - (12L * show_trainable),
+         positions = NULL,
+         expand_nested = FALSE,
+         show_trainable = NA,
+         ...,
+         # force_ascii ... (impl in man/roxygen/meta.R)
+         # width = getOption("width"),
+         # rich = TRUE, ??
+         # print_fn = NULL,
+         layer_range = NULL,
+         compact = TRUE) {
+
+  if (py_is_null_xptr(x))
+    return("")
+
+  args <- capture_args(ignore = c("x", "compact", "width"),
+                       force = c("show_trainable", "line_length"))
+
+  if(is.na(args$show_trainable)) {
+    built <- as_r_value(py_get_attr(x, "built", silent = TRUE)) %||% FALSE
+    args$show_trainable <- built && as.logical(length(x$non_trainable_weights))
+  }
+
+  # args$print_fn <- function(x, ...) {browser(); x}
+
+  # Do we need to check for model$built before calling summary?
+  with_rich_config(
+    out <- trimws(py_capture_output(do.call(x$summary, args)))
+  )
+
+  if(compact) {
+    # strip empty lines
+    out <- gsub("(\\n\\s*\\n)", "\n", out, perl = TRUE)
+    if(expand_nested)
+      out <- gsub("\\n\\|\\s+\\|\\n", "\n", out)
+  }
+
+  out
+}
+
+#
+#' @rdname summary.keras.src.models.model.Model
+#' @export
+print.keras.src.models.model.Model <- function(x, ...)
{ + writeLines(format.keras.src.models.model.Model(x, ...)) + invisible(x) +} + +#' @importFrom reticulate py_str +#' @export +py_str.keras.src.models.model.Model <- function(object, ...) { + format.keras.src.models.model.Model(object, ...) +} + + +with_rich_config <- function(expr) { + + vars <- list( + COLUMNS = as.character(getOption("width")) + ) + + if (Sys.getenv("COLORTERM", "truecolor") == "truecolor" && + cli::num_ansi_colors() >= 256L) { + vars$COLORTERM <- "truecolor" + vars$FORCE_COLOR <- "yes" + } + + with_envvar2(vars, expr) +} + + +with_envvar2 <- function(vars, expr) { + py_environ <- import("os", convert = FALSE)$environ + + og_r_vars <- Sys.getenv(names(vars), unset = NA_character_, names = TRUE) + og_py_vars <- lapply(names(vars), function(key) + py_get_item(py_environ, key, silent = TRUE)) + names(og_py_vars) <- names(vars) + + names_unset_vars <- + names(vars[map_lgl(vars, function(v) is.null(v) || is.na(v))]) + vars <- vars[setdiff(names(vars), names_unset_vars)] + if (length(vars)) { + do.call(Sys.setenv, as.list(vars)) + imap(vars, function(val, key) { + py_set_item(py_environ, key, val) + }) + } + for (name in names_unset_vars) { + Sys.unsetenv(name) + py_del_item(py_environ, name) + } + + on.exit({ + og_r_var_was_unset <- is.na(og_r_vars) + set_r_vars <- og_r_vars[!og_r_var_was_unset] + if (length(set_r_vars)) + do.call(Sys.setenv, as.list(set_r_vars)) + for (name in names(og_r_vars)[og_r_var_was_unset]) + Sys.unsetenv(name) + + imap(og_py_vars, function(val, key) { + if (is.null(val)) + py_del_item(py_environ, key) + else + py_set_item(py_environ, key, val) + NULL + }) + + NULL + }, add = TRUE) + force(expr) +} + + + +# ---- internal utils ---- + + + +as_model_verbose_arg <- function(x) { + if(!identical(x, "auto")) + return(as.integer(x)) + # x == auto + if(isTRUE(getOption('knitr.in.progress'))) + return(2L) + x # "auto" +} + + +as_class_weight <- function(class_weight, class_names = NULL) { + if (is.null(class_weight)) + return(NULL) + if (is.numeric(class_weight)) + class_weight <- as.list(class_weight) + + # convert class weights to python dict + if (is.list(class_weight)) + # dict() converts numeric (chr) names to numeric (dbl) keys + return(dict(class_weight)) + + stop("class_weight must be a named list of weights") +} + + + +# determine whether to view metrics or not +resolve_view_metrics <- function(verbose, epochs, metrics) { + (epochs > 1) && # more than 1 epoch + (verbose > 0) && # verbose mode is on + !is.null(getOption("viewer")) && # have an internal viewer available + nzchar(Sys.getenv("RSTUDIO")) # running under RStudio +} + + +write_history_metadata <- function(history) { + properties <- list() + properties$validation_samples <- history$params$validation_samples + tfruns::write_run_metadata("properties", properties) +} + + + +py_generator <- function(fn, completed = NULL, prefetch = 0L, convert = FALSE) { + iterator2generator <- py_eval("lambda iterator: (yield from iterator)", + convert = convert) + py_call(iterator2generator, py_iterator(fn, completed, prefetch)) +} + + +as_data_generator <- function(fn, dtype = NULL) { + force(fn); force(dtype) + python_path <- system.file("python", package = "keras3") + tools <- reticulate::import_from_path("kerastools", path = python_path) + + py_generator(function() { + x <- keras_array(fn(), dtype = dtype) + if (is.null(x)) + NULL + else + tuple(x) + }, completed = NULL, prefetch = 1L) + +} + + + + + + +# ' @exportS3Method knitr::knit_print +knit_print__keras.src.models.model.Model <- function(x, ...) 
{ + #from keras.src.utils.summary_utils + # record_console <- py_run_string(local = TRUE, glue::trim(" + # class record_console: + # def __init__(self): + # self.last_console = None + # + # def __enter__(self, *args): + # import rich + # self.rich = rich + # from functools import wraps + # og_Console = + # self.og_Console = rich.console.Console + # @wraps(og_Console) + # def Console(*args, record = True, **kwargs): + # kwargs['record'] = record + # global last_console + # self.last_console = self.og_Console(*args, **kwargs) + # return self.last_console + # rich.console.Console = Console + # + # def __exit__(self, *args): + # self.rich.console.Console = self.og_Console + # "))$record_console + + knitrtools <- import_kerastools("knitr") + recorder <- knitrtools$RichConsoleRecorder() + # restore <- py_local$restore + with(recorder, { + format.keras.src.models.model.Model(x) + }) + + if(knitr::is_html_output()) { + html <- recorder$console$export_html( + inline_styles = TRUE, + clear = TRUE + ) + knitr::raw_html(html) + } else { + text <- recorder$console$export_text( + styles = FALSE, # plain text + clear = TRUE + ) + + text + } + +} diff --git a/R/model.R b/R/model.R deleted file mode 100644 index 5468af6e96..0000000000 --- a/R/model.R +++ /dev/null @@ -1,1577 +0,0 @@ - -#' Keras Model -#' -#' A model is a directed acyclic graph of layers. -#' -#' @param inputs Input layer -#' @param outputs Output layer -#' @param ... Any additional arguments -#' @family model functions -#' -#' @examples -#' \dontrun{ -#' library(keras) -#' -#' # input layer -#' inputs <- layer_input(shape = c(784)) -#' -#' # outputs compose input + dense layers -#' predictions <- inputs %>% -#' layer_dense(units = 64, activation = 'relu') %>% -#' layer_dense(units = 64, activation = 'relu') %>% -#' layer_dense(units = 10, activation = 'softmax') -#' -#' # create and compile model -#' model <- keras_model(inputs = inputs, outputs = predictions) -#' model %>% compile( -#' optimizer = 'rmsprop', -#' loss = 'categorical_crossentropy', -#' metrics = c('accuracy') -#' ) -#' } -#' @export -keras_model <- function(inputs, outputs = NULL, ...) { - if (tf_version() < "2.4") - names(inputs) <- names(outputs) <- NULL - - keras$models$Model(inputs = inputs, outputs = outputs, ...) -} - - -#' Keras Model composed of a linear stack of layers -#' -#' @param layers List of layers to add to the model -#' @param name Name of model -#' @inheritDotParams sequential_model_input_layer -#' -#' @note -#' -#' If any arguments are provided to `...`, then the sequential model is -#' initialized with a `InputLayer` instance. If not, then the first layer passed -#' to a Sequential model should have a defined input shape. What that means is -#' that it should have received an `input_shape` or `batch_input_shape` -#' argument, or for some type of layers (recurrent, Dense...) an `input_dim` -#' argument. 
-#' -#' @family model functions -#' -#' @examples -#' \dontrun{ -#' -#' library(keras) -#' -#' model <- keras_model_sequential() -#' model %>% -#' layer_dense(units = 32, input_shape = c(784)) %>% -#' layer_activation('relu') %>% -#' layer_dense(units = 10) %>% -#' layer_activation('softmax') -#' -#' model %>% compile( -#' optimizer = 'rmsprop', -#' loss = 'categorical_crossentropy', -#' metrics = c('accuracy') -#' ) -#' -#' # alternative way to provide input shape -#' model <- keras_model_sequential(input_shape = c(784)) %>% -#' layer_dense(units = 32) %>% -#' layer_activation('relu') %>% -#' layer_dense(units = 10) %>% -#' layer_activation('softmax') -#' -#' } -#' @export -keras_model_sequential <- function(layers = NULL, name = NULL, ...) { - - if (length(list(...))) - layers <- c(sequential_model_input_layer(...), layers) - - keras$models$Sequential(layers = layers, name = name) -} - - - - -#' sequential_model_input_layer -#' -#' @param input_shape an integer vector of dimensions (not including the batch -#' axis), or a `tf$TensorShape` instance (also not including the batch axis). -#' @param batch_size Optional input batch size (integer or NULL). -#' @param dtype Optional datatype of the input. When not provided, the Keras -#' default float type will be used. -#' @param input_tensor Optional tensor to use as layer input. If set, the layer -#' will use the `tf$TypeSpec` of this tensor rather than creating a new -#' placeholder tensor. -#' @param sparse Boolean, whether the placeholder created is meant to be sparse. -#' Default to `FALSE`. -#' @param ragged Boolean, whether the placeholder created is meant to be ragged. -#' In this case, values of 'NULL' in the 'shape' argument represent ragged -#' dimensions. For more information about `RaggedTensors`, see this -#' [guide](https://www.tensorflow.org/guide/ragged_tensor). Default to -#' `FALSE`. -#' @param type_spec A `tf$TypeSpec` object to create Input from. This -#' `tf$TypeSpec` represents the entire batch. When provided, all other args -#' except name must be `NULL`. -#' @param ... additional arguments passed on to `keras$layers$InputLayer`. -#' @param input_layer_name,name Optional name of the input layer (string). -#' -sequential_model_input_layer <- function(input_shape = NULL, - batch_size = NULL, - dtype = NULL, - input_tensor = NULL, - sparse = NULL, - name = NULL, - ragged = NULL, - type_spec = NULL, - ..., - input_layer_name = NULL) { - # keras$layers$Input can't be used with a Sequential Model, have to use - # keras$layers$LayerInput instead. - args <- capture_args(match.call(), - list(input_shape = as_shape, - batch_size = as_nullable_integer)) - - if ("input_layer_name" %in% names(args)) { - # a bare `name` arg would normally belong to the model, not the input layer - if (!is.null(args[["input_layer_name"]])) - args[["name"]] <- args[["input_layer_name"]] - - args[["input_layer_name"]] <- NULL - } - - do.call(keras$layers$InputLayer, args) -} - - - -#' (Deprecated) Replicates a model on different GPUs. -#' -#' @param model A Keras model instance. To avoid OOM errors, -#' this model could have been built on CPU, for instance -#' (see usage example below). -#' @param gpus `NULL` to use all available GPUs (default). Integer >= 2 or -#' list of integers, number of GPUs or list of GPU IDs on which to create -#' model replicas. -#' @param cpu_merge A boolean value to identify whether to force -#' merging model weights under the scope of the CPU or not. 
-#' @param cpu_relocation A boolean value to identify whether to
-#' create the model's weights under the scope of the CPU.
-#' If the model is not defined under any preceding device
-#' scope, you can still rescue it by activating this option.
-#'
-#' @return A Keras model object which can be used just like the initial
-#' `model` argument, but which distributes its workload on multiple GPUs.
-#'
-#' @details
-#' Specifically, this function implements single-machine
-#' multi-GPU data parallelism. It works in the following way:
-#' - Divide the model's input(s) into multiple sub-batches.
-#' - Apply a model copy on each sub-batch. Every model copy
-#' is executed on a dedicated GPU.
-#' - Concatenate the results (on CPU) into one big batch.
-#'
-#' E.g. if your `batch_size` is 64 and you use `gpus=2`,
-#' then we will divide the input into 2 sub-batches of 32 samples,
-#' process each sub-batch on one GPU, then return the full
-#' batch of 64 processed samples.
-#'
-#' This induces quasi-linear speedup on up to 8 GPUs.
-#'
-#' This function is only available with the TensorFlow backend
-#' for the time being.
-#'
-#' @section Model Saving:
-#'
-#' To save the multi-gpu model, use [save_model_hdf5()] or
-#' [save_model_weights_hdf5()] with the template model (the argument you
-#' passed to `multi_gpu_model`), rather than the model returned
-#' by `multi_gpu_model`.
-#'
-#' @examples \dontrun{
-#'
-#' library(keras)
-#' library(tensorflow)
-#'
-#' num_samples <- 1000
-#' height <- 224
-#' width <- 224
-#' num_classes <- 1000
-#'
-#' # Instantiate the base model (or "template" model).
-#' # We recommend doing this under a CPU device scope,
-#' # so that the model's weights are hosted on CPU memory.
-#' # Otherwise they may end up hosted on a GPU, which would
-#' # complicate weight sharing.
-#' with(tf$device("/cpu:0"), {
-#'   model <- application_xception(
-#'     weights = NULL,
-#'     input_shape = c(height, width, 3),
-#'     classes = num_classes
-#'   )
-#' })
-#'
-#' # Replicates the model on 8 GPUs.
-#' # This assumes that your machine has 8 available GPUs.
-#' parallel_model <- multi_gpu_model(model, gpus = 8)
-#' parallel_model %>% compile(
-#'   loss = "categorical_crossentropy",
-#'   optimizer = "rmsprop"
-#' )
-#'
-#' # Generate dummy data.
-#' x <- array(runif(num_samples * height * width * 3),
-#'            dim = c(num_samples, height, width, 3))
-#' y <- array(runif(num_samples * num_classes),
-#'            dim = c(num_samples, num_classes))
-#'
-#' # This `fit` call will be distributed on 8 GPUs.
-#' # Since the batch size is 256, each GPU will process 32 samples.
-#' parallel_model %>% fit(x, y, epochs = 20, batch_size = 256)
-#'
-#' # Save model via the template model (which shares the same weights):
-#' model %>% save_model_hdf5("my_model.h5")
-#' }
-#'
-#' @family model functions
-#' @note This function is deprecated and has been removed from tensorflow on
-#' 2020-04-01. To distribute your training across all available GPUs,
-#' you can use `tensorflow::tf$distribute$MirroredStrategy()`
-#' by creating your model like this:
-#' ```r
-#' strategy <- tensorflow::tf$distribute$MirroredStrategy()
-#' with(strategy$scope(), {
-#'   model <- application_xception(
-#'     weights = NULL,
-#'     input_shape = c(height, width, 3),
-#'     classes = num_classes
-#'   )
-#' })
-#' ```
-#' @keywords internal
-#' @export
-multi_gpu_model <- function(model, gpus = NULL, cpu_merge = TRUE, cpu_relocation = FALSE) {
-
-  if (is.null(gpus) && keras_version() < "2.1.4") {
-    stop("You must provide an explicit gpus argument in Keras versions ",
-         "prior to 2.1.4")
-  }
-
-  if (tensorflow::tf_version() >= "2.2")
-    stop("This function is deprecated as of TF version 2.2")
-
-  args <- list(
-    model = model,
-    gpus = as_nullable_integer(gpus)
-  )
-
-  if (keras_version() >= "2.1.6") {
-    args$cpu_merge <- cpu_merge
-    args$cpu_relocation <- cpu_relocation
-  }
-
-  do.call(resolve_utils()$multi_gpu_model, args)
-}
-
-
-#' @importFrom reticulate py_to_r_wrapper
-#' @export
-py_to_r_wrapper.keras.engine.training.Model <- function(x) {
-  force(x)
-  function(object, ...) {
-    compose_layer(object, x, ...)
-  }
-}
-
-#' @export
-py_to_r_wrapper.kerastools.model.RModel <- function(x) {
-  force(x)
-  function(...) {
-    x$call(...)
-  }
-}
-
-
-#' @export
-py_to_r_wrapper.keras.engine.base_layer.Layer <- function(x) {
-  force(x)
-  function(object, ...) {
-    if(missing(object))
-      x(...)
-    else
-      compose_layer(object, x, ...)
-  }
-}
-
-
-# py_to_r_wrapper.keras.engine.base_layer.Layer <- function(x) {
-#   force(x)
-#   function(...) {
-#     if(!missing(..1) && inherits(..1, "keras.engine.sequential.Sequential")) {
-#       if(length(list(...)) > 1)
-#         warning("Other arguments to ... are ignored because layer instance already created")
-#       model <- ..1
-#       model$add(x)
-#       model
-#     } else
-#       x(...)
-#   }
-# }
-
-
-#' Clone a model instance.
-#'
-#' Model cloning is similar to calling a model on new inputs, except that it
-#' creates new layers (and thus new weights) instead of sharing the weights of
-#' the existing layers.
-#'
-#' @param model Instance of Keras model (could be a functional model or a
-#' Sequential model).
-#' @param input_tensors Optional list of input tensors to build the model upon.
-#' If not provided, placeholders will be created.
-#' @param clone_function Callable to be used to clone each layer in the target
-#' model (except `InputLayer` instances). It takes as argument the layer
-#' instance to be cloned, and returns the corresponding layer instance to be
-#' used in the model copy. If unspecified, this callable defaults to the
-#' following serialization/deserialization function:
-#'
-#' ```function(layer) layer$`__class__`$from_config(layer$get_config())```
-#'
-#' By passing a custom callable, you can customize your copy of the model,
-#' e.g. by wrapping certain layers of interest (you might want to replace all
-#' LSTM instances with equivalent `Bidirectional(LSTM(...))` instances, for
-#' example).
-#'
-#' @export
-clone_model <- function(model, input_tensors = NULL, clone_function = NULL) {
-  args <- capture_args(match.call())
-  do.call(keras$models$clone_model, args)
-}
-
-
-#' Configure a Keras model for training
-#'
-#' @param object Model object to compile.
-#' @param optimizer String (name of optimizer) or optimizer instance. For most
-#' models, this defaults to `"rmsprop"`.
-#' @param loss String (name of objective function), objective function or a
-#' `keras$losses$Loss` subclass instance.
An objective function is any -#' callable with the signature `loss = fn(y_true, y_pred)`, where y_true = -#' ground truth values with shape = `[batch_size, d0, .. dN]`, except sparse -#' loss functions such as sparse categorical crossentropy where shape = -#' `[batch_size, d0, .. dN-1]`. y_pred = predicted values with shape = -#' `[batch_size, d0, .. dN]`. It returns a weighted loss float tensor. If a -#' custom `Loss` instance is used and reduction is set to `NULL`, return value -#' has the shape `[batch_size, d0, .. dN-1]` i.e. per-sample or per-timestep -#' loss values; otherwise, it is a scalar. If the model has multiple outputs, -#' you can use a different loss on each output by passing a dictionary or a -#' list of losses. The loss value that will be minimized by the model will -#' then be the sum of all individual losses, unless `loss_weights` is -#' specified. -#' @param metrics List of metrics to be evaluated by the model during training -#' and testing. Each of this can be a string (name of a built-in function), -#' function or a `keras$metrics$Metric` class instance. See -#' `?tf$keras$metrics`. Typically you will use `metrics=list('accuracy')`. A -#' function is any callable with the signature `result = fn(y_true, y_pred)`. -#' To specify different metrics for different outputs of a multi-output model, -#' you could also pass a dictionary, such as `metrics=list(output_a = -#' 'accuracy', output_b = c('accuracy', 'mse'))`. You can also pass a list to -#' specify a metric or a list of metrics for each output, such as -#' `metrics=list(list('accuracy'), list('accuracy', 'mse'))` or -#' `metrics=list('accuracy', c('accuracy', 'mse'))`. When you pass the strings -#' `'accuracy'` or `'acc'`, this is converted to one of -#' `tf.keras.metrics.BinaryAccuracy`, `tf.keras.metrics.CategoricalAccuracy`, -#' `tf.keras.metrics.SparseCategoricalAccuracy` based on the loss function -#' used and the model output shape. A similar conversion is done for the -#' strings `'crossentropy'` and `'ce'`. -#' @param loss_weights Optional list, dictionary, or named vector specifying -#' scalar numeric coefficients to weight the loss contributions of different -#' model outputs. The loss value that will be minimized by the model will then -#' be the *weighted sum* of all individual losses, weighted by the -#' `loss_weights` coefficients. If a list, it is expected to have a 1:1 -#' mapping to the model's outputs. If a dict, it is expected to map output -#' names (strings) to scalar coefficients. -#' @param weighted_metrics List of metrics to be evaluated and weighted by -#' `sample_weight` or `class_weight` during training and testing. -#' @param run_eagerly Bool. Defaults to `FALSE`. If `TRUE`, this Model's logic -#' will not be wrapped in a `tf.function`. Recommended to leave this as `NULL` -#' unless your Model cannot be run inside a `tf.function`. `run_eagerly=True` -#' is not supported when using -#' `tf.distribute.experimental.ParameterServerStrategy`. If the model's logic -#' uses tensors in R control flow expressions like `if` and `for`, the model -#' is still traceable with `tf.function`, but you will have to enter a -#' `tfautograph::autograph({})` directly. -#' @param steps_per_execution Int. Defaults to 1. The number of batches to run -#' during each `tf.function` call. Running multiple batches inside a single -#' `tf.function` call can greatly improve performance on TPUs or small models -#' with a large Python/R overhead. At most, one full epoch will be run each -#' execution. 
If a number larger than the size of the epoch is passed, the -#' execution will be truncated to the size of the epoch. Note that if -#' `steps_per_execution` is set to `N`, `Callback.on_batch_begin` and -#' `Callback.on_batch_end` methods will only be called every `N` batches (i.e. -#' before/after each `tf.function` execution). -#' @param ... Arguments supported for backwards compatibility only. -#' @param sample_weight_mode If you need to do timestep-wise sample weighting -#' (2D weights), set this to "temporal". `NULL` defaults to sample-wise -#' weights (1D). If the model has multiple outputs, you can use a different -#' `sample_weight_mode` on each output by passing a list of modes. -#' @param target_tensors By default, Keras will create a placeholder for the -#' model's target, which will be fed with the target data during training. If -#' instead you would like to use your own target tensor (in turn, Keras will -#' not expect external data for these targets at training time), you can -#' specify them via the `target_tensors` argument. It should be a single -#' tensor (for a single-output sequential model). -#' -#' @family model functions -#' -#' @export -compile.keras.engine.training.Model <- - function(object, - optimizer = NULL, - loss = NULL, - metrics = NULL, - loss_weights = NULL, - weighted_metrics = NULL, - run_eagerly = NULL, - steps_per_execution = NULL, - ..., - target_tensors = NULL, - sample_weight_mode = NULL) { - - # give losses a name - loss_name <- substitute(loss) - if (is.function(loss) && - !inherits(loss, "python.builtin.object") && - is.null(attr(loss, "py_function_name", TRUE))) - attr(loss, "py_function_name") <- as_py_name(loss_name) - - # handle metrics - if (!is.null(metrics)) { - if(inherits(metrics, "python.builtin.object") || - is.function(metrics)) - metrics <- list(metrics) - # convert metrics to list if it isn't one - if(is.character(metrics)) - metrics <- as.list(metrics) - - # get metric names (if any) - metric_names <- names(metrics) - if (is.null(metric_names)) - metric_names <- rep_len("", length(metrics)) - - # if all the metrics names are output names then leave them alone - # (just convert to a list with no special processing) - if (py_has_attr(object, "output_names") && - all(metric_names %in% object$output_names)) { - metrics <- as.list(metrics) - } else { - # convert metrics to a list (adding names to any custom functions) - metrics <- lapply(1:length(metrics), function(i) { - metric <- metrics[[i]] - - if (is.function(metric) && nzchar(metric_names[[i]])) { - warning( - "Passing names for custom metrics is deprecated. Please use the ", - "custom_metric() function to define custom metrics." 
- ) - attr(metric, "py_function_name") <- metric_names[[i]] - } - - metric - }) - } - } - - # keras 2.07 args - if (keras_version() >= "2.0.7") { - # weighted metrics - if (!is.null(weighted_metrics) && !is.list(weighted_metrics)) - weighted_metrics <- list(weighted_metrics) - - # target tensors - if (!is.null(target_tensors) && !is.list(target_tensors)) - target_tensors <- list(target_tensors) - } - - if (is.numeric(loss_weights)) - storage.mode(loss_weights) <- "list" - - args <- list( - optimizer = optimizer, - loss = loss, - metrics = metrics, - loss_weights = loss_weights, - weighted_metrics = weighted_metrics, - run_eagerly = run_eagerly, - steps_per_execution = steps_per_execution, - sample_weight_mode = sample_weight_mode, - target_tensors = target_tensors - ) - - # drop NULLs - for (nm in names(args)) - args[[nm]] <- args[[nm]] - - args <- c(list(), args, ...) - - # compile model - do.call(object$compile, args) - - # return model invisible (convenience for chaining) - invisible(object) - } - -as_py_name <- function(x) { - if(is.language(x)) - x <- deparse(x, width.cutoff = 500L)[1] - x <- make.names(as.character(x)) - x <- gsub(".", "_", x, fixed = TRUE) - x -} - -#drop_nulls <- -function(x, ...) { - nms <- c(...) - nms <- if (length(nms)) - intersect(names(x), nms) - else - names(args) - - for (nm in nms) - x[[nm]] <- x[[nm]] - x -} - - -resolve_input_data <- function(x, y = NULL) { - # resolve x and y (check for TF dataset) - dataset <- resolve_tensorflow_dataset(x) - args <- list() - if (inherits(dataset, "tensorflow.python.data.ops.dataset_ops.DatasetV2")) { - args$x <- dataset - } else if (!is.null(dataset)) { - args$x <- dataset[[1]] - args$y <- dataset[[2]] - } else if (is.function(x)) { - args$x <- as_generator(x) - } else if (inherits(x, "python.builtin.iterator")) { - args$x <- x - } else if (inherits(x, "keras.utils.data_utils.Sequence")) { - args$x <- x - } else { - if (!is.null(x)) - args$x <- keras_array(x) - if (!is.null(y)) - args$y <- keras_array(y) - } - args -} - -resolve_validation_data <- function(validation_data) { - args <- list() - if (!is.null(validation_data)) { - dataset <- resolve_tensorflow_dataset(validation_data) - if (!is.null(dataset)) - args$validation_data <- dataset - else if (is.function(validation_data)) - args$validation_data <- as_generator(validation_data) - else if (inherits(validation_data, "python.builtin.iterator")) - args$validation_data <- validation_data - else if (inherits(validation_data, "keras.utils.data_utils.Sequence")) - args$validation_data <- validation_data - else { - args$validation_data <- keras_array(validation_data) - if (tensorflow::tf_version() >="2.2") - args$validation_data <- do.call(reticulate::tuple, args$validation_data) - } - } - args -} - -resolve_main_thread_generators <- function(x, callback_type = "on_train_batch_begin") { - - if (tensorflow::tf_version() == "2.1") - stop("Using generators that call R functions is not supported in TensorFlow 2.1 ", - "Please upgrade your TF installation or downgrade to 2.0", call. = FALSE) - - # we need a hack to make sure the generator is evaluated in the main thread. - python_path <- system.file("python", package = "keras") - tools <- reticulate::import_from_path("kerastools", path = python_path) - - # as_generator will return a tuple with 2 elements. - # (1) a python generator that just consumes - # a queue. - # (2) a function that evaluates the next element of the generator - # and adds to the queue. This function should be called in the main - # thread. 
- # we add a `on_train_batch_begin` to call this function. - o <- tools$model$as_generator(x) - - callback <- list(function(batch, logs) { - o[[2]]() - }) - names(callback) <- callback_type - - if (callback_type == "on_test_batch_begin") { - callback[[2]] <- callback[[1]] - names(callback)[[2]] <- "on_test_begin" - } - - callback <- do.call(callback_lambda, callback) - - list(generator = o[[1]], callback = callback) -} - -#' Train a Keras model -#' -#' Trains the model for a fixed number of epochs (iterations on a dataset). -#' -#' @param object Model to train. -#' @param x Vector, matrix, or array of training data (or list if the model has -#' multiple inputs). If all inputs in the model are named, you can also pass a -#' list mapping input names to data. `x` can be `NULL` (default) if feeding -#' from framework-native tensors (e.g. TensorFlow data tensors). You can also -#' pass a `tfdataset` or a generator returning a list with `(inputs, targets)` or -#' `(inputs, targets, sample_weights)`. -#' @param y Vector, matrix, or array of target (label) data (or list if the model has -#' multiple outputs). If all outputs in the model are named, you can also pass -#' a list mapping output names to data. `y` can be `NULL` (default) if feeding -#' from framework-native tensors (e.g. TensorFlow data tensors). -#' @param batch_size Integer or `NULL`. Number of samples per gradient update. -#' If unspecified, `batch_size` will default to 32. -#' @param epochs Number of epochs to train the model. -#' Note that in conjunction with `initial_epoch`, -#' `epochs` is to be understood as "final epoch". The model is -#' not trained for a number of iterations given by `epochs`, but -#' merely until the epoch of index `epochs` is reached. -#' @param verbose Verbosity mode (0 = silent, 1 = progress bar, 2 = one line per -#' epoch). -#' @param view_metrics View realtime plot of training metrics (by epoch). The -#' default (`"auto"`) will display the plot when running within RStudio, -#' `metrics` were specified during model [compile()], `epochs > 1` and -#' `verbose > 0`. Use the global `keras.view_metrics` option to establish a -#' different default. -#' @param callbacks List of callbacks to be called during training. -#' @param validation_split Float between 0 and 1. Fraction of the training data -#' to be used as validation data. The model will set apart this fraction of -#' the training data, will not train on it, and will evaluate the loss and any -#' model metrics on this data at the end of each epoch. The validation data -#' is selected from the last samples in the `x` and `y` data provided, -#' before shuffling. -#' @param validation_data Data on which to evaluate the loss and any model -#' metrics at the end of each epoch. The model will not be trained on this -#' data. This could be a list (x_val, y_val) or a list (x_val, y_val, -#' val_sample_weights). `validation_data` will override `validation_split`. -#' @param shuffle shuffle: Logical (whether to shuffle the training data -#' before each epoch) or string (for "batch"). "batch" is a special option -#' for dealing with the limitations of HDF5 data; it shuffles in batch-sized -#' chunks. Has no effect when `steps_per_epoch` is not `NULL`. -#' @param class_weight Optional named list mapping indices (integers) to a -#' weight (float) value, used for weighting the loss function -#' (during training only). This can be useful to tell the model to -#' "pay more attention" to samples from an under-represented class. 
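-#' For example (a hypothetical illustration, not part of the original docs),
-#' `class_weight = list("0" = 1, "1" = 5)` would make errors on class `1`
-#' five times as costly to the loss as errors on class `0`.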
-#' @param sample_weight Optional array of the same length as x, containing -#' weights to apply to the model's loss for each sample. In the case of -#' temporal data, you can pass a 2D array with shape (samples, -#' sequence_length), to apply a different weight to every timestep of every -#' sample. In this case you should make sure to specify -#' `sample_weight_mode="temporal"` in [compile()]. -#' @param initial_epoch Integer, Epoch at which to start training (useful for -#' resuming a previous training run). -#' @param steps_per_epoch Total number of steps (batches of samples) before -#' declaring one epoch finished and starting the next epoch. When training -#' with input tensors such as TensorFlow data tensors, the default `NULL` is -#' equal to the number of samples in your dataset divided by the batch -#' size, or 1 if that cannot be determined. -#' @param validation_steps Only relevant if `steps_per_epoch` is specified. -#' Total number of steps (batches of samples) to validate before stopping. -#' @param ... Unused -#' -#' @return A `history` object that contains all information collected -#' during training. -#' -#' @family model functions -#' -#' @export -fit.keras.engine.training.Model <- - function(object, x = NULL, y = NULL, batch_size=NULL, epochs=10, - verbose=getOption("keras.fit_verbose", default = "auto"), callbacks=NULL, - view_metrics = getOption("keras.view_metrics", default = "auto"), - validation_split=0.0, validation_data=NULL, shuffle=TRUE, - class_weight=NULL, sample_weight=NULL, initial_epoch=0, - steps_per_epoch=NULL, validation_steps=NULL, ...) { - - if (!is.null(batch_size) && is_tensorflow_dataset(x)) - stop("Don't set batch_size with a tfdataset as input.", call. = FALSE) - - # defaults - if (is.null(batch_size) && is.null(steps_per_epoch) && !is_tensorflow_dataset(x)) - batch_size <- 32L - - # resolve view_metrics - if (identical(view_metrics, "auto")) - view_metrics <- resolve_view_metrics(verbose, epochs, object$metrics) - - # build args - args <- list( - batch_size = as_nullable_integer(batch_size), - epochs = as.integer(epochs), - verbose = as_model_verbose_arg(verbose), - validation_split = validation_split, - shuffle = shuffle, - class_weight = as_class_weight(class_weight), - sample_weight = keras_array(sample_weight), - initial_epoch = as.integer(initial_epoch) - ) - - args <- append(args, resolve_input_data(x, y)) - args <- append(args, resolve_validation_data(validation_data)) - - if (keras_version() >= "2.0.7") { - args$steps_per_epoch <- as_nullable_integer(steps_per_epoch) - args$validation_steps <- as_nullable_integer(validation_steps) - } - - extra_callbacks <- list() - if (is_main_thread_generator(x)) { - main_thr <- resolve_main_thread_generators(args$x) - args$x <- main_thr$generator - extra_callbacks <- c(extra_callbacks, main_thr$callback) - } - - if (is_main_thread_generator(validation_data)) { - main_thr <- resolve_main_thread_generators(args$validation_data, "on_test_batch_begin") - args$validation_data <- main_thr$generator - extra_callbacks <- c(extra_callbacks, main_thr$callback) - } - - if (length(extra_callbacks) > 0) { - callbacks <- c(callbacks, extra_callbacks) - } - - args$callbacks <- normalize_callbacks_with_metrics(view_metrics, initial_epoch, callbacks) - history <- do.call(object$fit, args) - - # convert to a keras_training history object - history <- to_keras_training_history(history) - - # write metadata contained in history - write_history_metadata(history) - - # return the history invisibly - invisible(history) -} - 
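The `compile()` and `fit()` interfaces documented above expose many options; here is a minimal end-to-end sketch of the common path (the layer sizes and fake data below are arbitrary, for illustration only):

```r
library(keras)

# a small classifier on made-up data, purely for illustration
model <- keras_model_sequential(input_shape = c(784)) %>%
  layer_dense(units = 32, activation = "relu") %>%
  layer_dense(units = 10, activation = "softmax")

model %>% compile(
  optimizer = "rmsprop",
  loss = "categorical_crossentropy",
  metrics = "accuracy"
)

# 100 fake samples: x is 100 x 784, y is one-hot over 10 classes
x <- matrix(runif(100 * 784), nrow = 100)
y <- to_categorical(sample(0:9, 100, replace = TRUE), num_classes = 10)

# hold out the last 20% of rows for validation, as described above
history <- model %>% fit(
  x, y,
  epochs = 5,
  batch_size = 32,
  validation_split = 0.2
)
```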
-#' Evaluate a Keras model
-#'
-#' @inheritParams fit.keras.engine.training.Model
-#'
-#' @param object Model object to evaluate
-#' @param x Vector, matrix, or array of test data (or list if the model has
-#' multiple inputs). If all inputs in the model are named, you can also pass a
-#' list mapping input names to data. `x` can be `NULL` (default) if feeding
-#' from framework-native tensors (e.g. TensorFlow data tensors). You can also
-#' pass a `tfdataset` or a generator returning a list with `(inputs, targets)` or
-#' `(inputs, targets, sample_weights)`.
-#' @param y Vector, matrix, or array of target (label) data (or list if the model has
-#' multiple outputs). If all outputs in the model are named, you can also pass
-#' a list mapping output names to data. `y` can be `NULL` (default) if feeding
-#' from framework-native tensors (e.g. TensorFlow data tensors).
-#' @param steps Total number of steps (batches of samples) before declaring the
-#' evaluation round finished. Ignored with the default value of `NULL`.
-#' @param callbacks List of callbacks to apply during evaluation.
-#' @param ... Unused
-#'
-#'
-#' @return Named list of model test loss (or losses for models with multiple
-#' outputs) and model metrics.
-#'
-#' @family model functions
-#'
-#' @export
-evaluate.keras.engine.training.Model <- function(object, x = NULL, y = NULL, batch_size = NULL,
-                                                 verbose = "auto", sample_weight = NULL, steps = NULL,
-                                                 callbacks = NULL, ...) {
-
-  # defaults
-  if (is.null(batch_size) && is.null(steps) && !is_tensorflow_dataset(x))
-    batch_size <- 32L
-
-  # args
-  args <- list(
-    batch_size = as_nullable_integer(batch_size),
-    verbose = as_model_verbose_arg(verbose),
-    sample_weight = sample_weight
-  )
-
-  args <- append(args, resolve_input_data(x, y))
-
-  extra_callbacks <- list()
-  if (is_main_thread_generator(x)) {
-    main_thr <- resolve_main_thread_generators(args$x, "on_test_batch_begin")
-    args$x <- main_thr$generator
-    extra_callbacks <- c(extra_callbacks, main_thr$callback)
-  }
-
-  if (length(extra_callbacks) > 0) {
-    callbacks <- c(callbacks, extra_callbacks)
-  }
-
-  args <- resolve_callbacks(args, callbacks)
-
-  if (keras_version() >= "2.0.7")
-    args$steps <- as_nullable_integer(steps)
-
-  # perform evaluation
-  result <- do.call(object$evaluate, args)
-
-  # apply names
-  names(result) <- object$metrics_names
-
-  # write run data
-  tfruns::write_run_metadata("evaluation", result)
-
-  # return result
-  result
-}
-
-resolve_callbacks <- function(args, callbacks) {
-  if (get_keras_implementation() == "tensorflow" && tensorflow::tf_version() >= "2.0") {
-    args <- append(args, list(callbacks = normalize_callbacks(callbacks)))
-  } else if (!is.null(callbacks)) {
-    warning("Prediction callbacks are only supported for the TensorFlow ",
-            "implementation of Keras, and tf_version() >= 2.0")
-  }
-  args
-}
-
-#' Generate predictions from a Keras model
-#'
-#' Generates output predictions for the input samples, processing the samples in
-#' a batched way.
-#'
-#' @inheritParams evaluate.keras.engine.training.Model
-#'
-#' @param object Keras model
-#' @param x Input data (vector, matrix, or array). You can also
-#' pass a `tfdataset` or a generator returning a list with `(inputs, targets)` or
-#' `(inputs, targets, sample_weights)`.
-#' @param batch_size Integer. If unspecified, it will default to 32.
-#' @param verbose Verbosity mode, 0, 1, 2, or "auto". "auto" defaults to 1
-#' for most cases and defaults to `verbose=2` when used with
-#' ParameterServerStrategy or with interactive logging disabled.
-#' @param callbacks List of callbacks to apply during prediction.
-#' @param ... Unused
-#'
-#' @return vector, matrix, or array of predictions
-#'
-#' @family model functions
-#'
-#'
-#' @importFrom stats predict
-#' @export
-predict.keras.engine.training.Model <-
-function(object,
-         x,
-         batch_size = NULL,
-         verbose = "auto",
-         steps = NULL,
-         callbacks = NULL,
-         ...) {
-
-  # defaults
-  if (is.null(batch_size) && is.null(steps) && !is_tensorflow_dataset(x))
-    batch_size <- 32L
-
-
-  # args
-  args <- list(
-    batch_size = as_nullable_integer(batch_size),
-    verbose = as_model_verbose_arg(verbose, 0L)
-  )
-
-
-  args <- append(args, resolve_input_data(x))
-
-  extra_callbacks <- list()
-  if (is_main_thread_generator(x)) {
-    main_thr <- resolve_main_thread_generators(args$x, "on_predict_batch_begin")
-    args$x <- main_thr$generator
-    extra_callbacks <- c(extra_callbacks, main_thr$callback)
-  }
-
-  if (length(extra_callbacks) > 0) {
-    callbacks <- c(callbacks, extra_callbacks)
-  }
-
-  args <- resolve_callbacks(args, callbacks)
-
-  if (keras_version() >= "2.0.7")
-    args$steps <- as_nullable_integer(steps)
-
-  # call predict
-  do.call(object$predict, args)
-}
-
-as_model_verbose_arg <- function(x, old_default = 1L) {
-  if(tf_version() < "2.9" && x == "auto")
-    return(old_default)
-  if(x == "auto") x else as.integer(x)
-}
-
-
-#' (Deprecated) Generates probability or class probability predictions for the input samples.
-#'
-#' These functions were removed in TensorFlow version 2.6. See details for how to update your code:
-#'
-#' @details How to update your code:
-#'
-#' `predict_proba()`: use `predict()` directly.
-#'
-#' `predict_classes()`:
-#' * If your model does multi-class classification:
-#'   (e.g. if it uses a `softmax` last-layer activation).
-#' ```r
-#' model %>% predict(x) %>% k_argmax()
-#' ```
-#' * If your model does binary classification
-#'   (e.g. if it uses a `sigmoid` last-layer activation).
-#' ```r
-#' model %>% predict(x) %>% `>`(0.5) %>% k_cast("int32")
-#' ```
-#'
-#' @inheritParams predict.keras.engine.training.Model
-#'
-#' @param object Keras model object
-#' @param steps Total number of steps (batches of samples) before declaring the
-#' evaluation round finished. The default `NULL` is equal to the number of
-#' samples in your dataset divided by the batch size.
-#'
-#' @details The input samples are processed batch by batch.
-#'
-#' @family model functions
-#'
-#' @keywords internal
-#' @export
-predict_proba <- function(object, x, batch_size = NULL, verbose = 0, steps = NULL) {
-  warning("`predict_proba()` is deprecated and was removed from tensorflow in version 2.6, ",
-          "please use `predict()` instead")
-  args <- list(
-    batch_size = as_nullable_integer(batch_size),
-    verbose = as.integer(verbose)
-  )
-
-  # resolve x (check for TF dataset)
-  dataset <- resolve_tensorflow_dataset(x)
-  if (!is.null(dataset)) {
-    args$x <- dataset[[1]]
-  } else {
-    args$x <- keras_array(x)
-  }
-
-  if (keras_version() >= "2.1.3")
-    args$steps <- as_nullable_integer(steps)
-
-  do.call(object$predict_proba, args)
-}
-
-#' @rdname predict_proba
-#' @keywords internal
-#' @export
-predict_classes <- function(object, x, batch_size = NULL, verbose = 0, steps = NULL) {
-  warning(
-'`predict_classes()` is deprecated and was removed from tensorflow in version 2.6.
-Please update your code: - * If your model does multi-class classification: - (e.g. if it uses a `softmax` last-layer activation). - - model %>% predict(x) %>% k_argmax() - - * if your model does binary classification - (e.g. if it uses a `sigmoid` last-layer activation). - - model %>% predict(x) %>% `>`(0.5) %>% k_cast("int32") -' - ) - args <- list( - batch_size = as_nullable_integer(batch_size), - verbose = as.integer(verbose) - ) - - # resolve x (check for TF dataset) - dataset <- resolve_tensorflow_dataset(x) - if (!is.null(dataset)) { - args$x <- dataset[[1]] - } else { - args$x <- keras_array(x) - } - - if (keras_version() >= "2.1.3") - args$steps <- as_nullable_integer(steps) - - do.call(object$predict_classes, args) -} - -#' Returns predictions for a single batch of samples. -#' -#' @inheritParams predict.keras.engine.training.Model -#' -#' @param object Keras model object -#' -#' @return array of predictions. -#' -#' @family model functions -#' -#' @export -predict_on_batch <- function(object, x) { - object$predict_on_batch( - x = keras_array(x) - ) -} - - -#' Single gradient update or model evaluation over one batch of samples. -#' -#' @param object Keras model object -#' @param x input data, as an array or list of arrays (if the model has multiple -#' inputs). -#' @param y labels, as an array. -#' @param class_weight named list mapping classes to a weight value, used for -#' scaling the loss function (during training only). -#' @param sample_weight sample weights, as an array. -#' -#' @return Scalar training or test loss (if the model has no metrics) or list of scalars -#' (if the model computes other metrics). The property `model$metrics_names` -#' will give you the display labels for the scalar outputs. -#' -#' @family model functions -#' -#' @export -train_on_batch <- function(object, x, y, class_weight = NULL, sample_weight = NULL) { - object$train_on_batch( - x = keras_array(x), - y = keras_array(y), - class_weight = as_class_weight(class_weight), - sample_weight = sample_weight - ) -} - -#' @rdname train_on_batch -#' @export -test_on_batch <- function(object, x, y, sample_weight = NULL) { - object$test_on_batch( - x = keras_array(x), - y = keras_array(y), - sample_weight = sample_weight - ) -} - - - -#' (Deprecated) Fits the model on data yielded batch-by-batch by a generator. -#' -#' The generator is run in parallel to the model, for efficiency. For instance, -#' this allows you to do real-time data augmentation on images on CPU in -#' parallel to training your model on GPU. -#' -#' @inheritParams fit.keras.engine.training.Model -#' -#' @param object Keras model object -#' @param generator A generator (e.g. like the one provided by -#' [flow_images_from_directory()] or a custom R -#' [generator function](https://rstudio.github.io/reticulate/articles/calling_python.html#generators-1)). -#' -#' The output of the generator must be a list of one of these forms: -#' -#' - (inputs, targets) -#' - (inputs, targets, sample_weights) -#' -#' This list (a single output of the generator) makes a single batch. -#' Therefore, all arrays in this list must have the same length (equal to -#' the size of this batch). Different batches may have different sizes. -#' For example, the last batch of the epoch is commonly smaller than the -#' others, if the size of the dataset is not divisible by the batch size. -#' The generator is expected to loop over its data indefinitely. An epoch -#' finishes when `steps_per_epoch` batches have been seen by the model. 
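-#'
-#' For example, a minimal sketch (assuming `x` and `y` are in-memory arrays
-#' defined elsewhere) of an R generator function that yields random batches
-#' of 32 samples indefinitely:
-#' ```r
-#' sampling_generator <- function() {
-#'   i <- sample(nrow(x), 32)
-#'   list(x[i, , drop = FALSE], y[i, , drop = FALSE])
-#' }
-#' ```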
-#' @param steps_per_epoch Total number of steps (batches of samples) to yield
-#' from `generator` before declaring one epoch finished and starting the next
-#' epoch. It should typically be equal to the number of samples of your
-#' dataset divided by the batch size.
-#' @param epochs Integer. Number of epochs to train the model.
-#' An epoch is an iteration over the entire data provided, as defined by
-#' `steps_per_epoch`. Note that in conjunction with `initial_epoch`,
-#' `epochs` is to be understood as "final epoch". The model is not trained
-#' for a number of iterations given by `epochs`, but merely until the epoch
-#' of index `epochs` is reached.
-#' @param callbacks List of callbacks to apply during training.
-#' @param validation_data this can be either:
-#' - a generator for the validation data
-#' - a list (inputs, targets)
-#' - a list (inputs, targets, sample_weights).
-#' on which to evaluate
-#' the loss and any model metrics at the end of each epoch.
-#' The model will not be trained on this data.
-#' @param validation_steps Only relevant if `validation_data` is a generator.
-#' Total number of steps (batches of samples) to yield from `generator` before
-#' stopping at the end of every epoch. It should typically be equal to the number
-#' of samples of your validation dataset divided by the batch size.
-#' @param class_weight Optional named list mapping class indices (integer) to a
-#' weight (float) value, used for weighting the loss function (during
-#' training only). This can be useful to tell the model to "pay more
-#' attention" to samples from an under-represented class.
-#' @param max_queue_size Maximum size for the generator queue. If unspecified,
-#' `max_queue_size` will default to 10.
-#' @param workers Maximum number of threads to use for parallel processing. Note that
-#' parallel processing will only be performed for native Keras generators (e.g.
-#' `flow_images_from_directory()`) as R-based generators must run on the main thread.
-#' @param initial_epoch Epoch at which to start training (useful for resuming a
-#' previous training run).
-#'
-#' @return Training history object (invisibly)
-#'
-#' @family model functions
-#'
-#' @keywords internal
-#' @export
-fit_generator <- function(object, generator, steps_per_epoch, epochs = 1,
-                          verbose = getOption("keras.fit_verbose", default = 1), callbacks = NULL,
-                          view_metrics = getOption("keras.view_metrics", default = "auto"),
-                          validation_data = NULL, validation_steps = NULL,
-                          class_weight = NULL, max_queue_size = 10, workers = 1, initial_epoch = 0) {
-
-  if (tensorflow::tf_version() <= "2.0")
-    return(fit_generator_legacy(
-      object = object,
-      generator = generator,
-      steps_per_epoch = steps_per_epoch,
-      epochs = epochs,
-      verbose = verbose,
-      view_metrics = view_metrics,
-      validation_data = validation_data,
-      validation_steps = validation_steps,
-      class_weight = class_weight,
-      max_queue_size = max_queue_size,
-      workers = workers,
-      initial_epoch = initial_epoch
-    ))
-
-  warning("`fit_generator` is deprecated. Use `fit` instead; it now accepts generators.")
-
-  # redirect to `model.fit`
-  args <- list(
-    object = object,
-    x = generator,
-    steps_per_epoch = steps_per_epoch,
-    epochs = epochs,
-    verbose = verbose,
-    callbacks = callbacks,
-    validation_data = validation_data,
-    validation_steps = validation_steps,
-    class_weight = class_weight,
-    max_queue_size = max_queue_size,
-    workers = workers,
-    initial_epoch = initial_epoch
-  )
-
-  do.call(fit, args)
-}
-
-#' (Deprecated) Evaluates the model on a data generator.
-#'
-#' The generator should return the same kind of data as accepted by
-#' `test_on_batch()`.
-#'
-#' @inheritParams evaluate.keras.engine.training.Model
-#' @inheritParams fit_generator
-#'
-#' @param generator Generator yielding lists (inputs, targets) or (inputs,
-#' targets, sample_weights)
-#' @param steps Total number of steps (batches of samples) to yield from
-#' `generator` before stopping.
-#'
-#' @return Named list of model test loss (or losses for models with multiple outputs)
-#' and model metrics.
-#'
-#' @family model functions
-#'
-#' @keywords internal
-#' @export
-evaluate_generator <- function(object, generator, steps, max_queue_size = 10, workers = 1,
-                               callbacks = NULL) {
-
-  if (tensorflow::tf_version() <= "2.0")
-    return(evaluate_generator_legacy(
-      object, generator, steps, max_queue_size, workers,
-      callbacks))
-
-  warning("`evaluate_generator` is deprecated. Use `evaluate` instead; it now accepts generators.")
-
-  args <- list(
-    object = object,
-    x = generator,
-    steps = as.integer(steps),
-    max_queue_size = as.integer(max_queue_size),
-    workers = as.integer(workers),
-    callbacks = callbacks
-  )
-
-  do.call(evaluate, args)
-}
-
-
-#' (Deprecated) Generates predictions for the input samples from a data generator.
-#'
-#' The generator should return the same kind of data as accepted by
-#' `predict_on_batch()`.
-#'
-#' @inheritParams predict.keras.engine.training.Model
-#' @inheritParams fit_generator
-#'
-#' @param object Keras model object
-#' @param generator Generator yielding batches of input samples.
-#' @param steps Total number of steps (batches of samples) to yield from
-#' `generator` before stopping.
-#' @param verbose verbosity mode, 0 or 1.
-#'
-#' @return Numpy array(s) of predictions.
-#'
-#' @section Raises: ValueError: In case the generator yields data in an invalid
-#' format.
-#'
-#' @family model functions
-#'
-#' @keywords internal
-#' @export
-predict_generator <- function(object, generator, steps, max_queue_size = 10, workers = 1, verbose = 0,
-                              callbacks = NULL) {
-
-  if (tensorflow::tf_version() <= "2.0")
-    return(predict_generator_legacy(object, generator, steps, max_queue_size,
-                                    workers, verbose, callbacks))
-
-  warning("`predict_generator` is deprecated. Use `predict` instead; it now accepts generators.")
-
-  args <- list(
-    object = object,
-    x = generator,
-    steps = as.integer(steps),
-    max_queue_size = as.integer(max_queue_size),
-    workers = as.integer(workers),
-    verbose = as.integer(verbose),
-    callbacks = callbacks
-  )
-
-  do.call(predict, args)
-}
-
-as_generator <- function(x) {
-  UseMethod("as_generator")
-}
-
-as_generator.default <- function(x) {
-  x
-}
-
-as_generator.tensorflow.python.data.ops.dataset_ops.Dataset <- function(x) {
-  python_path <- system.file("python", package = "keras")
-  tools <- reticulate::import_from_path("kerastools", path = python_path)
-  tools$generator$dataset_generator(x, k_get_session())
-}
-
-as_generator.tensorflow.python.data.ops.dataset_ops.DatasetV2 <- function(x) {
-
-  if (tensorflow::tf_version() >= "2.0")
-    x
-  else
-    as_generator.tensorflow.python.data.ops.dataset_ops.Dataset(x)
-
-}
-
-as_generator.function <- function(x) {
-  python_path <- system.file("python", package = "keras")
-  tools <- reticulate::import_from_path("kerastools", path = python_path)
-  iter <- reticulate::py_iterator(function() {
-    elem <- keras_array(x())
-
-    # deals with the case where the generator is used for prediction and only
-    # yields x's values.
-    if (length(elem) == 1)
-      elem[[2]] <- list()
-
-    do.call(reticulate::tuple, elem)
-  })
-  tools$generator$iter_generator(iter)
-}
-
-as_generator.keras_preprocessing.sequence.TimeseriesGenerator <- function(x) {
-  reticulate::as_iterator(x)
-}
-
-is_main_thread_generator <- function(x) {
-  UseMethod("is_main_thread_generator")
-}
-
-is_main_thread_generator.default <- function(x) {
-  FALSE
-}
-
-is_main_thread_generator.tensorflow.python.data.ops.dataset_ops.Dataset <- function(x) {
-  TRUE
-}
-
-is_main_thread_generator.function <- function(x) {
-  TRUE
-}
-
-is_main_thread_generator.keras.preprocessing.image.Iterator <- function(x) {
-  if (py_has_attr(x, "image_data_generator")) {
-    generator <- x$image_data_generator
-    !is.null(generator$preprocessing_function)
-  } else {
-    FALSE
-  }
-}
-
-is_main_thread_generator.keras_preprocessing.image.Iterator <- function(x) {
-
-  if (tensorflow::tf_version() <= "2.0.1")
-    return(TRUE)
-
-  if (py_has_attr(x, "image_data_generator")) {
-    generator <- x$image_data_generator
-    !is.null(generator$preprocessing_function)
-  } else {
-    FALSE
-  }
-}
-
-is_main_thread_generator.keras_preprocessing.image.iterator.Iterator <-
-  is_main_thread_generator.keras_preprocessing.image.Iterator
-
-is_main_thread_generator.keras_preprocessing.sequence.TimeseriesGenerator <- function(x) {
-  if (tensorflow::tf_version() <= "2.0.1")
-    return(TRUE)
-
-  FALSE
-}
-
-is_tensorflow_dataset <- function(x) {
-  inherits(x, "tensorflow.python.data.ops.dataset_ops.DatasetV2") ||
-    inherits(x, "tensorflow.python.data.ops.dataset_ops.Dataset")
-}
-
-resolve_tensorflow_dataset <- function(x) {
-
-  if (is_tensorflow_dataset(x)) {
-
-    # check version compatibility
-
-    if (is_tensorflow_implementation()) {
-      if (tensorflow::tf_version() < "1.9")
-        stop("TensorFlow v1.9 or higher is required for direct tensor input to models", call. = FALSE)
-    } else {
-      if (keras_version() < "2.2.0")
-        stop("Keras v2.2 or higher is required for direct tensor input to models", call. = FALSE)
-      if (!is_backend("tensorflow"))
-        stop("The tensorflow backend is required for direct tensor input to models", call. = FALSE)
-      if (tensorflow::tf_version() < "1.8")
-        stop("TensorFlow v1.8 or higher is required for direct tensor input to models", call.
= FALSE) - } - - - if (tensorflow::tf_version() < "1.14.0") { - # yield iterators - iter = x$make_one_shot_iterator() - iter$get_next() - } else { - x - } - - } else { - NULL - } -} - - - - -#' Retrieves a layer based on either its name (unique) or index. -#' -#' Indices are based on order of horizontal graph traversal (bottom-up) and are -#' 1-based. If `name` and `index` are both provided, `index` will take -#' precedence. -#' -#' @param object Keras model object -#' @param name String, name of layer. -#' @param index Integer, index of layer (1-based). Also valid are negative -#' values, which count from the end of model. -#' -#' @return A layer instance. -#' -#' @family model functions -#' -#' @export -get_layer <- function(object, name = NULL, index = NULL) { - object$get_layer( - name = name, - index = as_layer_index(index) - ) -} - - -#' Remove the last layer in a model -#' -#' @param object Keras model object -#' -#' @family model functions -#' -#' @export -pop_layer <- function(object) { - object$pop() -} - - -#' Print a summary of a Keras model -#' -#' @param object,x Keras model instance -#' @param line_length Total length of printed lines -#' @param positions Relative or absolute positions of log elements in each line. -#' If not provided, defaults to `c(0.33, 0.55, 0.67, 1.0)`. -#' @param expand_nested Whether to expand the nested models. If not provided, -#' defaults to `FALSE`. -#' @param show_trainable Whether to show if a layer is trainable. If not -#' provided, defaults to `FALSE`. -#' @param compact Whether to remove white-space only lines from the model -#' summary. (Default `TRUE`) -#' @param width the column width to use for printing. -#' @param ... for `summary()` and `print()`, passed on to `format()`. For -#' `format()`, passed on to `model$summary()`. -#' -#' @family model functions -#' -#' @return `format()` returns a length 1 character vector. `print()` returns the -#' model object invisibly. `summary()` returns the output of `format()` -#' invisibly after printing it. -#' -#' @export -summary.keras.engine.training.Model <- function(object, ...) { - writeLines(f <- format.keras.engine.training.Model(object, ...)) - invisible(f) -} - -#' @rdname summary.keras.engine.training.Model -#' @export -format.keras.engine.training.Model <- -function(x, - line_length = width - (11L * show_trainable), - positions = NULL, - expand_nested = FALSE, - show_trainable = x$built && as.logical(length(x$non_trainable_weights)), - ..., - compact = TRUE, - width = getOption("width")) { - - if (py_is_null_xptr(x)) - return("") - - args <- capture_args(match.call(), ignore = c("x", "compact", "width")) - - # ensure `line_length` and other args captured, even if not passed by user - args$line_length <- as_nullable_integer(line_length) - if(tf_version() >= "2.8") - args$show_trainable <- show_trainable - - out <- if (x$built) - trimws(py_capture_output(do.call(x$summary, args), - type = "stdout")) - else - "Model: " - - if(compact) { - # strip empty lines - out <- gsub("(\\n\\s*\\n)", "\n", out, perl = TRUE) - if(expand_nested) - out <- gsub("\\n\\|\\s+\\|\\n", "\n", out) - } - out -} - -# -#' @rdname summary.keras.engine.training.Model -#' @export -print.keras.engine.training.Model <- function(x, ...) { - writeLines(format.keras.engine.training.Model(x, ...)) - invisible(x) -} - -#' @importFrom reticulate py_str -#' @export -py_str.keras.engine.training.Model <- function(object, line_length = getOption("width"), positions = NULL, ...) 
{ - # still invoked by utils::str() - # warning("`py_str()` generic is deprecated") - format.keras.engine.training.Model(object, line_length = line_length, positions = positions, ...) -} - - -# determine whether to view metrics or not -resolve_view_metrics <- function(verbose, epochs, metrics) { - (epochs > 1) && # more than 1 epoch - (verbose > 0) && # verbose mode is on - !is.null(getOption("viewer")) && # have an internal viewer available - nzchar(Sys.getenv("RSTUDIO")) # running under RStudio -} - - -write_history_metadata <- function(history) { - properties <- list() - properties$validation_samples <- history$params$validation_samples - tfruns::write_run_metadata("properties", properties) -} - - -as_class_weight <- function(class_weight) { - # convert class weights to python dict - if (!is.null(class_weight)) { - if (is.list(class_weight)) - class_weight <- dict(class_weight) - else - stop("class_weight must be a named list of weights") - } -} - -have_module <- function(module) { - tryCatch({ import(module); TRUE; }, error = function(e) FALSE) -} - -have_h5py <- function() { - have_module("h5py") -} - -have_pyyaml <- function() { - have_module("yaml") -} - -have_requests <- function() { - have_module("requests") -} - -have_pillow <- function() { - have_module("PIL") # aka Pillow -} - -confirm_overwrite <- function(filepath, overwrite) { - if (overwrite) - TRUE - else { - if (file.exists(filepath)) { - if (interactive()) { - prompt <- readline(sprintf("[WARNING] %s already exists - overwrite? [y/n] ", filepath)) - tolower(prompt) == 'y' - } else { - stop("File '", filepath, "' already exists (pass overwrite = TRUE to force save).", - call. = FALSE) - } - } else { - TRUE - } - } -} diff --git a/R/module-tethers.R b/R/module-tethers.R new file mode 100644 index 0000000000..6fbe424ea1 --- /dev/null +++ b/R/module-tethers.R @@ -0,0 +1,260 @@ +#' @title keras.activations +#' @name keras.activations +#' @tether keras.activations +#' @noRd +NULL + + +#' @title keras.applications +#' @name keras.applications +#' @tether keras.applications +#' @noRd +NULL + + +#' @title keras.backend +#' @name keras.backend +#' @tether keras.backend +#' @noRd +NULL + + +#' @title keras.callbacks +#' @name keras.callbacks +#' @tether keras.callbacks +#' @noRd +NULL + + +#' @title keras.config +#' @name keras.config +#' @tether keras.config +#' @noRd +NULL + + +#' @title keras.constraints +#' @name keras.constraints +#' @tether keras.constraints +#' @noRd +NULL + + +#' @title keras.datasets +#' @name keras.datasets +#' @tether keras.datasets +#' @noRd +NULL + + +#' @title keras.distribution +#' @name keras.distribution +#' @tether keras.distribution +#' @noRd +NULL + + +#' @title keras.export +#' @name keras.export +#' @tether keras.export +#' @noRd +NULL + + +#' @title keras.initializers +#' @name keras.initializers +#' @tether keras.initializers +#' @noRd +NULL + + +#' @title keras.layers +#' @name keras.layers +#' @tether keras.layers +#' @noRd +NULL + + +#' @title keras.legacy.saving +#' @name keras.legacy.saving +#' @tether keras.legacy.saving +#' @noRd +NULL + + +#' @title keras.legacy +#' @name keras.legacy +#' @tether keras.legacy +#' @noRd +NULL + + +#' @title keras.losses +#' @name keras.losses +#' @tether keras.losses +#' @noRd +NULL + + +#' @title keras.metrics +#' @name keras.metrics +#' @tether keras.metrics +#' @noRd +NULL + + +#' @title keras.mixed_precision +#' @name keras.mixed_precision +#' @tether keras.mixed_precision +#' @noRd +NULL + + +#' @title keras.models +#' @name keras.models +#' 
@tether keras.models
+#' @noRd
+NULL
+
+
+#' @title keras.ops.image
+#' @name keras.ops.image
+#' @tether keras.ops.image
+#' @noRd
+NULL
+
+
+#' @title keras.ops.nn
+#' @name keras.ops.nn
+#' @tether keras.ops.nn
+#' @noRd
+NULL
+
+
+#' @title keras.ops.numpy
+#' @name keras.ops.numpy
+#' @tether keras.ops.numpy
+#' @noRd
+NULL
+
+
+#' @title keras.ops
+#' @name keras.ops
+#' @tether keras.ops
+#' @noRd
+NULL
+
+
+#' @title keras.optimizers.legacy
+#' @name keras.optimizers.legacy
+#' @tether keras.optimizers.legacy
+#' @noRd
+NULL
+
+
+#' @title keras.optimizers.schedules
+#' @name keras.optimizers.schedules
+#' @tether keras.optimizers.schedules
+#' @noRd
+NULL
+
+
+#' @title keras.optimizers
+#' @name keras.optimizers
+#' @tether keras.optimizers
+#' @noRd
+NULL
+
+
+#' @title keras.preprocessing.image
+#' @name keras.preprocessing.image
+#' @tether keras.preprocessing.image
+#' @noRd
+NULL
+
+
+#' @title keras.preprocessing.sequence
+#' @name keras.preprocessing.sequence
+#' @tether keras.preprocessing.sequence
+#' @noRd
+NULL
+
+
+#' @title keras.preprocessing
+#' @name keras.preprocessing
+#' @tether keras.preprocessing
+#' @noRd
+NULL
+
+
+#' @title keras.random
+#' @name keras.random
+#' @tether keras.random
+#' @noRd
+NULL
+
+
+#' @title keras.regularizers
+#' @name keras.regularizers
+#' @tether keras.regularizers
+#' @noRd
+NULL
+
+
+#' @title keras.saving
+#' @name keras.saving
+#' @tether keras.saving
+#' @noRd
+NULL
+
+
+#' @title keras.utils.legacy
+#' @name keras.utils.legacy
+#' @tether keras.utils.legacy
+#' @noRd
+NULL
+
+
+#' @title keras.utils
+#' @name keras.utils
+#' @tether keras.utils
+#' @noRd
+NULL
+
+
+#' @title keras
+#' @name keras
+#' @tether keras
+#' @noRd
+NULL
+
+
+# tether functions that aren't currently exported, but
+# we might want to export in the future.
+
+# Seems like it's only useful for dynamically switching
+# "int" to int32 or int64, depending on the backend.
+#' @title standardize_dtype
+#' @name standardize_dtype
+#' @tether keras.utils.standardize_dtype
+#' @noRd
+NULL
+
+# These submodules seem not that useful for R users, but keep an eye on them w/ tethers.
+#' @title keras.tree +#' @name keras.tree +#' @tether keras.tree +#' @noRd +NULL + +#' @title keras.quantizers +#' @name keras.quantizers +#' @tether keras.quantizers +#' @noRd +NULL + +#' @title keras.dtype_policies +#' @name keras.dtype_policies +#' @tether keras.dtype_policies +#' @noRd +NULL diff --git a/R/new-py-types.R b/R/new-py-types.R deleted file mode 100644 index 9b7c81325b..0000000000 --- a/R/new-py-types.R +++ /dev/null @@ -1,151 +0,0 @@ - - -new_py_class <- - function(classname, - members = list(), - inherit = NULL, - parent_env = parent.frame(), - convert = TRUE, - inherit_expr = substitute(inherit)) { - - force(inherit_expr) - active <- NULL - for(nm in names(members)) { - if(is_marked_active(members[[nm]])) { - active[[nm]] <- members[[nm]] - members[[nm]] <- NULL - } - } - # R6Class calls substitute() on inherit - r6_class <- eval(as.call(list( - quote(R6::R6Class), - classname = classname, - public = members, - active = active, - inherit = inherit_expr, - cloneable = FALSE, - parent_env = parent_env - ))) - maybe_delayed_r_to_py_R6ClassGenerator(r6_class, convert, parent_env) - } - -#' @rdname new-classes -#' @export -mark_active <- function(x) { - if(!is.function(x)) - stop("Only R functions can be marked active") - attr(x, "marked_active") <- TRUE - x -} - -is_marked_active <- function(x) - identical(attr(x, "marked_active", TRUE), TRUE) - - -#' @rdname new-classes -#' @export -new_metric_class <- -function(classname, ..., initialize, update_state, result) { - members <- capture_args(match.call(), ignore = "classname") - new_py_class(classname, members, - inherit = keras::keras$metrics$Metric, - parent_env = parent.frame()) -} - -#' @rdname new-classes -#' @export -new_loss_class <- -function(classname, ..., call = NULL) { - members <- capture_args(match.call(), ignore = "classname") - members$call <- call - new_py_class(classname, members, - inherit = keras::keras$losses$Loss, - parent_env = parent.frame()) -} - -#' @rdname new-classes -#' @export -new_callback_class <- -function(classname, - ..., - on_epoch_begin = NULL, - on_epoch_end = NULL, - on_train_begin = NULL, - on_train_end = NULL, - on_batch_begin = NULL, - on_batch_end = NULL, - on_predict_batch_begin = NULL, - on_predict_batch_end = NULL, - on_predict_begin = NULL, - on_predict_end = NULL, - on_test_batch_begin = NULL, - on_test_batch_end = NULL, - on_test_begin = NULL, - on_test_end = NULL, - on_train_batch_begin = NULL, - on_train_batch_end = NULL) { - - members <- capture_args(match.call(), ignore = "classname") - members <- drop_nulls(members, - names(which(vapply(formals(sys.function()), is.null, TRUE)))) - - new_py_class(classname, members, - inherit = keras::keras$callbacks$Callback, - parent_env = parent.frame()) -} - - -#' @rdname new-classes -#' @export -new_model_class <- -function(classname, ..., - initialize = NULL, call = NULL, - train_step = NULL, predict_step = NULL, test_step = NULL, - compute_loss = NULL, compute_metrics = NULL) { - members <- capture_args(match.call(), ignore = "classname") - members <- drop_nulls(members, - names(which(vapply(formals(sys.function()), is.null, TRUE)))) - - new_py_class(classname, members, - inherit = keras::keras$Model, - parent_env = parent.frame()) -} - - - -#' Define new keras types -#' -#' These functions can be used to make custom objects that fit in the family of -#' existing keras types. For example, `new_layer_class()` will return a class -#' constructor, an object that behaves like other layer functions such as -#' `layer_dense()`. 
`new_callback_class()` will return an object that behaves -#' similarly to other callback functions, like -#' `callback_reduce_lr_on_plateau()`, and so on. All arguments with a default -#' `NULL` value are optional methods that can be provided. -#' -#' `mark_active()` is a decorator that can be used to indicate functions that -#' should become active properties of the class instances. -#' -#' @rdname new-classes -#' @param classname The classname as a string. Convention is for the classname -#' to be a CamelCase version of the constructor. -#' @param ... Additional fields and methods for the new type. -#' @param initialize,build,call,get_config,on_epoch_begin,on_epoch_end,on_train_begin,on_train_end,on_batch_begin,on_batch_end,on_predict_batch_begin,on_predict_batch_end,on_predict_begin,on_predict_end,on_test_batch_begin,on_test_batch_end,on_test_begin,on_test_end,on_train_batch_begin,on_train_batch_end,update_state,result,train_step,predict_step,test_step,compute_loss,compute_metrics Optional methods that can be overridden. -#' @param x A function that should be converted to an active property of the class type. -#' -#' @return A new class generator object that inherits from the appropriate Keras -#' base class. -#' @export -new_layer_class <- -function(classname, ..., - initialize = NULL, build = NULL, call = NULL, get_config = NULL) { - members <- capture_args(match.call(), ignore = "classname") - members <- drop_nulls(members, - names(which(vapply(formals(sys.function()), is.null, TRUE)))) - - type <- new_py_class(classname, members, - inherit = keras$layers$Layer, - parent_env = parent.frame()) - - create_layer_wrapper(type) -} diff --git a/R/ops-image.R b/R/ops-image.R new file mode 100644 index 0000000000..7891e73610 --- /dev/null +++ b/R/ops-image.R @@ -0,0 +1,506 @@ + + +#' Applies the given transform(s) to the image(s). +#' +#' @description +#' +#' # Examples +#' ```{r} +#' x <- random_uniform(c(2, 64, 80, 3)) # batch of 2 RGB images +#' transform <- op_array(rbind(c(1.5, 0, -20, 0, 1.5, -16, 0, 0), # zoom +#' c(1, 0, -20, 0, 1, -16, 0, 0))) # translation)) +#' y <- op_image_affine_transform(x, transform) +#' shape(y) +#' # (2, 64, 80, 3) +#' ``` +#' +#' ```{r} +#' x <- random_uniform(c(64, 80, 3)) # single RGB image +#' transform <- op_array(c(1.0, 0.5, -20, 0.5, 1.0, -16, 0, 0)) # shear +#' y <- op_image_affine_transform(x, transform) +#' shape(y) +#' # (64, 80, 3) +#' ``` +#' +#' ```{r} +#' x <- random_uniform(c(2, 3, 64, 80)) # batch of 2 RGB images +#' transform <- op_array(rbind( +#' c(1.5, 0,-20, 0, 1.5,-16, 0, 0), # zoom +#' c(1, 0,-20, 0, 1,-16, 0, 0) # translation +#' )) +#' y <- op_image_affine_transform(x, transform, data_format = "channels_first") +#' shape(y) +#' # (2, 3, 64, 80) +#' ``` +#' +#' @returns +#' Applied affine transform image or batch of images. +#' +#' @param image +#' Input image or batch of images. Must be 3D or 4D. +#' +#' @param transform +#' Projective transform matrix/matrices. A vector of length 8 or +#' tensor of size N x 8. If one row of transform is +#' `[a0, a1, a2, b0, b1, b2, c0, c1]`, then it maps the output point +#' `(x, y)` to a transformed input point +#' `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, +#' where `k = c0 x + c1 y + 1`. The transform is inverted compared to +#' the transform mapping input points to output points. Note that +#' gradients are not backpropagated into transformation parameters. 
+#' Note that `c0` and `c1` are only effective when using the TensorFlow
+#' backend, and are treated as `0` when using other backends.
+#'
+#' @param interpolation
+#' Interpolation method. Available methods are `"nearest"`,
+#' and `"bilinear"`. Defaults to `"bilinear"`.
+#'
+#' @param fill_mode
+#' Points outside the boundaries of the input are filled
+#' according to the given mode. Available methods are `"constant"`,
+#' `"nearest"`, `"wrap"` and `"reflect"`. Defaults to `"constant"`.
+#' - `"reflect"`: `(d c b a | a b c d | d c b a)`
+#' The input is extended by reflecting about the edge of the last
+#' pixel.
+#' - `"constant"`: `(k k k k | a b c d | k k k k)`
+#' The input is extended by filling all values beyond
+#' the edge with the same constant value k specified by
+#' `fill_value`.
+#' - `"wrap"`: `(a b c d | a b c d | a b c d)`
+#' The input is extended by wrapping around to the opposite edge.
+#' - `"nearest"`: `(a a a a | a b c d | d d d d)`
+#' The input is extended by the nearest pixel.
+#'
+#' @param fill_value
+#' Value used for points outside the boundaries of the input if
+#' `fill_mode = "constant"`. Defaults to `0`.
+#'
+#' @param data_format
+#' string, either `"channels_last"` or `"channels_first"`.
+#' The ordering of the dimensions in the inputs. `"channels_last"`
+#' corresponds to inputs with shape `(batch, height, width, channels)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch, channels, height, width)`. It defaults to the
+#' `image_data_format` value found in your Keras config file at
+#' `~/.keras/keras.json`. If you never set it, then it will be
+#' `"channels_last"`.
+#'
+#' @export
+#' @family image ops
+#' @family image utils
+#' @family ops
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.ops.image.affine_transform
+op_image_affine_transform <-
+function (image, transform, interpolation = "bilinear", fill_mode = "constant",
+    fill_value = 0L, data_format = "channels_last")
+{
+    args <- capture_args(list(fill_value = as_integer))
+    do.call(keras$ops$image$affine_transform, args)
+}
+
+
+#' Extracts patches from the image(s).
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' image <- random_uniform(c(2, 20, 20, 3), dtype = "float32") # batch of 2 RGB images
+#' patches <- op_image_extract_patches(image, c(5, 5))
+#' shape(patches)
+#' # (2, 4, 4, 75)
+#' image <- random_uniform(c(20, 20, 3), dtype = "float32") # 1 RGB image
+#' patches <- op_image_extract_patches(image, c(3, 3), c(1, 1))
+#' shape(patches)
+#' # (18, 18, 27)
+#' ```
+#'
+#' @returns
+#' Extracted patches, 3D (if not batched) or 4D (if batched).
+#'
+#' @param image
+#' Input image or batch of images. Must be 3D or 4D.
+#'
+#' @param size
+#' Patch size, an integer or a list `(patch_height, patch_width)`.
+#'
+#' @param strides
+#' strides along height and width. If not specified, or
+#' if `NULL`, it defaults to the same value as `size`.
+#'
+#' @param dilation_rate
+#' This is the input stride, specifying how far two
+#' consecutive patch samples are in the input. For values other than 1,
+#' strides must be 1. NOTE: `strides > 1` is not supported in
+#' conjunction with `dilation_rate > 1`.
+#'
+#' @param padding
+#' The type of padding algorithm to use: `"same"` or `"valid"`.
+#'
+#' @param data_format
+#' string, either `"channels_last"` or `"channels_first"`.
+#' The ordering of the dimensions in the inputs. `"channels_last"`
+#' corresponds to inputs with shape `(batch, height, width, channels)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch, channels, height, width)`. It defaults to the
+#' `image_data_format` value found in your Keras config file at
+#' `~/.keras/keras.json`. If you never set it, then it will be
+#' `"channels_last"`.
+#'
+#' @export
+#' @family image ops
+#' @family image utils
+#' @family ops
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.ops.image.extract_patches
+op_image_extract_patches <-
+function (image, size, strides = NULL, dilation_rate = 1L, padding = "valid",
+    data_format = "channels_last")
+{
+    args <- capture_args(list(size = as_integer, dilation_rate = as_integer))
+    do.call(keras$ops$image$extract_patches, args)
+}
+
+
+#' Map the input array to new coordinates by interpolation.
+#'
+#' @description
+#' Note that interpolation near boundaries differs from the scipy function,
+#' because we fixed an outstanding bug
+#' [scipy/issues/2640](https://github.com/scipy/scipy/issues/2640).
+#'
+#' @returns
+#' Output image or batch of images.
+#'
+#' @param input
+#' The input array.
+#'
+#' @param coordinates
+#' The coordinates at which input is evaluated.
+#'
+#' @param order
+#' The order of the spline interpolation. The order must be `0` or
+#' `1`. `0` indicates the nearest neighbor and `1` indicates the linear
+#' interpolation.
+#'
+#' @param fill_mode
+#' Points outside the boundaries of the input are filled
+#' according to the given mode. Available methods are `"constant"`,
+#' `"nearest"`, `"wrap"`, `"mirror"`, and `"reflect"`. Defaults to
+#' `"constant"`.
+#' - `"constant"`: `(k k k k | a b c d | k k k k)`
+#' The input is extended by filling all values beyond
+#' the edge with the same constant value k specified by
+#' `fill_value`.
+#' - `"nearest"`: `(a a a a | a b c d | d d d d)`
+#' The input is extended by the nearest pixel.
+#' - `"wrap"`: `(a b c d | a b c d | a b c d)`
+#' The input is extended by wrapping around to the opposite edge.
+#' - `"mirror"`: `(c d c b | a b c d | c b a b)`
+#' The input is extended by mirroring about the edge.
+#' - `"reflect"`: `(d c b a | a b c d | d c b a)`
+#' The input is extended by reflecting about the edge of the last
+#' pixel.
+#'
+#' @param fill_value
+#' Value used for points outside the boundaries of the input if
+#' `fill_mode = "constant"`. Defaults to `0`.
+#'
+#' @export
+#' @family image ops
+#' @family image utils
+#' @family ops
+# @seealso
+# +
+#' @tether keras.ops.image.map_coordinates
+op_image_map_coordinates <-
+function (input, coordinates, order, fill_mode = "constant",
+    fill_value = 0L)
+{
+    args <- capture_args(list(fill_value = as_integer))
+    do.call(keras$ops$image$map_coordinates, args)
+}
+
+
+#' Pad `images` with zeros to the specified `height` and `width`.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' images <- random_uniform(c(15, 25, 3))
+#' padded_images <- op_image_pad(
+#'   images, 2, 3, target_height = 20, target_width = 30
+#' )
+#' shape(padded_images)
+#' ```
+#'
+#' ```{r}
+#' batch_images <- random_uniform(c(2, 15, 25, 3))
+#' padded_batch <- op_image_pad(batch_images, 2, 3,
+#'                              target_height = 20,
+#'                              target_width = 30)
+#' shape(padded_batch)
+#' ```
+#'
+#' @returns
+#' - If `images` were 4D, a 4D float Tensor of shape
+#'   `(batch, target_height, target_width, channels)`
+#' - If `images` were 3D, a 3D float Tensor of shape
+#'   `(target_height, target_width, channels)`
+#'
+#' @param images
+#' 4D Tensor of shape `(batch, height, width, channels)` or 3D
+#' Tensor of shape `(height, width, channels)`.
+#'
+#' @param top_padding
+#' Number of rows of zeros to add on top.
+#'
+#' @param bottom_padding
+#' Number of rows of zeros to add at the bottom.
+#'
+#' @param left_padding
+#' Number of columns of zeros to add on the left.
+#'
+#' @param right_padding
+#' Number of columns of zeros to add on the right.
+#'
+#' @param target_height
+#' Height of output images.
+#'
+#' @param target_width
+#' Width of output images.
+#'
+#' @export
+#' @family image ops
+#' @family image utils
+#' @family ops
+# @seealso
+# +
+#'
+#' @tether keras.ops.image.pad_images
+op_image_pad <-
+function (images, top_padding = NULL, left_padding = NULL, target_height = NULL,
+    target_width = NULL, bottom_padding = NULL, right_padding = NULL)
+{
+    # note: `images` itself is intentionally not coerced with as_integer,
+    # which would truncate floating-point image data; only the padding and
+    # target sizes are integer arguments.
+    args <- capture_args(list(top_padding = as_integer,
+        bottom_padding = as_integer, left_padding = as_integer,
+        right_padding = as_integer, target_height = as_integer,
+        target_width = as_integer))
+    do.call(keras$ops$image$pad_images, args)
+}
+
+
+#'
+#' Resize images to size using the specified interpolation method.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' x <- random_uniform(c(2, 4, 4, 3)) # batch of 2 RGB images
+#' y <- op_image_resize(x, c(2, 2))
+#' shape(y)
+#' ```
+#'
+#' ```{r}
+#' x <- random_uniform(c(4, 4, 3)) # single RGB image
+#' y <- op_image_resize(x, c(2, 2))
+#' shape(y)
+#' ```
+#'
+#' ```{r}
+#' x <- random_uniform(c(2, 3, 4, 4)) # batch of 2 RGB images
+#' y <- op_image_resize(x, c(2, 2), data_format = "channels_first")
+#' shape(y)
+#' ```
+#'
+#' @returns
+#' Resized image or batch of images.
+#'
+#' @param image
+#' Input image or batch of images. Must be 3D or 4D.
+#'
+#' @param size
+#' Size of output image in `(height, width)` format.
+#'
+#' @param interpolation
+#' Interpolation method. Available methods are `"nearest"`,
+#' `"bilinear"`, and `"bicubic"`. Defaults to `"bilinear"`.
+#'
+#' @param antialias
+#' Whether to use an antialiasing filter when downsampling an
+#' image. Defaults to `FALSE`.
+#'
+#' @param crop_to_aspect_ratio
+#' If `TRUE`, resize the images without aspect
+#' ratio distortion. When the original aspect ratio differs
+#' from the target aspect ratio, the output image will be
+#' cropped so as to return the
+#' largest possible window in the image (of size `(height, width)`)
+#' that matches the target aspect ratio. By default
+#' (`crop_to_aspect_ratio=FALSE`), aspect ratio may not be preserved.
+#'
+#' @param pad_to_aspect_ratio
+#' If `TRUE`, pad the images without aspect
+#' ratio distortion. When the original aspect ratio differs
+#' from the target aspect ratio, the output image will be
+#' evenly padded on the short side.
+#'
+#' @param fill_mode
+#' When using `pad_to_aspect_ratio=TRUE`, padded areas
+#' are filled according to the given mode. Only `"constant"` is
+#' supported at this time
+#' (fill with constant value, equal to `fill_value`).
+#'
+#' @param fill_value
+#' Float. Padding value to use when `pad_to_aspect_ratio=TRUE`.
+#'
+#' @param data_format
+#' string, either `"channels_last"` or `"channels_first"`.
+#' The ordering of the dimensions in the inputs. `"channels_last"`
+#' corresponds to inputs with shape `(batch, height, width, channels)`
+#' while `"channels_first"` corresponds to inputs with shape
+#' `(batch, channels, height, width)`. It defaults to the
+#' `image_data_format` value found in your Keras config file at
+#' `~/.keras/keras.json`. If you never set it, then it will be
+#' `"channels_last"`.
+#'
+#' @export
+#' @family image ops
+#' @family image utils
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.image.resize
+op_image_resize <-
+function (image, size, interpolation = "bilinear", antialias = FALSE,
+    crop_to_aspect_ratio = FALSE, pad_to_aspect_ratio = FALSE,
+    fill_mode = "constant", fill_value = 0,
+    data_format = "channels_last")
+{
+    args <- capture_args(list(size = as_integer))
+    do.call(keras$ops$image$resize, args)
+}
+
+
+#' Crop `images` to a specified `height` and `width`.
+#'
+#' @description
+#'
+#' # Examples
+#' ```r
+#' images <- op_reshape(op_arange(1, 28, dtype="float32"), c(3, 3, 3))
+#' images[, , 1] # print the first channel of the images
+#'
+#' cropped_images <- op_image_crop(images, 0, 0, 2, 2)
+#' cropped_images[, , 1] # print the first channel of the cropped images
+#' ```
+#'
+#' @returns
+#' If `images` were 4D, a 4D float Tensor of shape
+#' `(batch, target_height, target_width, channels)`
+#' If `images` were 3D, a 3D float Tensor of shape
+#' `(target_height, target_width, channels)`
+#'
+#' @param images
+#' 4-D batch of images of shape `(batch, height, width, channels)`
+#' or 3-D single image of shape `(height, width, channels)`.
+#'
+#' @param top_cropping
+#' Number of rows to crop from the top.
+#'
+#' @param bottom_cropping
+#' Number of rows to crop from the bottom.
+#'
+#' @param left_cropping
+#' Number of columns to crop from the left.
+#'
+#' @param right_cropping
+#' Number of columns to crop from the right.
+#'
+#' @param target_height
+#' Height of the output images.
+#'
+#' @param target_width
+#' Width of the output images.
+#'
+#' @export
+#' @family image ops
+#' @family image utils
+#' @family ops
+#' @tether keras.ops.image.crop_images
+#' @seealso
+#' +
+op_image_crop <-
+function (images, top_cropping = NULL, left_cropping = NULL,
+    target_height = NULL, target_width = NULL, bottom_cropping = NULL,
+    right_cropping = NULL) {
+    args <- capture_args(list(
+        top_cropping = as_integer,
+        left_cropping = as_integer,
+        bottom_cropping = as_integer,
+        right_cropping = as_integer,
+        target_height = as_integer,
+        target_width = as_integer
+    ))
+    do.call(keras$ops$image$crop_images, args)
+}
+
+#' Convert RGB images to grayscale.
+#'
+#' @description
+#' This function converts RGB images to grayscale images. It supports both
+#' 3D and 4D tensors, where the last dimension represents channels.
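+#'
+#' As a rough sketch, the conversion is a fixed weighted sum over the channel
+#' axis. Assuming the conventional luma weights (the exact coefficients are
+#' chosen by the backend and may differ), a plain-R equivalent for a single
+#' `(height, width, 3)` array would be:
+#'
+#' ```r
+#' rgb_to_gray <- function(x, weights = c(0.299, 0.587, 0.114)) {
+#'   # weighted sum over the channel axis, keeping a channel dim of size 1
+#'   g <- x[, , 1] * weights[1] + x[, , 2] * weights[2] + x[, , 3] * weights[3]
+#'   array(g, dim = c(dim(x)[1:2], 1))
+#' }
+#' ```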
+#'
+#' # Examples
+#' ```{r}
+#' x <- random_uniform(c(2, 4, 4, 3))
+#' y <- op_image_rgb_to_grayscale(x)
+#' shape(y)
+#' ```
+#'
+#' ```{r}
+#' x <- random_uniform(c(4, 4, 3)) # Single RGB image
+#' y <- op_image_rgb_to_grayscale(x)
+#' shape(y)
+#' ```
+#'
+#' ```{r}
+#' x <- random_uniform(c(2, 3, 4, 4))
+#' y <- op_image_rgb_to_grayscale(x, data_format="channels_first")
+#' shape(y)
+#' ```
+#'
+#' @returns
+#' Grayscale image or batch of grayscale images.
+#'
+#' @param image
+#' Input RGB image or batch of RGB images. Must be a 3D tensor
+#' with shape `(height, width, channels)` or a 4D tensor with shape
+#' `(batch, height, width, channels)`.
+#'
+#' @param data_format
+#' A string specifying the data format of the input tensor.
+#' It can be either `"channels_last"` or `"channels_first"`.
+#' `"channels_last"` corresponds to inputs with shape
+#' `(batch, height, width, channels)`, while `"channels_first"`
+#' corresponds to inputs with shape `(batch, channels, height, width)`.
+#' Defaults to `"channels_last"`.
+#'
+#' @export
+#' @family image ops
+#' @family image utils
+#' @family ops
+#' @tether keras.ops.image.rgb_to_grayscale
+op_image_rgb_to_grayscale <-
+function (image, data_format = "channels_last")
+keras$ops$image$rgb_to_grayscale(image, data_format)
diff --git a/R/ops-linalg.R b/R/ops-linalg.R
new file mode 100644
index 0000000000..49ac493991
--- /dev/null
+++ b/R/ops-linalg.R
@@ -0,0 +1,253 @@
+
+#' Computes the Cholesky decomposition of a positive semi-definite matrix.
+#'
+#' @returns
+#' A tensor of shape `(..., M, M)` representing the lower triangular
+#' Cholesky factor of `x`.
+#'
+#' @param x
+#' Input tensor of shape `(..., M, M)`.
+#'
+#' @export
+#' @family linear algebra ops
+#' @family ops
+#' @tether keras.ops.cholesky
+# @seealso
+# +
+op_cholesky <-
+function (x)
+keras$ops$cholesky(x)
+
+
+#' Computes the determinant of a square tensor.
+#'
+#' @returns
+#' A tensor of shape `(...)` representing the determinant of `x`.
+#'
+#' @param x
+#' Input tensor of shape `(..., M, M)`.
+#'
+#' @export
+#' @family linear algebra ops
+#' @family ops
+#' @tether keras.ops.det
+# @seealso
+# +
+op_det <-
+function (x)
+keras$ops$det(x)
+
+
+#' Computes the eigenvalues and eigenvectors of a square matrix.
+#'
+#' @returns
+#' A list of two tensors: a tensor of shape `(..., M)` containing
+#' eigenvalues and a tensor of shape `(..., M, M)` containing eigenvectors.
+#'
+#' @param x
+#' Input tensor of shape `(..., M, M)`.
+#'
+#' @export
+#' @family linear algebra ops
+#' @family ops
+#' @tether keras.ops.eig
+# @seealso
+# +
+op_eig <-
+function (x)
+keras$ops$eig(x)
+
+#' Computes the eigenvalues and eigenvectors of a complex Hermitian matrix.
+#'
+#' @returns
+#' A list of two tensors: a tensor of shape `(..., M)` containing
+#' eigenvalues and a tensor of shape `(..., M, M)` containing eigenvectors.
+#'
+#' @param x
+#' Input tensor of shape `(..., M, M)`.
+#'
+#' @export
+#' @family linear algebra ops
+#' @family ops
+#' @tether keras.ops.eigh
+op_eigh <-
+function (x)
+keras$ops$eigh(x)
+
+#' Computes the inverse of a square tensor.
+#'
+#' @returns
+#' A tensor of shape `(..., M, M)` representing the inverse of `x`.
+#'
+#' @param x
+#' Input tensor of shape `(..., M, M)`.
+#'
+#' @export
+#' @family linear algebra ops
+#' @family ops
+#' @tether keras.ops.inv
+# @seealso
+# +
+op_inv <-
+function (x)
+keras$ops$inv(x)
+
+
+#' Computes the lower-upper decomposition of a square matrix.
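+#'
+#' @description
+#'
+#' # Examples
+#'
+#' A minimal usage sketch (not run; the values are illustrative, and the
+#' exact pivot convention of the returned factors is backend-defined):
+#'
+#' ```r
+#' x <- op_convert_to_tensor(rbind(c(4, 3),
+#'                                 c(6, 3)))
+#' op_lu_factor(x) # list of: combined LU factors and pivot indices
+#' ```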
+#'
+#' @returns
+#' A list of two tensors: a tensor of shape `(..., M, M)` containing the
+#' lower and upper triangular matrices and a tensor of shape `(..., M)`
+#' containing the pivots.
+#'
+#' @param x
+#' A tensor of shape `(..., M, M)`.
+#'
+#' @export
+#' @family linear algebra ops
+#' @family ops
+#' @tether keras.ops.lu_factor
+# @seealso
+# +
+op_lu_factor <-
+function (x)
+keras$ops$lu_factor(x)
+
+
+#' Matrix or vector norm.
+#'
+#' @description
+#' This function is able to return one of eight different matrix norms, or one
+#' of an infinite number of vector norms (described below), depending on the
+#' value of the `ord` parameter.
+#'
+#' # Note
+#' For values of `ord < 1`, the result is, strictly speaking, not a
+#' mathematical 'norm', but it may still be useful for various numerical
+#' purposes. The following norms can be calculated:
+#' - For matrices:
+#'   - `ord=NULL`: Frobenius norm
+#'   - `ord="fro"`: Frobenius norm
+#'   - `ord="nuc"`: nuclear norm
+#'   - `ord=Inf`: `max(sum(abs(x), axis=2))`
+#'   - `ord=-Inf`: `min(sum(abs(x), axis=2))`
+#'   - `ord=0`: not supported
+#'   - `ord=1`: `max(sum(abs(x), axis=1))`
+#'   - `ord=-1`: `min(sum(abs(x), axis=1))`
+#'   - `ord=2`: 2-norm (largest singular value)
+#'   - `ord=-2`: smallest singular value
+#'   - other: not supported
+#' - For vectors:
+#'   - `ord=NULL`: 2-norm
+#'   - `ord="fro"`: not supported
+#'   - `ord="nuc"`: not supported
+#'   - `ord=Inf`: `max(abs(x))`
+#'   - `ord=-Inf`: `min(abs(x))`
+#'   - `ord=0`: `sum(x != 0)`
+#'   - `ord=1`: as below
+#'   - `ord=-1`: as below
+#'   - `ord=2`: as below
+#'   - `ord=-2`: as below
+#'   - other: `sum(abs(x)^ord)^(1/ord)`
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_reshape(op_arange(9, dtype="float32") - 4, c(3, 3))
+#' op_norm(x)
+#' # 7.7459664
+#' ```
+#'
+#' @returns
+#' Norm of the matrix or vector(s).
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param ord
+#' Order of the norm (see the list under Note). The default is `NULL`.
+#'
+#' @param axis
+#' If `axis` is an integer, it specifies the axis of `x` along which
+#' to compute the vector norms. If `axis` is a length 2 vector, it specifies
+#' the axes that hold 2-D matrices, and the matrix norms of these
+#' matrices are computed.
+#'
+#' @param keepdims
+#' If this is set to `TRUE`, the axes which are reduced are left
+#' in the result as dimensions with size one.
+#'
+#' @export
+#' @family linear algebra ops
+#' @family ops
+#' @tether keras.ops.norm
+# @seealso
+# +
+op_norm <-
+function (x, ord = NULL, axis = NULL, keepdims = FALSE)
+{
+    args <- capture_args(list(
+        axis = as_axis,
+        ord = function(x) {
+            if (is.double(x) && all(!is.infinite(x)))
+                as.integer(x)
+            else
+                x
+        }
+    ))
+    do.call(keras$ops$norm, args)
+}
+
+
+#' Solves a linear system of equations given by `a %*% x = b`.
+#'
+#' @returns
+#' A tensor of shape `(..., M)` or `(..., M, N)` representing the solution
+#' of the linear system. Returned shape is identical to `b`.
+#'
+#' @param a
+#' A tensor of shape `(..., M, M)` representing the coefficients matrix.
+#'
+#' @param b
+#' A tensor of shape `(..., M)` or `(..., M, N)` representing the
+#' right-hand side or "dependent variable" matrix.
+#'
+#' @param lower logical.
+#' Use only data contained in the lower triangle of `a`. Defaults to using
+#' the upper triangle.
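+#' For example, with a lower-triangular `a` this is forward substitution
+#' (a sketch, not run):
+#'
+#' ```r
+#' a <- op_convert_to_tensor(rbind(c(2, 0),
+#'                                 c(1, 3)))
+#' b <- op_convert_to_tensor(c(2, 7))
+#' op_solve_triangular(a, b, lower = TRUE) # solves to c(1, 2)
+#' ```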
+#'
+#' @export
+#' @family linear algebra ops
+#' @family ops
+#' @tether keras.ops.solve_triangular
+# @seealso
+# +
+op_solve_triangular <-
+function (a, b, lower = FALSE)
+keras$ops$solve_triangular(a, b, lower)
+
+
+#' Computes the singular value decomposition of a matrix.
+#'
+#' @returns
+#' A list of three tensors:
+#' - a tensor of shape `(..., M, M)` containing the
+#'   left singular vectors,
+#' - a tensor of shape `(..., min(M, N))` containing the
+#'   singular values, and
+#' - a tensor of shape `(..., N, N)` containing the
+#'   right singular vectors.
+#'
+#' @param x
+#' Input tensor of shape `(..., M, N)`.
+#'
+#' @param full_matrices Logical
+#' @param compute_uv Logical
+#'
+#' @export
+#' @family linear algebra ops
+#' @family ops
+#' @tether keras.ops.svd
+#' @seealso
+#' +
+op_svd <-
+function (x, full_matrices = TRUE, compute_uv = TRUE)
+keras$ops$svd(x, full_matrices, compute_uv)
diff --git a/R/ops-math.R b/R/ops-math.R
new file mode 100644
index 0000000000..883efb0278
--- /dev/null
+++ b/R/ops-math.R
@@ -0,0 +1,25 @@
+#' Computes the inverse error function of `x`, element-wise.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_array(c(-0.5, -0.2, -0.1, 0.0, 0.3))
+#' op_erfinv(x)
+#' ```
+#'
+#' @returns
+#' A tensor with the same dtype as `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family math ops
+#' @family ops
+#' @tether keras.ops.erfinv
+# @seealso
+# +
+op_erfinv <-
+function (x)
+keras$ops$erfinv(x)
diff --git a/R/ops-nn.R b/R/ops-nn.R
new file mode 100644
index 0000000000..f583b165a0
--- /dev/null
+++ b/R/ops-nn.R
@@ -0,0 +1,107 @@
+#' Normalizes `x` by `mean` and `variance`.
+#'
+#' @description
+#' This op is typically used by the batch normalization step in a neural
+#' network. It normalizes the input tensor along the given axis.
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_convert_to_tensor(rbind(c(0.1, 0.2, 0.3),
+#'                                 c(0.4, 0.5, 0.6),
+#'                                 c(0.7, 0.8, 0.9)))
+#' op_batch_normalization(
+#'   x,
+#'   mean = c(0.4, 0.5, 0.6),
+#'   variance = c(0.67, 0.67, 0.67),
+#'   axis = -1
+#' )
+#' ```
+#'
+#' @returns
+#' The normalized tensor.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param mean
+#' A mean vector of the same length as the `axis` dimension of the
+#' input tensor.
+#'
+#' @param variance
+#' A variance vector of the same length as the `axis` dimension
+#' of the input tensor.
+#'
+#' @param axis
+#' Integer, the axis that should be normalized.
+#'
+#' @param offset
+#' An offset vector of the same length as the `axis` dimension of
+#' the input tensor. If not `NULL`, `offset` is added to the normalized
+#' tensor. Defaults to `NULL`.
+#'
+#' @param scale
+#' A scale vector of the same length as the `axis` dimension of the
+#' input tensor. If not `NULL`, the normalized tensor is multiplied by
+#' `scale`. Defaults to `NULL`.
+#'
+#' @param epsilon
+#' Small float added to variance to avoid dividing by zero.
+#' Defaults to 1e-3.
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @tether keras.ops.batch_normalization
+#' @seealso
+#' +
+op_batch_normalization <-
+function (x, mean, variance, axis, offset = NULL, scale = NULL,
+    epsilon = 0.001)
+{
+    args <- capture_args(list(
+        axis = as_axis,
+        mean = as_array,
+        variance = as_array,
+        offset = as_array
+    ))
+    do.call(keras$ops$batch_normalization, args)
+}
+
+#' Normalizes `x` over the specified axis.
+#'
+#' @description
+#' It is defined as: `normalize(x) = x / max(norm(x), epsilon)`.
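+#'
+#' For a single vector, a plain-R sketch of the same formula (assuming the
+#' default L2 norm; the backend's exact `epsilon` value may differ) would be:
+#'
+#' ```r
+#' normalize_vec <- function(x, order = 2, epsilon = 1e-7) {
+#'   # x / max(||x||_order, epsilon)
+#'   x / max(sum(abs(x)^order)^(1 / order), epsilon)
+#' }
+#' normalize_vec(c(1, 2, 3))
+#' ```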
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_convert_to_tensor(rbind(c(1, 2, 3), c(4, 5, 6)))
+#' x_norm <- op_normalize(x)
+#' x_norm
+#' ```
+#'
+#' @returns
+#' The normalized array.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param axis
+#' The axis or axes along which to perform normalization.
+#' Defaults to `-1`.
+#'
+#' @param order
+#' The exponent value in the norm formulation.
+#' Defaults to 2.
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @tether keras.ops.normalize
+#' @seealso
+#' +
+op_normalize <-
+function (x, axis = -1L, order = 2L)
+{
+    args <- capture_args(list(axis = as_axis, order = as_integer))
+    do.call(keras$ops$normalize, args)
+}
diff --git a/R/ops-numpy.R b/R/ops-numpy.R
new file mode 100644
index 0000000000..d7636cea62
--- /dev/null
+++ b/R/ops-numpy.R
@@ -0,0 +1,20 @@
+#' Safe element-wise division which returns 0 where the denominator is 0.
+#'
+#' @returns
+#' The quotient `x1/x2`, element-wise, with zero where `x2` is zero.
+#'
+#' @param x1
+#' First input tensor.
+#'
+#' @param x2
+#' Second input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @tether keras.ops.divide_no_nan
+# @seealso
+# +
+op_divide_no_nan <-
+function (x1, x2)
+keras$ops$divide_no_nan(x1, x2)
diff --git a/R/ops.R b/R/ops.R
new file mode 100644
index 0000000000..cffdac8a10
--- /dev/null
+++ b/R/ops.R
@@ -0,0 +1,7408 @@
+
+
+#' Cast a tensor to the desired dtype.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' (x <- op_arange(4))
+#' op_cast(x, dtype = "float16")
+#' ```
+#'
+#' @returns
+#' A tensor of the specified `dtype`.
+#'
+#' @param x
+#' A tensor or variable.
+#'
+#' @param dtype
+#' The target type.
+#'
+#' @export
+#' @family core ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.cast
+op_cast <-
+function (x, dtype)
+keras$ops$cast(x, dtype)
+
+
+#' Conditionally applies `true_fn` or `false_fn`.
+#'
+#' @returns
+#' The output of either `true_fn` or `false_fn` depending on `pred`.
+#'
+#' @param pred
+#' A boolean scalar.
+#'
+#' @param true_fn
+#' Callable returning the output for the `pred == TRUE` case.
+#'
+#' @param false_fn
+#' Callable returning the output for the `pred == FALSE` case.
+#'
+#' @details
+#'
+#' # Examples
+#' ```{r}
+#' fn <- tensorflow::tf_function(function(x) {
+#'   op_cond(x > 0,
+#'           true_fn = \() x + 1,
+#'           false_fn = \() x - 1)
+#' })
+#'
+#' fn(tensorflow::as_tensor(1))
+#' fn(tensorflow::as_tensor(-1))
+#' #
+#' # Conditional side-effect (print only, no return value).
+#' file <- tempfile(fileext = ".txt")
+#' fn <- tensorflow::tf_function(function(epochs) {
+#'   op_fori_loop(
+#'     0, epochs,
+#'     body_fun = \(epoch, state) {
+#'       op_cond(epoch %% 20 == 0,
+#'               \() {
+#'                 tensorflow::tf$print(
+#'                   "epoch:", epoch,
+#'                   output_stream = paste0("file://", file))
+#'                 NULL
+#'               },
+#'               \() {NULL})
+#'       state
+#'     },
+#'     init_val = tensorflow::as_tensor(0))
+#' })
+#'
+#' fn(tensorflow::as_tensor(100))
+#'
+#' readLines(file)
+#'
+#' # cleanup
+#' unlink(file)
+#' ```
+#' @export
+#' @family core ops
+#' @family ops
+# @seealso
+# +
+#' @tether keras.ops.cond
+op_cond <-
+function (pred, true_fn, false_fn)
+keras$ops$cond(pred, true_fn, false_fn)
+
+
+#' Convert a tensor to a NumPy array.
+#'
+#' @returns
+#' A NumPy array.
+#'
+#' @param x
+#' A tensor.
+#'
+#' @export
+#' @family core ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.convert_to_numpy
+op_convert_to_numpy <-
+function (x)
+r_to_py(keras$ops)$convert_to_numpy(x)
+
+
+#' Convert an array to a tensor.
+#' +#' @description +#' +#' # Examples +#' ```{r} +#' x <- array(c(1, 2, 3)) +#' y <- op_convert_to_tensor(x) +#' y +#' op_convert_to_tensor(c(1, 3, 2, 0), "int32") +#' ``` +#' +#' @returns +#' A tensor of the specified `dtype`. +#' +#' @param x +#' An array. +#' +#' @param dtype +#' The target type. +#' +#' @param sparse +#' Whether to keep sparse tensors. `FALSE` will cause sparse +#' tensors to be densified. The default value of `NULL` means that +#' sparse tensors are kept only if the backend supports them. +#' +#' @export +#' @family core ops +#' @family ops +#' @seealso +#' + [op_array()] +#' + +# + +#' @tether keras.ops.convert_to_tensor +op_convert_to_tensor <- +function (x, dtype = NULL, sparse = NULL) { + if (!is.null(dtype) && is_string(dtype) && + typeof(x) == "double" && + grepl("int", dtype, fixed = TRUE)) + storage.mode(x) <- "integer" + keras$ops$convert_to_tensor(x, dtype, sparse) +} + + +#' For loop implementation. +#' +#' @description +#' +#' # Examples +#' ```{r} +#' lower <- 0L +#' upper <- 10L +#' body_fun <- function(i, state) state + i +#' init_state <- 0L +#' final_state <- op_fori_loop(lower, upper, body_fun, init_state) +#' final_state +#' ``` +#' +#' @returns +#' The final state after the loop. +#' +#' @param lower +#' The initial value of the loop variable. +#' +#' @param upper +#' The upper bound of the loop variable. +#' +#' @param body_fun +#' A callable that represents the loop body. Must take two +#' arguments: the loop variable and the loop state. The loop state +#' should be updated and returned by this function. +#' +#' @param init_val +#' The initial value of the loop state. +#' +#' @export +#' @family core ops +#' @family ops +# @seealso +# + +#' @tether keras.ops.fori_loop +op_fori_loop <- +function (lower, upper, body_fun, init_val) +keras$ops$fori_loop(lower, upper, body_fun, init_val) + + +#' Check whether the given object is a tensor. +#' +#' @description +#' +#' # Note +#' This checks for backend specific tensors so passing a TensorFlow +#' tensor would return `FALSE` if your backend is PyTorch or JAX. +#' +#' @returns +#' `TRUE` if `x` is a tensor, otherwise `FALSE`. +#' +#' @param x +#' A variable. +#' +#' @export +#' @family core ops +#' @family ops +# @seealso +# + +#' @tether keras.ops.is_tensor +op_is_tensor <- +function (x) +keras$ops$is_tensor(x) + + +#' Returns a tensor of shape `shape` where `indices` are set to `values`. +#' +#' @description +#' At a high level, this operation does `zeros[indices] = updates` and +#' returns the output. It is equivalent to: +#' +#' ```{r, eval = FALSE} +#' output <- op_scatter_update(op_zeros(shape), indices, values) +#' ``` +#' +#' # Examples +#' ```{r} +#' indices <- rbind(c(1, 2), c(2, 2)) +#' values <- op_array(c(1, 1)) +#' op_scatter(indices, values, shape= c(2, 2)) +#' ``` +#' +#' @param indices +#' A tensor or list specifying +#' indices for the values in `values`. +#' +#' @param values +#' A tensor, the values to be set at `indices`. +#' +#' @param shape +#' Shape of the output tensor. +#' +#' @returns A tensor of shape `shape` where `indices` are set to `values`. +#' +#' @export +#' @family core ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.scatter +op_scatter <- +function (indices, values, shape) +{ + args <- capture_args(list(indices = as_index, shape = normalize_shape)) + do.call(keras$ops$scatter, args) +} + + +#' Update inputs via updates at scattered (sparse) indices. +#' +#' @description +#' At a high level, this operation does `inputs[indices] <- updates`. 
+#' Assume `inputs` is a tensor of shape `(D1, D2, ..., Dn)`; there are two main
+#' usages of `scatter_update`.
+#'
+#' 1. `indices` is a 2D tensor of shape `(num_updates, n)`, where `num_updates`
+#' is the number of updates to perform, and `updates` is a 1D tensor of
+#' shape `(num_updates)`. For example, if `inputs` is `op_zeros(c(4, 4, 4))`,
+#' and we want to update `inputs[2, 3, 4]` and `inputs[1, 2, 4]` to `1`, then
+#' we can use:
+#'
+#' ```{r}
+#' inputs <- op_zeros(c(4, 4, 4))
+#' indices <- rbind(c(2, 3, 4),
+#'                  c(1, 2, 4))
+#' updates <- op_array(c(1, 1), "float32")
+#' op_scatter_update(inputs, indices, updates)
+#' ```
+#'
+#' 2. `indices` is a 2D tensor of shape `(num_updates, k)`, where `num_updates`
+#' is the number of updates to perform, and `k` (`k <= n`) is the size of
+#' each index in `indices`. `updates` is a `n - k`-D tensor of shape
+#' `(num_updates, shape(inputs)[-(1:k)])`. For example, if
+#' `inputs <- op_zeros(c(4, 4, 4))`, and we want to update `inputs[1, 2, ]`
+#' and `inputs[2, 3, ]` to `[1, 1, 1, 1]`, then `indices` would have shape
+#' `(num_updates, 2)` (`k = 2`), and `updates` would have shape
+#' `(num_updates, 4)` (`shape(inputs)[3:4] == 4`). See the code below:
+#'
+#' ```{r}
+#' inputs <- op_zeros(c(4, 4, 4))
+#' indices <- rbind(c(2, 3),
+#'                  c(3, 4))
+#' updates <- op_array(rbind(c(1, 1, 1, 1),
+#'                           c(1, 1, 1, 1)),
+#'                     "float32")
+#' op_scatter_update(inputs, indices, updates)
+#' ```
+#'
+#' @returns
+#' A tensor, has the same shape and dtype as `inputs`.
+#'
+#' @param inputs
+#' A tensor, the tensor to be updated.
+#'
+#' @param indices
+#' A tensor or list of shape `(N, inputs$ndim)`, specifying
+#' indices to update. `N` is the number of indices to update, must be
+#' equal to the first dimension of `updates`.
+#'
+#' @param updates
+#' A tensor, the new values to be put to `inputs` at `indices`.
+#'
+#' @export
+#' @family core ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.scatter_update
+op_scatter_update <-
+function (inputs, indices, updates)
+{
+    args <- capture_args(list(indices = as_index))
+    do.call(keras$ops$scatter_update, args)
+}
+
+
+#' Gets the shape of the tensor input.
+#'
+#' @description
+#'
+#' # Note
+#' On the TensorFlow backend, when `x` is a `tf.Tensor` with dynamic
+#' shape, dimensions which are dynamic in the context of a compiled function
+#' will have a `tf.Tensor` value instead of a static integer value.
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_zeros(c(8, 12))
+#' op_shape(x)
+#' ```
+#'
+#' @returns
+#' A list of integers or NULL values, indicating the shape of the input
+#' tensor.
+#'
+#' @param x
+#' A tensor. This function will try to access the `shape` attribute of
+#' the input tensor.
+#'
+#' @export
+#' @family core ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.shape
+op_shape <-
+function (x)
+{
+    out <- keras$ops$shape(x)
+    class(out) <- "keras_shape"
+    out
+}
+
+
+#' Return a slice of an input tensor.
+#'
+#' @description
+#' At a high level, this operation is an explicit replacement for array slicing
+#' e.g. `inputs[start_indices:(start_indices + shape)]`.
+#' Unlike slicing via brackets, this operation will accept tensor start
+#' indices on all backends, which is useful when indices are dynamically
+#' computed via other tensor operations.
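+#'
+#' The returned slice always has shape `shape`: in the example below, a
+#' `(2, 2)` slice is taken from a `(5, 5)` tensor, and `start_indices` could
+#' equally be a computed tensor rather than a constant.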
+#'
+#' ```{r}
+#' (inputs <- op_arange(5*5) |> op_reshape(c(5, 5)))
+#' start_indices <- c(3, 3)
+#' shape <- c(2, 2)
+#' op_slice(inputs, start_indices, shape)
+#' ```
+#'
+#' @returns
+#' A tensor, has the same shape and dtype as `inputs`.
+#'
+#' @param inputs
+#' A tensor, the tensor to be sliced.
+#'
+#' @param start_indices
+#' A list of length `inputs$ndim`, specifying
+#' the starting indices for updating.
+#'
+#' @param shape
+#' The full shape of the returned slice.
+#'
+#' @export
+#' @family core ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.ops.slice
+op_slice <-
+function (inputs, start_indices, shape)
+{
+    args <- capture_args(list(shape = normalize_shape, start_indices = as_index))
+    do.call(keras$ops$slice, args)
+}
+
+
+#' Update an input by slicing in a tensor of updated values.
+#'
+#' @description
+#' At a high level, this operation does
+#' `inputs[start_indices: start_indices + updates.shape] = updates`.
+#' Assume `inputs` is a tensor of shape `(D1, D2, ..., Dn)`;
+#' `start_indices` must be a list of n integers, specifying the starting
+#' indices. `updates` must have the same rank as `inputs`, and the size of each
+#' dim must not exceed `Di - start_indices[i]`. For example, if we have 2D
+#' inputs `inputs = op_zeros(c(5, 5))`, and we want to update the intersection
+#' of last 2 rows and last 2 columns as 1, i.e.,
+#' `inputs[4:5, 4:5] = op_ones(c(2, 2))`, then we can use the code below:
+#'
+#' ```{r}
+#' inputs <- op_zeros(c(5, 5))
+#' start_indices <- c(3, 3)
+#' updates <- op_ones(c(2, 2))
+#' op_slice_update(inputs, start_indices, updates)
+#' ```
+#'
+#' @returns
+#' A tensor, has the same shape and dtype as `inputs`.
+#'
+#' @param inputs
+#' A tensor, the tensor to be updated.
+#'
+#' @param start_indices
+#' A list of length `inputs$ndim`, specifying
+#' the starting indices for updating.
+#'
+#' @param updates
+#' A tensor, the new values to be put to `inputs` at `indices`.
+#' `updates` must have the same rank as `inputs`.
+#'
+#' @export
+#' @family core ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.slice_update
+op_slice_update <-
+function (inputs, start_indices, updates)
+{
+    args <- capture_args(list(start_indices = as_index))
+    do.call(keras$ops$slice_update, args)
+}
+
+
+#' Stops gradient computation.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' var <- op_convert_to_tensor(c(1, 2, 3), dtype="float32")
+#' var <- op_stop_gradient(var)
+#' ```
+#'
+#' @returns
+#' The variable with gradient computation disabled.
+#'
+#' @param variable
+#' A tensor variable for which the gradient
+#' computation is to be disabled.
+#'
+#' @export
+#' @family core ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.ops.stop_gradient
+op_stop_gradient <-
+function (variable)
+keras$ops$stop_gradient(variable)
+
+
+#' Unpacks the given dimension of a rank-R tensor into rank-(R-1) tensors.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_array(rbind(c(1, 2),
+#'                     c(3, 4)))
+#' op_unstack(x, axis=1)
+#' op_unstack(x, axis=2)
+#' ```
+#'
+#'
+#' ```{r}
+#' all.equal(op_unstack(x),
+#'           op_unstack(x, axis = 1))
+#' all.equal(op_unstack(x, axis = -1),
+#'           op_unstack(x, axis = 2))
+#' ```
+#'
+#' @returns
+#' A list of tensors unpacked along the given axis.
+#'
+#' @param x
+#' The input tensor.
+#'
+#' @param num
+#' The length of the dimension axis. Automatically inferred
+#' if `NULL`.
+#'
+#' @param axis
+#' The axis along which to unpack.
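+#' As the examples above show, axes are 1-based, and negative values count
+#' from the last axis (`axis = -1` is equivalent to `axis = 2` for a matrix).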
+#'
+#' @export
+#' @family core ops
+#' @family ops
+# @seealso
+# +
+#'
+#' @tether keras.ops.unstack
+op_unstack <-
+function (x, num = NULL, axis = 1L)
+{
+    args <- capture_args(list(axis = as_axis))
+    do.call(keras$ops$unstack, args)
+}
+
+
+#' Parallel map of function `f` on the first axis of tensor(s) `elements`.
+#'
+#' @description
+#' Schematically, `op_vectorized_map()` maps over the first dimension of the provided tensors.
+#' If `elements` is a list of tensors, then each of the tensors is required to
+#' have the same size first dimension, and they are iterated over together.
+#'
+#' # Examples
+#'
+#' ```{r}
+#' (x <- op_arange(12L) |> op_reshape(c(3, 4)))
+#' x |> op_vectorized_map(\(row) {row + 10})
+#' list(x, x, x) |> op_vectorized_map(\(rows) Reduce(`+`, rows))
+#' ```
+#'
+#' Note that `f` may be traced and compiled. That is, when using the JAX or
+#' TensorFlow backends, the R function may be evaluated only once, with
+#' symbolic tensors rather than eager tensors. See the output from `str()` in
+#' these examples:
+#' ```{r}
+#' # simplest case, map f over rows of x,
+#' # where .x is 1 row of x
+#' input <- x
+#' output <- op_vectorized_map(input, function(.x) {
+#'   str(.x)
+#'   .x + 10
+#' })
+#' output
+#'
+#' # map f over two tensors simultaneously. Here, `.x` is a list of two
+#' # tensors. The return values from each call of `f(row)` are stacked to form the
+#' # final output
+#' input <- list(x, x)
+#' output <- op_vectorized_map(input, function(.x) {
+#'   str(.x)
+#'   .x[[1]] + 10
+#' })
+#' output
+#'
+#' # same as above, but now returning two tensors in the final output
+#' output <- op_vectorized_map(input, function(.x) {
+#'   str(.x)
+#'   c(.x1, .x2) %<-% .x
+#'   list(.x1+10, .x2+20)
+#' })
+#' output
+#'
+#' # passing named lists.
+#' # WARNING: if passing a named list, the order of elements of `.x` supplied
+#' # to `f` is not stable. Only retrieve elements by name.
+#' input <- list(name1 = x, name2 = x)
+#' output <- op_vectorized_map(input, function(.x) {
+#'   str(.x)
+#'   list(outname1 = .x$name1 + 10,
+#'        outname2 = .x$name2 + 20)
+#' })
+#' output
+#'
+#' # passing a tuple() is equivalent to passing an unnamed list()
+#' input <- tuple(x, x)
+#' output <- op_vectorized_map(input, function(.x) {
+#'   str(.x)
+#'   list(.x[[1]] + 10)
+#' })
+#' output
+#' ```
+#'
+#' # Debugging `f`
+#'
+#' Even in eager contexts, `op_vectorized_map()` may trace `f`. In that case, if
+#' you want to eagerly debug `f` (e.g., with `browser()`), you can swap in a
+#' manual (slow) implementation of `op_vectorized_map()`. Note this example
+#' debug implementation does not handle all the same edge cases as
+#' `op_vectorized_map()`, in particular, if `f` returns a structure of multiple
+#' tensors.
+#'
+#' ```r
+#' op_vectorized_map_debug <- function(elements, fn) {
+#'
+#'   if (!is.list(elements)) {
+#'     # `elements` is a single tensor
+#'     batch_size <- op_shape(elements)[[1]]
+#'     out <- elements |>
+#'       op_split(batch_size) |>
+#'       lapply(fn) |>
+#'       op_stack()
+#'     return(out)
+#'   }
+#'
+#'   # `elements` is a list of tensors
+#'   batch_size <- elements[[1]] |> op_shape() |> _[[1]]
+#'   elements |>
+#'     lapply(\(e) op_split(e, batch_size)) |>
+#'     zip_lists() |>
+#'     lapply(fn) |>
+#'     op_stack()
+#'
+#' }
+#' ```
+#'
+#'
+#' @param elements
+#' see description
+#'
+#' @param f
+#' A function taking either a tensor or a list of tensors.
+#'
+#' @returns A tensor or list of tensors, the result of mapping `f` across `elements`.
+#' @export
+#' @family core ops
+#' @family ops
+# @seealso
+# +
+#' @tether keras.ops.vectorized_map
+op_vectorized_map <-
+function (elements, f)
+keras$ops$vectorized_map(f, elements)
+
+
+#' While loop implementation.
+#'
+#' @description
+#'
+#' # Examples
+#'
+#' ```{r}
+#' i <- 0
+#' loop_vars <- list(i)
+#'
+#' # cond() must return a scalar bool
+#' cond <- function(i) i < 10L
+#'
+#' # body must return same shape as loop_vars
+#' body <- function(i) list(i + 1L)
+#'
+#' op_while_loop(cond, body, loop_vars)
+#' ```
+#'
+#' ```{r}
+#' x <- 0; y <- 1
+#' cond <- \(x, y) x < 10
+#' body <- \(x, y) list(x+1, y+1)
+#' op_while_loop(cond, body, list(x, y))
+#' ```
+#'
+#' @returns
+#' A list of tensors, has the same shape and dtype as `loop_vars`.
+#'
+#' @param cond
+#' A callable that represents the termination condition of the loop.
+#' Must accept a `loop_vars`-like structure as an argument. If
+#' `loop_vars` is a tuple or unnamed list, each element of `loop_vars` will be
+#' passed positionally to the callable.
+#'
+#' @param body
+#' A callable that represents the loop body. Must accept a
+#' `loop_vars`-like structure as an argument, and return an updated value
+#' with the same structure. If `loop_vars` is a tuple or unnamed list, each
+#' element of `loop_vars` will be passed positionally to the callable.
+#'
+#' @param loop_vars
+#' An arbitrary nested structure of tensor state to persist
+#' across loop iterations.
+#'
+#' @param maximum_iterations
+#' Optional maximum number of iterations of the while
+#' loop to run. If provided, the `cond` output is AND-ed with an
+#' additional condition ensuring the number of iterations executed is
+#' no greater than `maximum_iterations`.
+#'
+#' @export
+#' @family core ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.ops.while_loop
+op_while_loop <-
+function (cond, body, loop_vars, maximum_iterations = NULL)
+keras$ops$while_loop(cond, body, loop_vars, maximum_iterations)
+
+
+#' Computes the error function of `x`, element-wise.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_array(c(-3, -2, -1, 0, 1))
+#' op_erf(x)
+#' # array([-0.99998 , -0.99532, -0.842701, 0., 0.842701], dtype=float32)
+#' ```
+#'
+#' @returns
+#' A tensor with the same dtype as `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family math ops
+#' @family ops
+# @seealso
+# +
+#' @tether keras.ops.erf
+op_erf <-
+function (x)
+keras$ops$erf(x)
+
+
+#' Expands the dimension of last axis into sequences of `sequence_length`.
+#'
+#' @description
+#' Slides a window of size `sequence_length` over the last axis of the input
+#' with a stride of `sequence_stride`, replacing the last axis with
+#' `[num_sequences, sequence_length]` sequences.
+#'
+#' If the dimension along the last axis is N, the number of sequences can be
+#' computed by:
+#'
+#' `num_sequences = 1 + (N - sequence_length) %/% sequence_stride`
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_convert_to_tensor(1:6)
+#' op_extract_sequences(x, 3, 2)
+#' ```
+#'
+#' @returns
+#' A tensor of sequences with shape `[..., num_sequences, sequence_length]`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param sequence_length
+#' An integer representing the sequence length.
+#'
+#' @param sequence_stride
+#' An integer representing the sequence hop size.
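+#' With the example above (`N = 6`, `sequence_length = 3`,
+#' `sequence_stride = 2`), this yields `1 + (6 - 3) %/% 2 = 2` sequences.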
+#'
+#' @export
+#' @family math ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.extract_sequences
+op_extract_sequences <-
+function (x, sequence_length, sequence_stride)
+{
+    args <- capture_args(list(sequence_length = as_integer,
+        sequence_stride = as_integer))
+    do.call(keras$ops$extract_sequences, args)
+}
+
+
+#' Computes the Fast Fourier Transform along last axis of input.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' x <- c(op_array(c(1., 2.)),
+#'        op_array(c(0., 1.)))
+#' op_fft(x)
+#' ```
+#'
+#' @returns
+#' A list containing two tensors - the real and imaginary parts of the
+#' output tensor.
+#'
+#' @param x
+#' list of the real and imaginary parts of the input tensor. Both
+#' tensors provided should be of floating type.
+#'
+#' @export
+#' @family math ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.fft
+op_fft <-
+function (x)
+keras$ops$fft(x)
+
+
+#' Computes the 2D Fast Fourier Transform along the last two axes of input.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' x <- c(op_array(rbind(c(1, 2),
+#'                       c(2, 1))),
+#'        op_array(rbind(c(0, 1),
+#'                       c(1, 0))))
+#' op_fft2(x)
+#' ```
+#'
+#' @returns
+#' A list containing two tensors - the real and imaginary parts of the
+#' output.
+#'
+#' @param x
+#' list of the real and imaginary parts of the input tensor. Both
+#' tensors provided should be of floating type.
+#'
+#' @export
+#' @family math ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.fft2
+op_fft2 <-
+function (x)
+keras$ops$fft2(x)
+
+
+#' Checks if the targets are in the top-k predictions.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' targets <- op_array(c(2, 5, 3), "int32")
+#' predictions <- op_array(dtype = "float32", rbind(
+#'   c(0.1, 0.4, 0.6, 0.9, 0.5),
+#'   c(0.1, 0.7, 0.9, 0.8, 0.3),
+#'   c(0.1, 0.6, 0.9, 0.9, 0.5)
+#' ))
+#' op_in_top_k(targets, predictions, k = 3L)
+#' ```
+#'
+#' @returns
+#' A boolean tensor of the same shape as `targets`, where each element
+#' indicates whether the corresponding target is in the top-k predictions.
+#'
+#' @param targets
+#' A tensor of true labels.
+#'
+#' @param predictions
+#' A tensor of predicted labels.
+#'
+#' @param k
+#' An integer representing the number of predictions to consider.
+#'
+#' @export
+#' @family math ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.in_top_k
+op_in_top_k <-
+function (targets, predictions, k)
+{
+    args <- capture_args(list(k = as_integer))
+    do.call(keras$ops$in_top_k, args)
+}
+
+
+#' Inverse real-valued Fast Fourier transform along the last axis.
+#'
+#' @description
+#' Computes the inverse 1D Discrete Fourier Transform of a real-valued signal
+#' over the inner-most dimension of input.
+#'
+#' The inner-most dimension of the input is assumed to be the result of RFFT:
+#' the `fft_length / 2 + 1` unique components of the DFT of a real-valued
+#' signal. If `fft_length` is not provided, it is computed from the size of the
+#' inner-most dimension of the input `(fft_length = 2 * (inner - 1))`. If the
+#' FFT length used to compute the input is odd, it should be provided, since it
+#' cannot be inferred properly.
+#'
+#' Along the axis IRFFT is computed on, if `fft_length / 2 + 1` is smaller than
+#' the corresponding dimension of the input, the dimension is cropped. If it is
+#' larger, the dimension is padded with zeros.
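+#'
+#' For instance, with an inner-most dimension of 5 (as in the example below),
+#' the inferred FFT length is `2 * (5 - 1) = 8`.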
+#'
+#' # Examples
+#'
+#' ```{r, comment = "#>"}
+#' real <- op_array(c(0, 1, 2, 3, 4))
+#' imag <- op_array(c(0, 1, 2, 3, 4))
+#' op_irfft(c(real, imag))
+#'
+#' all.equal(op_irfft(op_rfft(real, 5), 5), real)
+#' ```
+#'
+#' @returns
+#' A tensor containing the inverse real-valued Fast Fourier Transform
+#' along the last axis of `x`.
+#'
+#' @param x
+#' List of the real and imaginary parts of the input tensor. Both
+#' tensors in the list should be of floating type.
+#'
+#' @param fft_length
+#' An integer representing the FFT length. If not
+#' specified, it is inferred from the length of the last axis of `x`.
+#' Defaults to `NULL`.
+#'
+#' @export
+#' @family math ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.irfft
+op_irfft <-
+function (x, fft_length = NULL)
+{
+    args <- capture_args(list(fft_length = as_integer))
+    do.call(keras$ops$irfft, args)
+}
+
+
+#' Inverse Short-Time Fourier Transform along the last axis of the input.
+#'
+#' @description
+#' To reconstruct an original waveform, the parameters should be the same as
+#' those used in `op_stft()`.
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_convert_to_tensor(c(0, 1, 2, 3, 4))
+#' op_istft(op_stft(x, 1, 1, 1), 1, 1, 1)
+#' # array([0.0, 1.0, 2.0, 3.0, 4.0])
+#' ```
+#'
+#' @returns
+#' A tensor containing the inverse Short-Time Fourier Transform along the
+#' last axis of `x`.
+#'
+#' @param x
+#' List of the real and imaginary parts of the input tensor. Both
+#' tensors in the list should be of floating type.
+#'
+#' @param sequence_length
+#' An integer representing the sequence length.
+#'
+#' @param sequence_stride
+#' An integer representing the sequence hop size.
+#'
+#' @param fft_length
+#' An integer representing the size of the FFT that produced
+#' `stft`.
+#'
+#' @param length
+#' An integer; if specified, the output is clipped to exactly `length`.
+#' If not specified, no padding or clipping takes place. Defaults to
+#' `NULL`.
+#'
+#' @param window
+#' A string, a tensor of the window or `NULL`. If `window` is a
+#' string, available values are `"hann"` and `"hamming"`. If `window`
+#' is a tensor, it will be used directly as the window and its length
+#' must be `sequence_length`. If `window` is `NULL`, no windowing is
+#' used. Defaults to `"hann"`.
+#'
+#' @param center
+#' Whether `x` was padded on both sides so that the t-th sequence
+#' is centered at time `t * sequence_stride`. Defaults to `TRUE`.
+#'
+#' @export
+#' @family math ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.istft
+op_istft <-
+function (x, sequence_length, sequence_stride, fft_length, length = NULL,
+    window = "hann", center = TRUE)
+{
+    args <- capture_args(list(sequence_length = as_integer,
+        sequence_stride = as_integer, fft_length = as_integer,
+        length = as_integer, x = tuple))
+    do.call(keras$ops$istft, args)
+}
+
+
+#' Computes the logarithm of sum of exponentials of elements in a tensor.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_convert_to_tensor(c(1, 2, 3))
+#' op_logsumexp(x)
+#' ```
+#'
+#' @returns
+#' A tensor containing the logarithm of the sum of exponentials of
+#' elements in `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param axis
+#' An integer or a list of integers specifying the axis/axes
+#' along which to compute the sum. If `NULL`, the sum is computed
+#' over all elements. Defaults to `NULL`.
+#'
+#' @param keepdims
+#' A boolean indicating whether to keep the dimensions of
+#' the input tensor when computing the sum. Defaults to `FALSE`.
+#'
+#' @export
+#' @family math ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.ops.logsumexp
+op_logsumexp <-
+function (x, axis = NULL, keepdims = FALSE)
+{
+    args <- capture_args(list(axis = as_axis))
+    do.call(keras$ops$logsumexp, args)
+}
+
+
+#' Computes the QR decomposition of a tensor.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_convert_to_tensor(rbind(c(1, 2), c(3, 4), c(5, 6)))
+#' op_qr(x)
+#' c(q, r) %<-% op_qr(x)
+#' ```
+#'
+#' @returns
+#' A list containing two tensors. The first tensor of shape `(..., M, K)`
+#' is the orthogonal matrix `q` and the second tensor of shape
+#' `(..., K, N)` is the upper triangular matrix `r`, where `K = min(M, N)`.
+#'
+#' @param x
+#' Input tensor of shape `(..., M, N)`.
+#'
+#' @param mode
+#' A string specifying the mode of the QR decomposition.
+#' - 'reduced': Returns the reduced QR decomposition (default).
+#' - 'complete': Returns the complete QR decomposition.
+#'
+#' @export
+#' @family math ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.qr
+op_qr <-
+function (x, mode = "reduced")
+keras$ops$qr(x, mode)
+
+
+#' @export
+py_to_r.tensorflow.python.ops.gen_linalg_ops.Qr <- function(x) {
+  x <- py_eval("tuple")(x)
+  names(x) <- c("q", "r")
+  x
+}
+
+
+#' Real-valued Fast Fourier Transform along the last axis of the input.
+#'
+#' @description
+#' Computes the 1D Discrete Fourier Transform of a real-valued signal over the
+#' inner-most dimension of input.
+#'
+#' Since the Discrete Fourier Transform of a real-valued signal is
+#' Hermitian-symmetric, RFFT only returns the `fft_length / 2 + 1` unique
+#' components of the FFT: the zero-frequency term, followed by the
+#' `fft_length / 2` positive-frequency terms.
+#'
+#' Along the axis RFFT is computed on, if `fft_length` is smaller than the
+#' corresponding dimension of the input, the dimension is cropped. If it is
+#' larger, the dimension is padded with zeros.
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_convert_to_tensor(c(0, 1, 2, 3, 4))
+#' op_rfft(x)
+#' ```
+#'
+#' ```{r}
+#' op_rfft(x, 3)
+#' ```
+#'
+#' @returns
+#' A list containing two tensors - the real and imaginary parts of the
+#' output.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param fft_length
+#' An integer representing the FFT length. If not
+#' specified, it is inferred from the length of the last axis of `x`.
+#' Defaults to `NULL`.
+#'
+#' @export
+#' @family math ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.ops.rfft
+op_rfft <-
+function (x, fft_length = NULL)
+{
+    args <- capture_args(list(fft_length = as_integer))
+    do.call(keras$ops$rfft, args)
+}
+
+
+#' Computes reciprocal of square root of x element-wise.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_convert_to_tensor(c(1, 10, 100))
+#' op_rsqrt(x)
+#' # array([1, 0.31622776, 0.1], dtype=float32)
+#' ```
+#'
+#' @returns
+#' A tensor with the same dtype as `x`.
+#'
+#' @param x
+#' input tensor
+#'
+#' @export
+#' @family math ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.ops.rsqrt
+op_rsqrt <-
+function (x)
+keras$ops$rsqrt(x)
+
+
+#' Computes the max of segments in a tensor.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' data <- op_convert_to_tensor(c(1, 2, 10, 20, 100, 200))
+#' segment_ids <- op_array(c(1, 1, 2, 2, 3, 3), "int32")
+#' num_segments <- 3
+#' op_segment_max(data, segment_ids, num_segments)
+#' # array([2., 20., 200.], dtype=float32)
+#' ```
+#'
+#' @returns
+#' A tensor containing the max of segments, where each element
+#' represents the max of the corresponding segment in `data`.
+#'
+#' @param data
+#' Input tensor.
+#'
+#' @param segment_ids
+#' A 1-D tensor containing segment indices for each
+#' element in `data`.
+#'
+#' @param num_segments
+#' An integer representing the total number of
+#' segments. If not specified, it is inferred from the maximum
+#' value in `segment_ids`.
+#'
+#' @param sorted
+#' A boolean indicating whether `segment_ids` is sorted.
+#' Defaults to `FALSE`.
+#'
+#' @export
+#' @family math ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.segment_max
+op_segment_max <-
+function (data, segment_ids, num_segments = NULL, sorted = FALSE)
+{
+    args <- capture_args(list(segment_ids = as_index, num_segments = as_integer))
+    do.call(keras$ops$segment_max, args)
+}
+
+
+#' Computes the sum of segments in a tensor.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' data <- op_array(c(1, 2, 10, 20, 100, 200))
+#' segment_ids <- op_array(c(1, 1, 2, 2, 3, 3), "int32")
+#' num_segments <- 3
+#' op_segment_sum(data, segment_ids, num_segments)
+#' ```
+#'
+#' @returns
+#' A tensor containing the sum of segments, where each element
+#' represents the sum of the corresponding segment in `data`.
+#'
+#' @param data
+#' Input tensor.
+#'
+#' @param segment_ids
+#' A 1-D tensor containing segment indices for each
+#' element in `data`.
+#'
+#' @param num_segments
+#' An integer representing the total number of
+#' segments. If not specified, it is inferred from the maximum
+#' value in `segment_ids`.
+#'
+#' @param sorted
+#' A boolean indicating whether `segment_ids` is sorted.
+#' Defaults to `FALSE`.
+#'
+#' @export
+#' @family math ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.segment_sum
+op_segment_sum <-
+function (data, segment_ids, num_segments = NULL, sorted = FALSE)
+{
+    args <- capture_args(list(segment_ids = as_index, num_segments = as_integer))
+    do.call(keras$ops$segment_sum, args)
+}
+
+#' Return elements from `choicelist`, based on conditions in `condlist`.
+#'
+#' @param condlist
+#' List of boolean tensors.
+#' The list of conditions which determine from which array
+#' in `choicelist` the output elements are taken.
+#' When multiple conditions are satisfied,
+#' the first one encountered in `condlist` is used.
+#'
+#' @param choicelist
+#' List of tensors.
+#' The list of tensors from which the output elements are taken.
+#' This list has to be of the same length as `condlist`.
+#'
+#' @param default
+#' Optional scalar value.
+#' The element inserted in the output
+#' when all conditions evaluate to `FALSE`.
+#'
+#' @returns
+#' Tensor where the output at position `m` is the `m`-th element
+#' of the tensor in `choicelist` where the `m`-th element of the
+#' corresponding tensor in `condlist` is `TRUE`.
+#'
+#' @description
+#'
+#' # Examples
+#'
+#' ```{r}
+#' x <- op_arange(6L)
+#' condlist <- list(x < 3, x > 3)
+#' choicelist <- list(x, x^2)
+#' op_select(condlist, choicelist, 42)
+#' ```
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @tether keras.ops.select
+op_select <-
+function (condlist, choicelist, default = 0L)
+{
+    args <- capture_args(list(default = as_integer))
+    do.call(keras$ops$select, args)
+}
+
+
+#' Solves a linear system of equations given by `a x = b`.
+#'
+#' @description
+#' Solves for `x` in the equation `a %*% x == b`.
+#'
+#' # Examples
+#' ```{r}
+#' a <- op_array(c(1, 2, 4, 5), dtype="float32") |> op_reshape(c(2, 2))
+#' b <- op_array(c(2, 4, 8, 10), dtype="float32") |> op_reshape(c(2, 2))
+#' op_solve(a, b)
+#' ```
+#'
+#' @returns
+#' A tensor of shape `(..., M)` or `(..., M, N)` representing the solution
+#' of the linear system. Returned shape is identical to `b`.
+#'
+#' @param a
+#' A tensor of shape `(..., M, M)` representing the coefficients matrix.
+#'
+#' @param b
+#' A tensor of shape `(..., M)` or `(..., M, N)` representing the
+#' right-hand side or "dependent variable" matrix.
+#'
+#' @export
+#' @family math ops
+#' @family ops
+# @seealso
+# +
+#'
+#' @tether keras.ops.solve
+op_solve <-
+function (a, b)
+keras$ops$solve(a, b)
+
+
+#' Short-Time Fourier Transform along the last axis of the input.
+#'
+#' @description
+#' The STFT computes the Fourier transform of short overlapping windows of the
+#' input, giving the frequency components of the signal as they change over
+#' time.
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_array(c(0, 1, 2, 3, 4))
+#' op_stft(x, 3, 2, 3)
+#' ```
+#'
+#' @returns
+#' A list containing two tensors - the real and imaginary parts of the
+#' STFT output.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param sequence_length
+#' An integer representing the sequence length.
+#'
+#' @param sequence_stride
+#' An integer representing the sequence hop size.
+#'
+#' @param fft_length
+#' An integer representing the size of the FFT to apply. If not
+#' specified, uses the smallest power of 2 enclosing `sequence_length`.
+#'
+#' @param window
+#' A string, a tensor of the window or `NULL`. If `window` is a
+#' string, available values are `"hann"` and `"hamming"`. If `window`
+#' is a tensor, it will be used directly as the window and its length
+#' must be `sequence_length`. If `window` is `NULL`, no windowing is
+#' used. Defaults to `"hann"`.
+#'
+#' @param center
+#' Whether to pad `x` on both sides so that the t-th sequence is
+#' centered at time `t * sequence_stride`. Otherwise, the t-th sequence
+#' begins at time `t * sequence_stride`. Defaults to `TRUE`.
+#'
+#' @export
+#' @family math ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.ops.stft
+op_stft <-
+function (x, sequence_length, sequence_stride, fft_length, window = "hann",
+    center = TRUE)
+{
+    args <- capture_args(list(sequence_length = as_integer,
+        sequence_stride = as_integer, fft_length = as_integer))
+    do.call(keras$ops$stft, args)
+}
+
+
+#' Finds the top-k values and their indices in a tensor.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_array(c(5, 2, 7, 1, 9, 3), "int32")
+#' op_top_k(x, k = 3)
+#' ```
+#'
+#' ```{r}
+#' c(values, indices) %<-% op_top_k(x, k = 3)
+#' values
+#' indices
+#' ```
+#'
+#' @returns
+#' A list containing two tensors. The first tensor contains the
+#' top-k values, and the second tensor contains the indices of the
+#' top-k values in the input tensor.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param k
+#' An integer representing the number of top elements to retrieve.
+#'
+#' @param sorted
+#' A boolean indicating whether to sort the output in
+#' descending order. Defaults to `TRUE`.
+#'
+#' @export
+#' @family math ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.top_k
+op_top_k <-
+function (x, k, sorted = TRUE)
+{
+    args <- capture_args(list(k = as_integer))
+    do.call(keras$ops$top_k, args)
+}
+
+
+#' @export
+py_to_r.tensorflow.python.ops.gen_nn_ops.TopKV2 <- function(x) {
+  x <- py_eval("tuple")(x)
+  names(x) <- c("values", "indices")
+  x
+}
+
+
+#' Average pooling operation.
+#'
+#' @returns
+#' A tensor of rank N+2, the result of the average pooling operation.
+#'
+#' @param inputs
+#' Tensor of rank N+2. `inputs` has shape
+#' `(batch_size,) + inputs_spatial_shape + (num_channels,)` if
+#' `data_format = "channels_last"`, or
+#' `(batch_size, num_channels) + inputs_spatial_shape` if
+#' `data_format = "channels_first"`. Pooling happens over the spatial
+#' dimensions only.
+#'
+#' @param pool_size
+#' int or tuple/list of integers of size
+#' `len(inputs_spatial_shape)`, specifying the size of the pooling
+#' window for each spatial dimension of the input tensor. If
+#' `pool_size` is int, then every spatial dimension shares the same
+#' `pool_size`.
+#'
+#' @param strides
+#' int or tuple/list of integers of size
+#' `len(inputs_spatial_shape)`. The stride of the sliding window for
+#' each spatial dimension of the input tensor. If `strides` is int,
+#' then every spatial dimension shares the same `strides`.
+#'
+#' @param padding
+#' string, either `"valid"` or `"same"`. `"valid"` means no
+#' padding is applied, and `"same"` results in padding evenly to the
+#' left/right or up/down of the input such that output has the
+#' same height/width dimension as the input when `strides = 1`.
+#'
+#' @param data_format
+#' A string, either `"channels_last"` or `"channels_first"`.
+#' `data_format` determines the ordering of the dimensions in the
+#' inputs. If `data_format = "channels_last"`, `inputs` is of shape
+#' `(batch_size, ..., channels)` while if
+#' `data_format = "channels_first"`, `inputs` is of shape
+#' `(batch_size, channels, ...)`.
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.average_pool
+op_average_pool <-
+function (inputs, pool_size, strides = NULL, padding = "valid",
+    data_format = NULL)
+{
+    args <- capture_args(list(pool_size = as_integer, strides = as_integer))
+    do.call(keras$ops$average_pool, args)
+}
+
+
+#' Computes binary cross-entropy loss between target and output tensor.
+#'
+#' @description
+#' The binary cross-entropy loss is commonly used in binary
+#' classification tasks where each input sample belongs to one
+#' of the two classes. It measures the dissimilarity between the
+#' target and output probabilities or logits.
+#'
+#' # Examples
+#' ```{r}
+#' target <- op_array(c(0, 1, 1, 0))
+#' output <- op_array(c(0.1, 0.9, 0.8, 0.2))
+#' op_binary_crossentropy(target, output)
+#' ```
+#'
+#' @returns
+#' A tensor of the computed binary cross-entropy loss between
+#' `target` and `output`.
+#'
+#' @param target
+#' The target tensor representing the true binary labels.
+#' Its shape should match the shape of the `output` tensor.
+#'
+#' @param output
+#' The output tensor representing the predicted probabilities
+#' or logits. Its shape should match the shape of the
+#' `target` tensor.
+#'
+#' @param from_logits
+#' (optional) Whether `output` is a tensor of logits or
+#' probabilities.
+#' Set it to `TRUE` if `output` represents logits; otherwise,
+#' set it to `FALSE` if `output` represents probabilities.
+#' Defaults to `FALSE`.
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.binary_crossentropy
+op_binary_crossentropy <-
+function (target, output, from_logits = FALSE)
+keras$ops$binary_crossentropy(target, output, from_logits)
+
+
+#' Computes categorical cross-entropy loss between target and output tensor.
+#'
+#' @description
+#' The categorical cross-entropy loss is commonly used in multi-class
+#' classification tasks where each input sample can belong to one of
+#' multiple classes. It measures the dissimilarity
+#' between the target and output probabilities or logits.
+#'
+#' # Examples
+#' ```{r}
+#' target <- op_array(rbind(c(1, 0, 0),
+#'                          c(0, 1, 0),
+#'                          c(0, 0, 1)))
+#' output <- op_array(rbind(c(0.9, 0.05, 0.05),
+#'                          c(0.1, 0.8, 0.1),
+#'                          c(0.2, 0.3, 0.5)))
+#' op_categorical_crossentropy(target, output)
+#' ```
+#'
+#' @returns
+#' A tensor of the computed categorical cross-entropy loss between
+#' `target` and `output`.
+#'
+#' @param target
+#' The target tensor representing the true categorical labels.
+#' Its shape should match the shape of the `output` tensor
+#' except for the last dimension.
+#'
+#' @param output
+#' The output tensor representing the predicted probabilities
+#' or logits. Its shape should match the shape of the `target`
+#' tensor except for the last dimension.
+#'
+#' @param from_logits
+#' (optional) Whether `output` is a tensor of logits or
+#' probabilities.
+#' Set it to `TRUE` if `output` represents logits; otherwise,
+#' set it to `FALSE` if `output` represents probabilities.
+#' Defaults to `FALSE`.
+#'
+#' @param axis
+#' (optional) The axis along which the categorical cross-entropy
+#' is computed.
+#' Defaults to `-1`, which corresponds to the last dimension of
+#' the tensors.
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.categorical_crossentropy
+op_categorical_crossentropy <-
+function (target, output, from_logits = FALSE, axis = -1L)
+{
+    args <- capture_args(list(axis = as_axis))
+    do.call(keras$ops$categorical_crossentropy, args)
+}
+
+
+#' General N-D convolution.
+#'
+#' @description
+#' This op supports 1D, 2D and 3D convolution.
+#'
+#' @returns
+#' A tensor of rank N+2, the result of the conv operation.
+#'
+#' @param inputs
+#' Tensor of rank N+2. `inputs` has shape
+#' `(batch_size,) + inputs_spatial_shape + (num_channels,)` if
+#' `data_format = "channels_last"`, or
+#' `(batch_size, num_channels) + inputs_spatial_shape` if
+#' `data_format = "channels_first"`.
+#'
+#' @param kernel
+#' Tensor of rank N+2. `kernel` has shape
+#' `(kernel_spatial_shape, num_input_channels, num_output_channels)`.
+#' `num_input_channels` should match the number of channels in
+#' `inputs`.
+#'
+#' @param strides
+#' int or int tuple/list of `len(inputs_spatial_shape)`,
+#' specifying the strides of the convolution along each spatial
+#' dimension. If `strides` is int, then every spatial dimension shares
+#' the same `strides`.
+#'
+#' @param padding
+#' string, either `"valid"` or `"same"`. `"valid"` means no
+#' padding is applied, and `"same"` results in padding evenly to the
+#' left/right or up/down of the input such that output has the
+#' same height/width dimension as the input when `strides = 1`.
+#'
+#' @param data_format
+#' A string, either `"channels_last"` or `"channels_first"`.
+#' `data_format` determines the ordering of the dimensions in the
+#' inputs. If `data_format = "channels_last"`, `inputs` is of shape
+#' `(batch_size, ..., channels)` while if
+#' `data_format = "channels_first"`, `inputs` is of shape
+#' `(batch_size, channels, ...)`.
+#'
+#' @param dilation_rate
+#' int or int tuple/list of `len(inputs_spatial_shape)`,
+#' specifying the dilation rate to use for dilated convolution. If
+#' `dilation_rate` is int, then every spatial dimension shares
+#' the same `dilation_rate`.
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.conv
+op_conv <-
+function (inputs, kernel, strides = 1L, padding = "valid", data_format = NULL,
+    dilation_rate = 1L)
+{
+    args <- capture_args(list(strides = as_integer, dilation_rate = as_integer))
+    do.call(keras$ops$conv, args)
+}
+
+
+#' General N-D convolution transpose.
+#'
+#' @description
+#' Also known as deconvolution. This op supports 1D, 2D and 3D convolution.
+#'
+#' @returns
+#' A tensor of rank N+2, the result of the conv operation.
+#'
+#' @param inputs
+#' Tensor of rank N+2. `inputs` has shape
+#' `(batch_size,) + inputs_spatial_shape + (num_channels,)` if
+#' `data_format = "channels_last"`, or
+#' `(batch_size, num_channels) + inputs_spatial_shape` if
+#' `data_format = "channels_first"`.
+#'
+#' @param kernel
+#' Tensor of rank N+2. `kernel` has shape
+#' `(kernel_spatial_shape, num_output_channels, num_input_channels)`.
+#' `num_input_channels` should match the number of channels in
+#' `inputs`.
+#'
+#' @param strides
+#' int or int tuple/list of `len(inputs_spatial_shape)`,
+#' specifying the strides of the convolution along each spatial
+#' dimension. If `strides` is int, then every spatial dimension shares
+#' the same `strides`.
+#'
+#' @param padding
+#' string, either `"valid"` or `"same"`. `"valid"` means no
+#' padding is applied, and `"same"` results in padding evenly to the
+#' left/right or up/down of the input such that output has the
+#' same height/width dimension as the input when `strides = 1`.
+#'
+#' @param output_padding
+#' int or int tuple/list of `len(inputs_spatial_shape)`,
+#' specifying the amount of padding along the height and width of
+#' the output tensor. Can be a single integer to specify the same
+#' value for all spatial dimensions. The amount of output padding
+#' along a given dimension must be lower than the stride along that
+#' same dimension. If set to `NULL` (default), the output shape is
+#' inferred.
+#'
+#' @param data_format
+#' A string, either `"channels_last"` or `"channels_first"`.
+#' `data_format` determines the ordering of the dimensions in the
+#' inputs. If `data_format = "channels_last"`, `inputs` is of shape
+#' `(batch_size, ..., channels)` while if
+#' `data_format = "channels_first"`, `inputs` is of shape
+#' `(batch_size, channels, ...)`.
+#'
+#' @param dilation_rate
+#' int or int tuple/list of `len(inputs_spatial_shape)`,
+#' specifying the dilation rate to use for dilated convolution. If
+#' `dilation_rate` is int, then every spatial dimension shares
+#' the same `dilation_rate`.
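+#'
+#' # Examples
+#' A minimal sketch (the shapes here are chosen only for illustration): a 1D
+#' transposed convolution that upsamples a length-4, single-channel signal by
+#' a factor of 2.
+#' ```{r}
+#' inputs <- op_ones(c(1, 4, 1))  # (batch, steps, channels)
+#' kernel <- op_ones(c(2, 1, 1))  # (kernel_size, out_channels, in_channels)
+#' op_shape(op_conv_transpose(inputs, kernel, strides = 2))
+#' ```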
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.conv_transpose
+op_conv_transpose <-
+function (inputs, kernel, strides, padding = "valid", output_padding = NULL,
+    data_format = NULL, dilation_rate = 1L)
+{
+    args <- capture_args(list(strides = as_integer, output_padding = as_integer,
+        dilation_rate = as_integer))
+    do.call(keras$ops$conv_transpose, args)
+}
+
+
+#' General N-D depthwise convolution.
+#'
+#' @description
+#' This op supports 1D and 2D depthwise convolution.
+#'
+#' @returns
+#' A tensor of rank N+2, the result of the depthwise conv operation.
+#'
+#' @param inputs
+#' Tensor of rank N+2. `inputs` has shape
+#' `(batch_size,) + inputs_spatial_shape + (num_channels,)` if
+#' `data_format = "channels_last"`, or
+#' `(batch_size, num_channels) + inputs_spatial_shape` if
+#' `data_format = "channels_first"`.
+#'
+#' @param kernel
+#' Tensor of rank N+2. `kernel` has shape
+#' `(kernel_spatial_shape, num_input_channels, num_channels_multiplier)`.
+#' `num_input_channels` should match the number of channels in
+#' `inputs`.
+#'
+#' @param strides
+#' int or int tuple/list of `len(inputs_spatial_shape)`,
+#' specifying the strides of the convolution along each spatial
+#' dimension. If `strides` is int, then every spatial dimension shares
+#' the same `strides`.
+#'
+#' @param padding
+#' string, either `"valid"` or `"same"`. `"valid"` means no
+#' padding is applied, and `"same"` results in padding evenly to the
+#' left/right or up/down of the input such that output has the
+#' same height/width dimension as the input when `strides = 1`.
+#'
+#' @param data_format
+#' A string, either `"channels_last"` or `"channels_first"`.
+#' `data_format` determines the ordering of the dimensions in the
+#' inputs. If `data_format = "channels_last"`, `inputs` is of shape
+#' `(batch_size, ..., channels)` while if
+#' `data_format = "channels_first"`, `inputs` is of shape
+#' `(batch_size, channels, ...)`.
+#'
+#' @param dilation_rate
+#' int or int tuple/list of `len(inputs_spatial_shape)`,
+#' specifying the dilation rate to use for dilated convolution. If
+#' `dilation_rate` is int, then every spatial dimension shares
+#' the same `dilation_rate`.
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.depthwise_conv
+op_depthwise_conv <-
+function (inputs, kernel, strides = 1L, padding = "valid", data_format = NULL,
+    dilation_rate = 1L)
+{
+    args <- capture_args(list(strides = as_integer, dilation_rate = as_integer))
+    do.call(keras$ops$depthwise_conv, args)
+}
+
+
+#' Exponential Linear Unit activation function.
+#'
+#' @description
+#' It is defined as:
+#'
+#' `f(x) = alpha * (exp(x) - 1.) for x < 0`, `f(x) = x for x >= 0`.
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_array(c(-1., 0., 1.))
+#' op_elu(x)
+#' ```
+#'
+#' @returns
+#' A tensor with the same shape as `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param alpha
+#' A scalar, the scale of the negative section (see the definition
+#' above). Defaults to `1.0`.
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.elu
+op_elu <-
+function (x, alpha = 1)
+keras$ops$elu(x, alpha)
+
+
+#' Gaussian Error Linear Unit (GELU) activation function.
+#'
+#' @description
+#' If `approximate` is `TRUE`, it is defined as:
+#' `f(x) = 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))`
+#'
+#' Or if `approximate` is `FALSE`, it is defined as:
+#' `f(x) = x * P(X <= x) = 0.5 * x * (1 + erf(x / sqrt(2)))`,
+#' where `P(X) ~ N(0, 1)`.
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_array(c(-1., 0., 1.))
+#' op_gelu(x)
+#' op_gelu(x, FALSE)
+#' ```
+#'
+#'
+#' ```{r op-gelu-plot}
+#' x <- seq(-5, 5, .1)
+#' plot(x, op_gelu(x),
+#'      type = "l",
+#'      panel.first = grid())
+#' ```
+#'
+#' @returns
+#' A tensor with the same shape as `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param approximate
+#' Approximate version of GELU activation. Defaults to `TRUE`.
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.gelu
+op_gelu <-
+function (x, approximate = TRUE)
+keras$ops$gelu(x, approximate)
+
+
+#' Hard sigmoid activation function.
+#'
+#' @description
+#' It is defined as:
+#'
+#' `0 if x < -2.5`, `1 if x > 2.5`, `(0.2 * x) + 0.5 if -2.5 <= x <= 2.5`.
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_array(c(-1., 0., 1.))
+#' op_hard_sigmoid(x)
+#' ```
+#'
+#' ```{r op-hard-sigmoid-plot}
+#' x <- as.array(seq(-5, 5, .1))
+#' plot(x, op_hard_sigmoid(x),
+#'      type = 'l', panel.first = grid(), frame.plot = FALSE)
+#' ```
+#'
+#' @returns
+#' A tensor with the same shape as `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.hard_sigmoid
+op_hard_sigmoid <-
+function (x)
+keras$ops$hard_sigmoid(x)
+
+
+#' Leaky version of a Rectified Linear Unit activation function.
+#'
+#' @description
+#' It allows a small gradient when the unit is not active. It is defined as:
+#'
+#' `f(x) = alpha * x for x < 0` or `f(x) = x for x >= 0`.
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_array(c(-1., 0., 1.))
+#' op_leaky_relu(x)
+#' # array([-0.2, 0. , 1. ], shape=(3,), dtype=float64)
+#' ```
+#' ```{r op-leaky-relu-plot}
+#' x <- seq(-5, 5, .1)
+#' plot(x, op_leaky_relu(x),
+#'      type = 'l', panel.first = grid())
+#' ```
+#'
+#' @returns
+#' A tensor with the same shape as `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param negative_slope
+#' Slope of the activation function at x < 0.
+#' Defaults to `0.2`.
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.leaky_relu
+op_leaky_relu <-
+function (x, negative_slope = 0.2)
+keras$ops$leaky_relu(x, negative_slope)
+
+
+#' Logarithm of the sigmoid activation function.
+#'
+#' @description
+#' It is defined as `f(x) = log(1 / (1 + exp(-x)))`.
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_convert_to_tensor(c(-0.541391, 0.0, 0.50, 5.0))
+#' op_log_sigmoid(x)
+#' ```
+#'
+#' @returns
+#' A tensor with the same shape as `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.ops.log_sigmoid
+op_log_sigmoid <-
+function (x)
+keras$ops$log_sigmoid(x)
+
+
+#' Log-softmax activation function.
+#'
+#' @description
+#' It is defined as:
+#' `f(x) = x - max(x) - log(sum(exp(x - max(x))))`
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_array(c(-1., 0., 1.))
+#' op_log_softmax(x)
+#' ```
+#'
+#' @returns
+#' A tensor with the same shape as `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param axis
+#' Integer, axis along which the log-softmax is applied.
+#' Defaults to `-1`.
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.ops.log_softmax
+op_log_softmax <-
+function (x, axis = -1L)
+{
+    args <- capture_args(list(axis = as_axis))
+    do.call(keras$ops$log_softmax, args)
+}
+
+
+#' Max pooling operation.
+#'
+#' @returns
+#' A tensor of rank N+2, the result of the max pooling operation.
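+#'
+#' # Examples
+#' A small sketch of 2D max pooling (the shapes and values are chosen only
+#' for illustration):
+#' ```{r}
+#' x <- op_reshape(op_arange(16, dtype = "float32"), c(1, 4, 4, 1))
+#' op_max_pool(x, pool_size = 2, strides = 2)
+#' ```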
+#'
+#' @param inputs
+#' Tensor of rank N+2. `inputs` has shape
+#' `(batch_size,) + inputs_spatial_shape + (num_channels,)` if
+#' `data_format = "channels_last"`, or
+#' `(batch_size, num_channels) + inputs_spatial_shape` if
+#' `data_format = "channels_first"`. Pooling happens over the spatial
+#' dimensions only.
+#'
+#' @param pool_size
+#' int or tuple/list of integers of size
+#' `len(inputs_spatial_shape)`, specifying the size of the pooling
+#' window for each spatial dimension of the input tensor. If
+#' `pool_size` is int, then every spatial dimension shares the same
+#' `pool_size`.
+#'
+#' @param strides
+#' int or tuple/list of integers of size
+#' `len(inputs_spatial_shape)`. The stride of the sliding window for
+#' each spatial dimension of the input tensor. If `strides` is int,
+#' then every spatial dimension shares the same `strides`.
+#'
+#' @param padding
+#' string, either `"valid"` or `"same"`. `"valid"` means no
+#' padding is applied, and `"same"` results in padding evenly to the
+#' left/right or up/down of the input such that output has the
+#' same height/width dimension as the input when `strides = 1`.
+#'
+#' @param data_format
+#' A string, either `"channels_last"` or `"channels_first"`.
+#' `data_format` determines the ordering of the dimensions in the
+#' inputs. If `data_format = "channels_last"`, `inputs` is of shape
+#' `(batch_size, ..., channels)` while if
+#' `data_format = "channels_first"`, `inputs` is of shape
+#' `(batch_size, channels, ...)`.
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.max_pool
+op_max_pool <-
+function (inputs, pool_size, strides = NULL, padding = "valid",
+    data_format = NULL)
+{
+    args <- capture_args(list(pool_size = as_integer, strides = as_integer))
+    do.call(keras$ops$max_pool, args)
+}
+
+
+#' Calculates the mean and variance of `x`.
+#'
+#' @description
+#' The mean and variance are calculated by aggregating the contents of `x`
+#' across `axes`. If `x` is 1-D and `axes = c(1)` this is just the mean and
+#' variance of a vector.
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_convert_to_tensor(c(0, 1, 2, 3, 100), dtype = "float32")
+#' op_moments(x, axes = c(1))
+#' ```
+#'
+#' @returns
+#' A list containing two tensors - mean and variance.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param axes
+#' A list of axes along which to compute the mean and variance.
+#'
+#' @param keepdims
+#' If this is set to `TRUE`, the axes which are reduced are left
+#' in the result as dimensions with size one.
+#'
+#' @param synchronized
+#' Only applicable with the TensorFlow backend.
+#' If `TRUE`, synchronizes the global batch statistics (mean and
+#' variance) across all devices at each training step in a
+#' distributed training strategy. If `FALSE`, each replica uses its own
+#' local batch statistics.
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+# @seealso
+# +
+#'
+#' @tether keras.ops.moments
+op_moments <-
+function (x, axes, keepdims = FALSE, synchronized = FALSE)
+{
+    args <- capture_args(list(axes = as_axis))
+    do.call(keras$ops$moments, args)
+}
+
+
+#' Encodes integer labels as multi-hot vectors.
+#'
+#' @description
+#' This function encodes integer labels as multi-hot vectors, where each label
+#' is mapped to a binary value in the resulting vector.
+#'
+#' # Examples
+#' ```{r}
+#' data <- op_convert_to_tensor(c(0, 4))
+#' op_multi_hot(data, num_classes = 5)
+#' ```
+#'
+#' @returns
+#' Tensor: The multi-hot encoded tensor.
+#'
+#' @param inputs
+#' Tensor of integer labels to be converted to multi-hot vectors.
+#'
+#' @param num_classes
+#' Integer, the total number of unique classes.
+#'
+#' @param axis
+#' (optional) Axis along which the multi-hot encoding should be
+#' added. Defaults to `-1`, which corresponds to the last dimension.
+#'
+#' @param dtype
+#' (optional) The data type of the resulting tensor. Defaults
+#' to the backend's float type.
+#'
+#' @param sparse
+#' Whether to return a sparse tensor; for backends that support
+#' sparse tensors.
+#'
+#' @param ... For forward/backward compatibility
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+# @seealso
+# +
+#'
+#' @tether keras.ops.multi_hot
+op_multi_hot <-
+function (inputs, num_classes, axis = -1L, dtype = NULL, sparse = FALSE, ...)
+{
+    args <- capture_args(list(inputs = as_integer, num_classes = as_integer,
+        axis = as_axis))
+    do.call(keras$ops$multi_hot, args)
+}
+
+
+#' Converts integer tensor `x` into a one-hot tensor.
+#'
+#' @description
+#' The one-hot encoding is a representation where each integer value is
+#' converted into a binary vector with a length equal to `num_classes`,
+#' and the index corresponding to the integer value is marked as 1, while
+#' all other indices are marked as 0.
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_array(c(1, 3, 2, 0), "int32")
+#' op_one_hot(x, num_classes = 4)
+#' # array([[0. 1. 0. 0.]
+#' #        [0. 0. 0. 1.]
+#' #        [0. 0. 1. 0.]
+#' #        [1. 0. 0. 0.]], shape=(4, 4), dtype=float32)
+#' ```
+#'
+#' @returns
+#' One-hot encoded tensor with the same shape as `x`
+#' except for the specified `axis` dimension, which will have
+#' a length of `num_classes`. The dtype of the output tensor
+#' is determined by `dtype` or the default data type of the backend.
+#'
+#' @param x
+#' Integer tensor to be encoded. The shape can be
+#' arbitrary, but the dtype should be integer.
+#' R factors are coerced to integer and offset to be 0-based, i.e.,
+#' `as.integer(x) - 1L`.
+#'
+#' @param num_classes
+#' Number of classes for the one-hot encoding.
+#'
+#' @param axis
+#' Axis along which the encoding is performed. Defaults to
+#' `-1`, which represents the last axis.
+#'
+#' @param dtype
+#' (Optional) Data type of the output tensor. If not
+#' provided, it defaults to the default data type of the backend.
+#'
+#' @param sparse
+#' Whether to return a sparse tensor; for backends that support
+#' sparse tensors.
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.one_hot
+op_one_hot <-
+function (x, num_classes, axis = -1L, dtype = NULL, sparse = FALSE)
+{
+    args <- capture_args(list(
+        x = function(x) {
+            if (inherits(x, "factor"))
+                array(as.integer(x) - 1L, dim = dim(x) %||% length(x))
+            else
+                as_integer_array(x)
+        },
+        axis = as_axis,
+        num_classes = as_integer))
+    do.call(keras$ops$one_hot, args)
+}
+
+
+#' Rectified linear unit activation function.
+#'
+#' @description
+#' It is defined as `f(x) = max(0, x)`.
+#'
+#' # Examples
+#' ```{r}
+#' x1 <- op_convert_to_tensor(c(-1, 0, 1, 0.2))
+#' op_relu(x1)
+#' ```
+#'
+#' ```{r op-relu-plot}
+#' x <- seq(-10, 10, .1)
+#' plot(x, op_relu(x))
+#' ```
+#' @returns
+#' A tensor with the same shape as `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.ops.relu
+op_relu <-
+function (x)
+keras$ops$relu(x)
+
+
+#' Rectified linear unit activation function with upper bound of 6.
+#'
+#' @description
+#' It is defined as `f(x) = op_clip(x, 0, 6)`.
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_convert_to_tensor(c(-3, -2, 0.1, 0.2, 6, 8))
+#' op_relu6(x)
+#' ```
+#' ```{r op-relu6-plot}
+#' x <- seq(-10, 10, .1)
+#' plot(x, op_relu6(x))
+#' ```
+#'
+#'
+#' @returns
+#' A tensor with the same shape as `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.ops.relu6
+op_relu6 <-
+function (x)
+keras$ops$relu6(x)
+
+
+#' Scaled Exponential Linear Unit (SELU) activation function.
+#'
+#' @description
+#' It is defined as:
+#'
+#' `f(x) = scale * alpha * (exp(x) - 1.) for x < 0`,
+#' `f(x) = scale * x for x >= 0`.
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_array(c(-1, 0, 1))
+#' op_selu(x)
+#' ```
+#'
+#' @returns
+#' A tensor with the same shape as `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.selu
+op_selu <-
+function (x)
+keras$ops$selu(x)
+
+
+#' General N-D separable convolution.
+#'
+#' @description
+#' This op supports 1D and 2D separable convolution. `separable_conv` is
+#' a depthwise conv followed by a pointwise conv.
+#'
+#' @returns
+#' A tensor of rank N+2, the result of the separable conv operation.
+#'
+#' @param inputs
+#' Tensor of rank N+2. `inputs` has shape
+#' `(batch_size,) + inputs_spatial_shape + (num_channels,)` if
+#' `data_format="channels_last"`, or
+#' `(batch_size, num_channels) + inputs_spatial_shape` if
+#' `data_format="channels_first"`.
+#'
+#' @param depthwise_kernel
+#' Tensor of rank N+2. `depthwise_kernel` has shape
+#' `(kernel_spatial_shape, num_input_channels, num_channels_multiplier)`.
+#' `num_input_channels` should match the number of channels in
+#' `inputs`.
+#'
+#' @param pointwise_kernel
+#' Tensor of rank N+2. `pointwise_kernel` has shape
+#' `(ones_like(kernel_spatial_shape),
+#' num_input_channels * num_channels_multiplier, num_output_channels)`,
+#' i.e., the spatial dimensions are all of size 1.
+#'
+#' @param strides
+#' int or int tuple/list of `len(inputs_spatial_shape)`,
+#' specifying the strides of the convolution along each spatial
+#' dimension. If `strides` is int, then every spatial dimension shares
+#' the same `strides`.
+#'
+#' @param padding
+#' string, either `"valid"` or `"same"`. `"valid"` means no
+#' padding is applied, and `"same"` results in padding evenly to the
+#' left/right or up/down of the input such that output has the
+#' same height/width dimension as the input when `strides=1`.
+#'
+#' @param data_format
+#' A string, either `"channels_last"` or `"channels_first"`.
+#' `data_format` determines the ordering of the dimensions in the
+#' inputs. If `data_format="channels_last"`, `inputs` is of shape
+#' `(batch_size, ..., channels)` while if
+#' `data_format="channels_first"`, `inputs` is of shape
+#' `(batch_size, channels, ...)`.
+#'
+#' @param dilation_rate
+#' int or int tuple/list of `len(inputs_spatial_shape)`,
+#' specifying the dilation rate to use for dilated convolution. If
+#' `dilation_rate` is int, then every spatial dimension shares
+#' the same `dilation_rate`.
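+#'
+#' # Examples
+#' A minimal sketch (all shapes here are illustrative): a 3x3 depthwise
+#' convolution over 3 channels, followed by a 1x1 pointwise convolution
+#' mapping the 3 channels to 4.
+#' ```{r}
+#' inputs <- op_ones(c(1, 8, 8, 3))
+#' depthwise_kernel <- op_ones(c(3, 3, 3, 1))
+#' pointwise_kernel <- op_ones(c(1, 1, 3, 4))
+#' op_shape(op_separable_conv(inputs, depthwise_kernel, pointwise_kernel,
+#'                            padding = "same"))
+#' ```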
+#' +#' @export +#' @family nn ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.separable_conv +op_separable_conv <- +function (inputs, depthwise_kernel, pointwise_kernel, strides = 1L, + padding = "valid", data_format = NULL, dilation_rate = 1L) +{ + args <- capture_args(list(strides = as_integer, dilation_rate = as_integer)) + do.call(keras$ops$separable_conv, args) +} + + +#' Sigmoid activation function. +#' +#' @description +#' It is defined as `f(x) = 1 / (1 + exp(-x))`. +#' +#' # Examples +#' ```{r} +#' x <- op_convert_to_tensor(c(-6, 1, 0, 1, 6)) +#' op_sigmoid(x) +#' ``` +#' +#' @returns +#' A tensor with the same shape as `x`. +#' +#' @param x +#' Input tensor. +#' +#' @export +#' @family nn ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.sigmoid +op_sigmoid <- +function (x) +keras$ops$sigmoid(x) + + +#' Sigmoid Linear Unit (SiLU) activation function, also known as Swish. +#' +#' @description +#' The SiLU activation function is computed by the sigmoid function multiplied +#' by its input. It is defined as `f(x) = x * sigmoid(x)`. +#' +#' # Examples +#' ```{r} +#' x <- op_convert_to_tensor(c(-6, 1, 0, 1, 6)) +#' op_sigmoid(x) +#' op_silu(x) +#' ``` +#' +#' @returns +#' A tensor with the same shape as `x`. +#' +#' @param x +#' Input tensor. +#' +#' @export +#' @family nn ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.silu +op_silu <- +function (x) +keras$ops$silu(x) + + +#' Softmax activation function. +#' +#' @description +#' The elements of the output vector lie within the range `(0, 1)`, and their +#' total sum is exactly 1 (excluding the floating point rounding error). +#' +#' Each vector is processed independently. The `axis` argument specifies the +#' axis along which the function is applied within the input. +#' +#' It is defined as: +#' `f(x) = exp(x) / sum(exp(x))` +#' +#' # Examples +#' ```{r} +#' x <- op_array(c(-1, 0, 1)) +#' op_softmax(x) +#' ``` +#' +#' @returns +#' A tensor with the same shape as `x`. +#' +#' @param x +#' Input tensor. +#' +#' @param axis +#' Integer, axis along which the softmax is applied. +#' +#' @export +#' @family nn ops +#' @family ops +#' @seealso +#' + +# + +#' +#' @tether keras.ops.softmax +op_softmax <- +function (x, axis = -1L) +{ + args <- capture_args(list(axis = as_axis)) + do.call(keras$ops$softmax, args) +} + + +#' Softplus activation function. +#' +#' @description +#' It is defined as `f(x) = log(exp(x) + 1)`, where `log` is the natural +#' logarithm and `exp` is the exponential function. +#' +#' # Examples +#' ```{r} +#' x <- op_convert_to_tensor(c(-0.555, 0, 0.555)) +#' op_softplus(x) +#' ``` +#' ```{r op-softplus-plot} +#' x <- seq(-10, 10, .1) +#' plot(x, op_softplus(x)) +#' ``` +#' @returns +#' A tensor with the same shape as `x`. +#' +#' @param x +#' Input tensor. +#' +#' @export +#' @family nn ops +#' @family ops +#' @seealso +#' + +# + +#' +#' @tether keras.ops.softplus +op_softplus <- +function (x) +keras$ops$softplus(x) + + +#' Softsign activation function. +#' +#' @description +#' It is defined as `f(x) = x / (abs(x) + 1)`. +#' +#' # Examples +#' ```{r} +#' x <- op_convert_to_tensor(c(-0.100, -10.0, 1.0, 0.0, 100.0)) +#' op_softsign(x) +#' ``` +#' ```{r op-softsign-plot} +#' x <- seq(-10, 10, .1) +#' plot(x, op_softsign(x), ylim = c(-1, 1)) +#' ``` +#' +#' @returns +#' A tensor with the same shape as `x`. +#' +#' @param x +#' Input tensor. 
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.ops.softsign
+op_softsign <-
+function (x)
+keras$ops$softsign(x)
+
+
+#' Computes sparse categorical cross-entropy loss.
+#'
+#' @description
+#' The sparse categorical cross-entropy loss is similar to categorical
+#' cross-entropy, but it is used when the target tensor contains integer
+#' class labels instead of one-hot encoded vectors. It measures the
+#' dissimilarity between the target and output probabilities or logits.
+#'
+#' # Examples
+#' ```{r}
+#' target <- op_array(c(0, 1, 2), dtype="int32")
+#' output <- op_array(rbind(c(0.9, 0.05, 0.05),
+#'                          c(0.1, 0.8, 0.1),
+#'                          c(0.2, 0.3, 0.5)))
+#' op_sparse_categorical_crossentropy(target, output)
+#' ```
+#'
+#' @returns
+#' A tensor of the computed sparse categorical cross-entropy
+#' loss between `target` and `output`.
+#'
+#' @param target
+#' The target tensor representing the true class labels as
+#' integers. Its shape should match the shape of the `output`
+#' tensor except for the last dimension.
+#'
+#' @param output
+#' The output tensor representing the predicted probabilities
+#' or logits.
+#' Its shape should match the shape of the `target` tensor except
+#' for the last dimension.
+#'
+#' @param from_logits
+#' (optional) Whether `output` is a tensor of logits
+#' or probabilities.
+#' Set it to `TRUE` if `output` represents logits; otherwise,
+#' set it to `FALSE` if `output` represents probabilities.
+#' Defaults to `FALSE`.
+#'
+#' @param axis
+#' (optional) The axis along which the sparse categorical
+#' cross-entropy is computed.
+#' Defaults to `-1`, which corresponds to the last dimension
+#' of the tensors.
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.ops.sparse_categorical_crossentropy
+op_sparse_categorical_crossentropy <-
+function (target, output, from_logits = FALSE, axis = -1L)
+{
+    args <- capture_args(list(axis = as_axis))
+    do.call(keras$ops$sparse_categorical_crossentropy, args)
+}
+
+
+#' Compute the absolute value element-wise.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @returns
+#' An array containing the absolute value of each element in `x`.
+#'
+#' @description
+#'
+#' # Example
+#' ```{r}
+#' x <- op_convert_to_tensor(c(-1.2, 1.2))
+#' op_abs(x)
+#' ```
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+# @seealso
+# +
+#' @tether keras.ops.absolute
+op_abs <-
+function (x)
+keras$ops$absolute(x)
+
+
+#' Add arguments element-wise.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' x1 <- op_convert_to_tensor(c(1, 4))
+#' x2 <- op_convert_to_tensor(c(5, 6))
+#' op_add(x1, x2)
+#' # alias for x1 + x2
+#' x1 + x2
+#' ```
+#'
+#' `op_add` also broadcasts shapes:
+#' ```{r}
+#' x1 <- op_convert_to_tensor(array(c(5, 5, 4, 6), dim = c(2, 2)))
+#' x2 <- op_convert_to_tensor(c(5, 6))
+#' op_add(x1, x2)
+#' ```
+#'
+#' Note that this function is automatically called when using the R operator `+` with tensors.
+#' ```{r}
+#' x <- op_ones(c(3))
+#' op_add(x, x)
+#' x + x
+#' ```
+#'
+#' @returns
+#' The tensor containing the element-wise sum of `x1` and `x2`.
+#'
+#' @param x1
+#' First input tensor.
+#'
+#' @param x2
+#' Second input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.add
+op_add <-
+function (x1, x2)
+keras$ops$add(x1, x2)
+
+
+#' Test whether all array elements along a given axis evaluate to `TRUE`.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_convert_to_tensor(c(TRUE, FALSE))
+#' op_all(x)
+#' ```
+#'
+#' ```{r}
+#' (x <- op_convert_to_tensor(array(c(TRUE, FALSE, TRUE, TRUE, TRUE, TRUE), dim = c(3, 2))))
+#' op_all(x, axis = 1)
+#' ```
+#'
+#' `keepdims = TRUE` outputs a tensor with dimensions reduced to one.
+#' ```{r}
+#' op_all(x, keepdims = TRUE)
+#' ```
+#'
+#' @returns
+#' The tensor containing the logical AND reduction over the `axis`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param axis
+#' An integer or tuple of integers that represent the axis along
+#' which a logical AND reduction is performed. The default
+#' (`axis = NULL`) is to perform a logical AND over all the dimensions
+#' of the input array. `axis` may be negative, in which case it counts
+#' from the last to the first axis.
+#'
+#' @param keepdims
+#' If `TRUE`, axes which are reduced are left in the result as
+#' dimensions with size one. With this option, the result will
+#' broadcast correctly against the input array. Defaults to `FALSE`.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.all
+op_all <-
+function (x, axis = NULL, keepdims = FALSE)
+{
+    args <- capture_args(list(axis = as_axis))
+    do.call(keras$ops$all, args)
+}
+
+
+# Returns the maximum of a vector or maximum value along an axis.
+#
+# @description
+#
+# `op_amax()` performs the same computation as [`op_max()`]
+#
+# # Examples
+# ```{r, include = FALSE}
+# op_amax <- op_max
+# ```
+# ```{r}
+# (x <- op_convert_to_tensor(rbind(c(1, 3, 5), c(1, 5, 2))))
+# op_amax(x)
+# op_amax(x, axis = 1)
+# op_amax(x, axis = 1, keepdims = TRUE)
+# ```
+#
+# @returns
+# A tensor with the maximum value. If `axis = NULL`, the result is a scalar
+# value representing the maximum element in the entire tensor. If `axis` is
+# given, the result is a tensor with the maximum values along
+# the specified axis.
+#
+# @param x
+# Input tensor.
+#
+# @param axis
+# Axis along which to compute the maximum.
+# By default (`axis = NULL`), find the maximum value in all the
+# dimensions of the input tensor.
+#
+# @param keepdims
+# If `TRUE`, axes which are reduced are left in the result as
+# dimensions that are broadcast to the size of the original
+# input tensor. Defaults to `FALSE`.
+#
+# @export
+# @noRd
+# @keywords internal
+# @family numpy ops
+# @family ops
+# @seealso
+# +
+# +
+# @tether keras.ops.amax
+# op_amax <-
+function (x, axis = NULL, keepdims = FALSE)
+{
+    args <- capture_args(list(axis = as_axis))
+    do.call(keras$ops$amax, args)
+}
+
+
+# Returns the minimum of a vector or minimum value along an axis.
+#
+# @description
+#
+# `op_amin()` performs the same computation as [`op_min()`]
+#
+# # Examples
+# ```{r, include = FALSE}
+# op_amin <- op_min
+# ```
+# ```{r}
+# (x <- op_convert_to_tensor(rbind(c(1, 3, 5), c(1, 5, 2))))
+# op_amin(x)
+# op_amin(x, axis = 1)
+# op_amin(x, axis = 1, keepdims = TRUE)
+# ```
+#
+# @returns
+# A tensor with the minimum value. If `axis = NULL`, the result is a scalar
+# value representing the minimum element in the entire tensor. If `axis` is
+# given, the result is a tensor with the minimum values along
+# the specified axis.
+#
+# @param x
+# Input tensor.
+#
+# @param axis
+# Axis along which to compute the minimum.
+# By default (`axis = NULL`), find the minimum value in all the
+# dimensions of the input tensor.
+#
+# @param keepdims
+# If `TRUE`, axes which are reduced are left in the result as
+# dimensions that are broadcast to the size of the original
+# input tensor. Defaults to `FALSE`.
+#
+# @export
+# @noRd
+# @keywords internal
+# @family numpy ops
+# @family ops
+# @seealso
+# +
+# +
+# @tether keras.ops.amin
+# op_amin <-
+function (x, axis = NULL, keepdims = FALSE)
+{
+    args <- capture_args(list(axis = as_axis))
+    do.call(keras$ops$amin, args)
+}
+
+
+#' Test whether any array element along a given axis evaluates to `TRUE`.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_array(c(TRUE, FALSE))
+#' op_any(x)
+#' ```
+#'
+#' ```{r}
+#' (x <- op_reshape(c(FALSE, FALSE, FALSE,
+#'                    TRUE, FALSE, FALSE),
+#'                  c(2, 3)))
+#' op_any(x, axis = 1)
+#' op_any(x, axis = 2)
+#' op_any(x, axis = -1)
+#' ```
+#'
+#' `keepdims = TRUE` outputs a tensor with dimensions reduced to one.
+#' ```{r}
+#' op_any(x, keepdims = TRUE)
+#' op_any(x, 1, keepdims = TRUE)
+#' ```
+#'
+#' @returns
+#' The tensor containing the logical OR reduction over the `axis`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param axis
+#' An integer or tuple of integers that represent the axis along
+#' which a logical OR reduction is performed. The default
+#' (`axis = NULL`) is to perform a logical OR over all the dimensions
+#' of the input array. `axis` may be negative, in which case it counts
+#' from the last to the first axis.
+#'
+#' @param keepdims
+#' If `TRUE`, axes which are reduced are left in the result as
+#' dimensions with size one. With this option, the result will
+#' broadcast correctly against the input array. Defaults to `FALSE`.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.any
+op_any <-
+function (x, axis = NULL, keepdims = FALSE)
+{
+    args <- capture_args(list(axis = as_axis))
+    do.call(keras$ops$any, args)
+}
+
+
+#' Append tensor `x2` to the end of tensor `x1`.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' x1 <- op_convert_to_tensor(c(1, 2, 3))
+#' x2 <- op_convert_to_tensor(rbind(c(4, 5, 6), c(7, 8, 9)))
+#' op_append(x1, x2)
+#' ```
+#'
+#' When `axis` is specified, `x1` and `x2` must have compatible shapes.
+#' ```{r}
+#' x1 <- op_convert_to_tensor(rbind(c(1, 2, 3), c(4, 5, 6)))
+#' x2 <- op_convert_to_tensor(rbind(c(7, 8, 9)))
+#' op_append(x1, x2, axis = 1)
+#' x3 <- op_convert_to_tensor(c(7, 8, 9))
+#' try(op_append(x1, x3, axis = 1))
+#' ```
+#'
+#' @returns
+#' A tensor with the values of `x2` appended to `x1`.
+#'
+#' @param x1
+#' First input tensor.
+#'
+#' @param x2
+#' Second input tensor.
+#'
+#' @param axis
+#' Axis along which tensor `x2` is appended to tensor `x1`.
+#' If `NULL`, both tensors are flattened before use.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.append
+op_append <-
+function (x1, x2, axis = NULL)
+{
+    args <- capture_args(list(axis = as_axis))
+    do.call(keras$ops$append, args)
+}
+
+
+#' Return evenly spaced values within a given interval.
+#'
+#' @description
+#' `arange` can be called with a varying number of positional arguments:
+#' * `arange(stop)`: Values are generated within the half-open interval
+#'   `[0, stop)` (in other words, the interval including `start` but excluding
+#'   `stop`).
+#' * `arange(start, stop)`: Values are generated within the half-open interval
+#'   `[start, stop)`.
+#' * `arange(start, stop, step)`: Values are generated within the half-open
+#'   interval `[start, stop)`, with spacing between values given by `step`.
+#'
+#' # Examples
+#' ```{r}
+#' op_arange(3L)
+#' op_arange(3) # float
+#' op_arange(3, dtype = 'int32') # int
+#' op_arange(3L, 7L)
+#' op_arange(3L, 7L, 2L)
+#' ```
+#'
+#' @returns
+#' Tensor of evenly spaced values.
+#' For floating point arguments, the length of the result is
+#' `ceiling((stop - start)/step)`. Because of floating point overflow, this
+#' rule may result in the last element of `out` being greater than `stop`.
+#'
+#' @param start
+#' Integer or real, representing the start of the interval. The
+#' interval includes this value.
+#'
+#' @param stop
+#' Integer or real, representing the end of the interval. The
+#' interval does not include this value, except in some cases where
+#' `step` is not an integer and floating point round-off affects the
+#' length of `out`. Defaults to `NULL`.
+#'
+#' @param step
+#' Integer or real, representing the spacing between values. For any
+#' output `out`, this is the distance between two adjacent values,
+#' `out[i+1] - out[i]`. The default step size is 1. If `step` is
+#' specified as a positional argument, `start` must also be given.
+#'
+#' @param dtype
+#' The type of the output array. If `dtype` is not given, infer the
+#' data type from the other input arguments.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.arange
+op_arange <-
+function (start, stop = NULL, step = 1L, dtype = NULL)
+{
+    args <- capture_args(list(
+        start = function(x) np_array(x, dtype),
+        stop = function(x) np_array(x, dtype),
+        step = function(x) np_array(x, dtype)
+    ))
+    do.call(keras$ops$arange, args)
+}
+
+
+#' Trigonometric inverse cosine, element-wise.
+#'
+#' @description
+#' The inverse of `cos` so that, if `y = cos(x)`, then `x = arccos(y)`.
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_convert_to_tensor(c(1, -1))
+#' op_arccos(x)
+#' ```
+#'
+#' @returns
+#' Tensor of the angle of the ray intersecting the unit circle at the given
+#' x-coordinate in radians `[0, pi]`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.arccos
+op_arccos <-
+function (x)
+keras$ops$arccos(x)
+
+
+#' Inverse hyperbolic cosine, element-wise.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_convert_to_tensor(c(10, 100))
+#' op_arccosh(x)
+#' ```
+#'
+#' @returns
+#' Output tensor of same shape as `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+# @seealso
+# +
+#' @tether keras.ops.arccosh
+op_arccosh <-
+function (x)
+keras$ops$arccosh(x)
+
+
+#' Inverse sine, element-wise.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_convert_to_tensor(c(1, -1, 0))
+#' op_arcsin(x)
+#' ```
+#'
+#' @returns
+#' Tensor of the inverse sine of each element in `x`, in radians and in
+#' the closed interval `[-pi/2, pi/2]`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.arcsin
+op_arcsin <-
+function (x)
+keras$ops$arcsin(x)
+
+
+#' Inverse hyperbolic sine, element-wise.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_convert_to_tensor(c(1, -1, 0))
+#' op_arcsinh(x)
+#' ```
+#'
+#' @returns
+#' Output tensor of same shape as `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.arcsinh
+op_arcsinh <-
+function (x)
+keras$ops$arcsinh(x)
+
+
+#' Trigonometric inverse tangent, element-wise.
+#' +#' @description +#' +#' # Examples +#' ```{r} +#' x <- op_convert_to_tensor(c(0, 1)) +#' op_arctan(x) +#' ``` +#' +#' @returns +#' Tensor of the inverse tangent of each element in `x`, in the interval +#' `[-pi/2, pi/2]`. +#' +#' @param x +#' Input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.arctan +op_arctan <- +function (x) +keras$ops$arctan(x) + + +#' Element-wise arc tangent of `x1/x2` choosing the quadrant correctly. +#' +#' @description +#' The quadrant (i.e., branch) is chosen so that `arctan2(x1, x2)` is the +#' signed angle in radians between the ray ending at the origin and passing +#' through the point `(1, 0)`, and the ray ending at the origin and passing +#' through the point `(x2, x1)`. (Note the role reversal: the "y-coordinate" +#' is the first function parameter, the "x-coordinate" is the second.) By IEEE +#' convention, this function is defined for `x2 = +/-0` and for either or both +#' of `x1` and `x2` `= +/-inf`. +#' +#' # Examples +#' Consider four points in different quadrants: +#' ```{r} +#' x <- op_array(c(-1, 1, 1, -1)) +#' y <- op_array(c(-1, -1, 1, 1)) +#' op_arctan2(y, x) * 180 / pi +#' ``` +#' +#' Note the order of the parameters. `arctan2` is defined also when x2 = 0 and +#' at several other points, obtaining values in the range `[-pi, pi]`: +#' ```{r} +#' op_arctan2( +#' op_array(c(1, -1)), +#' op_array(c(0, 0)) +#' ) +#' op_arctan2( +#' op_array(c(0, 0, Inf)), +#' op_array(c(+0, -0, Inf)) +#' ) +#' ``` +#' +#' @returns +#' Tensor of angles in radians, in the range `[-pi, pi]`. +#' +#' @param x1 +#' First input tensor. +#' +#' @param x2 +#' Second input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.arctan2 +op_arctan2 <- +function (x1, x2) +keras$ops$arctan2(x1, x2) + + +#' Inverse hyperbolic tangent, element-wise. +#' +#' @returns +#' Output tensor of same shape as `x`. +#' +#' @param x +#' Input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.arctanh +op_arctanh <- +function (x) +keras$ops$arctanh(x) + + +#' Returns the indices of the maximum values along an axis. +#' +#' @description +#' +#' # Examples +#' ```{r} +#' x <- op_arange(6L) |> op_reshape(c(2, 3)) |> op_add(10) +#' x +#' op_argmax(x) +#' op_argmax(x, axis = 1) +#' op_argmax(x, axis = 2) +#' ``` +#' +#' @note +#' This is similar to R `max.col(x) - 1` for the case of a 2-d array (a matrix), +#' or for an nd-array, `apply(x, axis, which.max) - 1` +#' +#' @returns +#' Tensor of indices. It has the same shape as `x`, with the dimension +#' along `axis` removed. Note that the returned integer is 0-based (i.e., if the +#' argmax is in the first index position, the returned value will be `0`) +#' +#' @param x +#' Input tensor. +#' +#' @param axis +#' By default, the index is into the flattened tensor, otherwise +#' along the specified axis. +#' +#' @param keepdims +#' If this is set to `TRUE`, the axes which are reduced are left +#' in the result as dimensions with size one. Defaults to `FALSE`. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.argmax +op_argmax <- +function (x, axis = NULL, keepdims = FALSE) +{ + args <- capture_args(list(axis = as_axis)) + do.call(keras$ops$argmax, args) +} + + +#' Returns the indices of the minimum values along an axis. 
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_arange(6L) |> op_reshape(c(2, 3)) |> op_add(10)
+#' x
+#' op_argmin(x)
+#' op_argmin(x, axis = 1)
+#' op_argmin(x, axis = 2)
+#' ```
+#'
+#' @note
+#' This is similar to the R expression `apply(x, axis, which.min) - 1`, where `x`
+#' is an R array.
+#'
+#' @returns
+#' Tensor of indices. It has the same shape as `x`, with the dimension
+#' along `axis` removed.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param axis
+#' By default, the index is into the flattened tensor, otherwise
+#' along the specified axis.
+#'
+#' @param keepdims
+#' If this is set to `TRUE`, the axes which are reduced are left
+#' in the result as dimensions with size one. Defaults to `FALSE`.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.argmin
+op_argmin <-
+function (x, axis = NULL, keepdims = FALSE)
+{
+    args <- capture_args(list(axis = as_axis))
+    do.call(keras$ops$argmin, args)
+}
+
+
+#' Returns the indices that would sort a tensor.
+#'
+#' @description
+#'
+#' # Examples
+#' One dimensional array:
+#' ```{r}
+#' x <- op_array(c(3, 1, 2))
+#' op_argsort(x)
+#' ```
+#'
+#' Two-dimensional array:
+#' ```{r}
+#' x <- op_array(rbind(c(0, 3),
+#'                     c(3, 2),
+#'                     c(4, 5)), dtype = "int32")
+#' op_argsort(x, axis = 1)
+#' op_argsort(x, axis = 2)
+#' ```
+#'
+#' @returns
+#' Tensor of indices that sort `x` along the specified `axis`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param axis
+#' Axis along which to sort. Defaults to `-1` (the last axis). If
+#' `NULL`, the flattened tensor is used.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.argsort
+op_argsort <-
+function (x, axis = -1L)
+{
+    args <- capture_args(list(axis = as_axis))
+    do.call(keras$ops$argsort, args)
+}
+
+
+#' Create a tensor.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' op_array(c(1, 2, 3))
+#' op_array(c(1, 2, 3), dtype = "float32")
+#' op_array(c(1, 2, 3), dtype = "int32")
+#' ```
+#'
+#' @returns
+#' A tensor.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param dtype
+#' The desired data-type for the tensor.
+# ' If `x` is an R double vector or array
+# ' `dtype` defaults to `config_floatx()` ("float32" by default)
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.array
+op_array <-
+function (x, dtype = NULL)
+{
+    if (!is.null(dtype) && is_string(dtype) &&
+        typeof(x) == "double" &&
+        grepl("int", dtype, fixed = TRUE))
+      storage.mode(x) <- "integer"
+    keras$ops$array(x, dtype)
+}
+
+
+#' Compute the weighted average along the specified axis.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' data <- op_arange(1, 5, dtype = "int32")
+#' data
+#' op_average(data)
+#'
+#' op_average(
+#'   op_arange(1, 11),
+#'   weights = op_arange(10, 0, -1)
+#' )
+#'
+#' data <- op_arange(6) |> op_reshape(c(3, 2))
+#' data
+#'
+#' op_average(
+#'   data,
+#'   axis = 2,
+#'   weights = op_array(c(1/4, 3/4))
+#' )
+#'
+#' # Error: Axis must be specified when shapes of x and weights differ.
+#' try(op_average(
+#'   data,
+#'   weights = op_array(c(1/4, 3/4))
+#' ))
+#' ```
+#'
+#' @returns
+#' Return the average along the specified axis.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param axis
+#' Integer along which to average `x`. The default, `axis = NULL`,
+#' will average over all of the elements of the input tensor. If `axis`
+#' is negative it counts from the last to the first axis.
+#'
+#' @param weights
+#' Tensor of weights associated with the values in `x`. Each
+#' value in `x` contributes to the average according to its
+#' associated weight. The weights array can either be 1-D (in which
+#' case its length must be the size of `x` along the given axis) or of
+#' the same shape as `x`. If `weights = NULL` (default), then all data
+#' in `x` are assumed to have a weight equal to one.
+#'
+#' The 1-D calculation is: `avg = sum(x * weights) / sum(weights)`.
+#' The only constraint on weights is that `sum(weights)` must not be 0.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.average
+op_average <-
+function (x, axis = NULL, weights = NULL)
+{
+    args <- capture_args(list(axis = as_axis))
+    # BUG guardrail. In Keras 3.3.2, this started silently (wrongly) succeeding
+    # where it would return the sum of the axis reductions rather than throwing
+    # an exception.
+    # We require here that users pass `axis` if passing weights with a different shape.
+    if (!is.null(weights) && is.null(axis) &&
+        !identical(op_shape(weights), op_shape(x)))
+      stop("Axis must be specified when shapes of x and weights differ.")
+    do.call(keras$ops$average, args)
+}
+
+
+#' Count the number of occurrences of each value in a tensor of integers.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' (x <- op_array(c(1, 2, 2, 3), dtype = "uint8"))
+#' op_bincount(x)
+#'
+#' (weights <- x / 2)
+#' op_bincount(x, weights = weights)
+#'
+#' minlength <- as.integer(op_max(x) + 1 + 2) # 6
+#' op_bincount(x, minlength = minlength)
+#' ```
+#'
+#' @returns
+#' 1D tensor where each element gives the number of occurrence(s) of its
+#' index value in `x`. Its length is the maximum between `max(x) + 1` and
+#' `minlength`.
+#'
+#' @param x
+#' Input tensor.
+#' It must be of dimension 1, and it must only contain non-negative
+#' integer(s).
+#'
+#' @param weights
+#' Weight tensor.
+#' It must have the same length as `x`. The default value is `NULL`.
+#' If specified, `x` is weighted by it, i.e. if `n = x[i]`,
+#' `out[n] += weight[i]` instead of the default behavior `out[n] += 1`.
+#'
+#' @param minlength
+#' An integer.
+#' The default value is 0. If specified, there will be at least
+#' this number of bins in the output tensor. If greater than
+#' `max(x) + 1`, each value of the output at an index higher than
+#' `max(x)` is set to 0.
+#'
+#' @param sparse
+#' Whether to return a sparse tensor; for backends that support
+#' sparse tensors.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.bincount
+op_bincount <-
+function (x, weights = NULL, minlength = 0L, sparse = FALSE)
+{
+    args <- capture_args(list(x = as_integer, minlength = as_integer))
+    do.call(keras$ops$bincount, args)
+}
+
+
+#' Broadcast a tensor to a new shape.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_array(c(1, 2, 3))
+#' op_broadcast_to(x, shape = c(3, 3))
+#' ```
+#'
+#' @returns
+#' A tensor with the desired shape.
+#'
+#' @param x
+#' The tensor to broadcast.
+#'
+#' @param shape
+#' The shape of the desired tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.broadcast_to
+op_broadcast_to <-
+function (x, shape)
+{
+    args <- capture_args(list(shape = normalize_shape))
+    do.call(keras$ops$broadcast_to, args)
+}
+
+
+#' Return the ceiling of the input, element-wise.
+#'
+#' @description
+#' The ceil of the scalar `x` is the smallest integer `i`, such that
+#' `i >= x`.
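+#'
+#' # Examples
+#' A short example (values chosen arbitrarily):
+#' ```{r}
+#' x <- op_array(c(-1.5, -0.2, 0.2, 1.5))
+#' op_ceil(x)
+#' ```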
+#'
+#' @returns
+#' The ceiling of each element in `x`, with float dtype.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.ceil
+op_ceil <-
+function (x)
+keras$ops$ceil(x)
+
+
+#' Clip (limit) the values in a tensor.
+#'
+#' @description
+#' Given an interval, values outside the interval are clipped to the
+#' interval edges. For example, if an interval of `[0, 1]` is specified,
+#' values smaller than 0 become 0, and values larger than 1 become 1.
+#'
+#' @returns
+#' The clipped tensor.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param x_min
+#' Minimum value.
+#'
+#' @param x_max
+#' Maximum value.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.clip
+op_clip <-
+function (x, x_min, x_max)
+keras$ops$clip(x, x_min, x_max)
+
+
+#' Join a sequence of tensors along an existing axis.
+#'
+#' @returns
+#' The concatenated tensor.
+#'
+#' @param xs
+#' The sequence of tensors to concatenate.
+#'
+#' @param axis
+#' The axis along which the tensors will be joined. Defaults to `1`,
+#' the first axis.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.concatenate
+op_concatenate <-
+function (xs, axis = 1L)
+{
+    args <- capture_args(list(axis = as_axis))
+    do.call(keras$ops$concatenate, args)
+}
+
+
+# ' Shorthand for [`op_conjugate()`].
+# '
+# ' @param x
+# ' see description
+# '
+# ' @export
+# ' @family numpy ops
+# ' @family ops
+# ' @seealso
+# ' +
+# ' +
+# ' @tether keras.ops.conj
+# op_conj <-
+# function (x)
+# keras$ops$conj(x)
+
+
+#' Returns the complex conjugate, element-wise.
+#'
+#' @description
+#' The complex conjugate of a complex number is obtained by changing the sign
+#' of its imaginary part.
+#'
+#' @returns
+#' The complex conjugate of each element in `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.conjugate
+op_conj <-
+function (x)
+keras$ops$conjugate(x)
+
+
+#' Returns a copy of `x`.
+#'
+#' @returns
+#' A copy of `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.copy
+op_copy <-
+function (x)
+keras$ops$copy(x)
+
+
+#' Compute the cross-correlation of two 1-dimensional tensors.
+#'
+#' @returns
+#' Output tensor, cross-correlation of `x1` and `x2`.
+#'
+#' @param x1
+#' First 1-dimensional input tensor of length M.
+#'
+#' @param x2
+#' Second 1-dimensional input tensor of length N.
+#'
+#' @param mode
+#' Either `"valid"`, `"same"` or `"full"`.
+#' By default the mode is set to `"valid"`, which returns
+#' an output of length `max(M, N) - min(M, N) + 1`.
+#' `"same"` returns an output of length `max(M, N)`.
+#' `"full"` mode returns the cross-correlation at each point of
+#' overlap, with an output length of `N + M - 1`.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @tether keras.ops.correlate
+op_correlate <-
+function (x1, x2, mode = "valid")
+keras$ops$correlate(as_array(x1), as_array(x2), mode)
+
+#' Cosine, element-wise.
+#'
+#' @returns
+#' The corresponding cosine values.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.cos
+op_cos <-
+function (x)
+keras$ops$cos(x)
+
+
+#' Hyperbolic cosine, element-wise.
+#'
+#' @returns
+#' Output tensor of same shape as `x`.
+#'
+#' @param x
+#' Input tensor.
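+#'
+#' @details
+#' # Example
+#' A minimal example (values chosen arbitrarily):
+#' ```{r}
+#' op_cosh(op_array(c(-1, 0, 1)))
+#' ```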
+#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.cosh +op_cosh <- +function (x) +keras$ops$cosh(x) + + +#' Counts the number of non-zero values in `x` along the given `axis`. +#' +#' @description +#' If no axis is specified then all non-zeros in the tensor are counted. +#' +#' # Examples +#' ```{r} +#' x <- op_array(rbind(c(0, 1, 7, 0), +#' c(3, 0, 2, 19))) +#' op_count_nonzero(x) +#' op_count_nonzero(x, axis = 1) +#' +#' op_count_nonzero(x, axis = 2) +#' ``` +#' +#' @returns +#' An integer or a tensor of integers. +#' +#' @param x +#' Input tensor. +#' +#' @param axis +#' Axis or a tuple of axes along which to count the number of +#' non-zeros. Defaults to `NULL`. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.count_nonzero +op_count_nonzero <- +function (x, axis = NULL) +{ + args <- capture_args(list(axis = as_axis)) + do.call(keras$ops$count_nonzero, args) +} + + +#' Returns the cross product of two (arrays of) vectors. +#' +#' @description +#' The cross product of `x1` and `x2` in R^3 is a vector +#' perpendicular to both `x1` and `x2`. If `x1` and `x2` are arrays of +#' vectors, the vectors are defined by the last axis of `x1` and `x2` +#' by default, and these axes can have dimensions 2 or 3. +#' +#' Where the dimension of either `x1` or `x2` is 2, the third component of +#' the input vector is assumed to be zero and the cross product calculated +#' accordingly. +#' +#' In cases where both input vectors have dimension 2, the z-component of +#' the cross product is returned. +#' +#' # Note +#' Torch backend does not support two dimensional vectors, or the +#' arguments `axisa`, `axisb` and `axisc`. Use `axis` instead. +#' +#' @returns +#' Vector cross product(s). +#' +#' @param x1 +#' Components of the first vector(s). +#' +#' @param x2 +#' Components of the second vector(s). +#' +#' @param axisa +#' Axis of `x1` that defines the vector(s). Defaults to `-1`. +#' +#' @param axisb +#' Axis of `x2` that defines the vector(s). Defaults to `-1`. +#' +#' @param axisc +#' Axis of the result containing the cross product vector(s). +#' Ignored if both input vectors have dimension 2, as the return is +#' scalar. By default, the last axis. +#' +#' @param axis +#' If defined, the axis of `x1`, `x2` and the result that +#' defines the vector(s) and cross product(s). Overrides `axisa`, +#' `axisb` and `axisc`. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.cross +op_cross <- +function (x1, x2, axisa = -1L, axisb = -1L, axisc = -1L, axis = NULL) +{ + args <- capture_args(list(axisa = as_integer, axisb = as_integer, + axisc = as_integer, axis = as_axis)) + do.call(keras$ops$cross, args) +} + +#' Decodes the output of a CTC model. +#' +#' @returns +#' A list containing: +#' +#' - A list of decoded sequences. +#' - A list of the negative of the sum of the probability logits +#' (if strategy is `"greedy"`) or the log probability (if strategy is +#' `"beam_search"`) for each sequence. +#' +#' @param inputs +#' A tensor of shape `(batch_size, max_length, num_classes)` +#' containing the logits (output of the model). +#' +#' @param sequence_lengths +#' A tensor of shape `(batch_size)` containing the +#' sequence lengths for the batch. +#' +#' @param strategy +#' A string for the decoding strategy. Supported values are +#' `"greedy"` and `"beam_search"`. +#' +#' @param beam_width +#' An integer scalar beam width used in beam search. 
+#' Defaults to `100`. +#' +#' @param top_paths +#' An integer scalar, the number of top paths to return. +#' Defaults to `1`. +#' +#' @param merge_repeated +#' A boolean scalar, whether to merge repeated +#' labels in the output. Defaults to `TRUE`. +#' +#' @param mask_index +#' An integer scalar, the index of the mask character in +#' the vocabulary. Defaults to `NULL`. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @tether keras.ops.ctc_decode +op_ctc_decode <- +function (inputs, sequence_lengths, strategy, beam_width = 100L, + top_paths = 1L, merge_repeated = TRUE, mask_index = NULL) +{ + args <- capture_args(list( + sequence_lengths = as_integer_array, + beam_width = as_integer, + top_paths = as_integer, + mask_index = as_integer)) + do.call(keras$ops$ctc_decode, args) +} + +#' Return the cumulative product of elements along a given axis. +#' +#' @returns +#' Output tensor. +#' +#' @param x +#' Input tensor. +#' +#' @param axis +#' Axis along which the cumulative product is computed. +#' By default the input is flattened. +#' +#' @param dtype +#' dtype of returned tensor. Defaults to `x$dtype`. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.cumprod +op_cumprod <- +function (x, axis = NULL, dtype = NULL) +{ + args <- capture_args(list(axis = as_axis)) + do.call(keras$ops$cumprod, args) +} + + +#' Returns the cumulative sum of elements along a given axis. +#' +#' @returns +#' Output tensor. +#' +#' @param x +#' Input tensor. +#' +#' @param axis +#' Axis along which the cumulative sum is computed. +#' By default the input is flattened. +#' +#' @param dtype +#' dtype of returned tensor. Defaults to `x$dtype`. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.cumsum +op_cumsum <- +function (x, axis = NULL, dtype = NULL) +{ + args <- capture_args(list(axis = as_axis)) + do.call(keras$ops$cumsum, args) +} + + +#' Extract a diagonal or construct a diagonal array. +#' +#' @description +#' +#' # Examples +#' ```{r} +#' x <- op_arange(9L) |> op_reshape(c(3, 3)) +#' x +#' op_diag(x) +#' op_diag(x, k = 1) +#' op_diag(x, k = -1) +#' op_diag(op_diag(x)) +#' ``` +#' +#' @returns +#' The extracted diagonal or constructed diagonal tensor. +#' +#' @param x +#' Input tensor. If `x` is 2-D, returns the k-th diagonal of `x`. +#' If `x` is 1-D, return a 2-D tensor with `x` on the k-th diagonal. +#' +#' @param k +#' The diagonal to consider. Defaults to `0`. Use `k > 0` for diagonals +#' above the main diagonal, and `k < 0` for diagonals below +#' the main diagonal. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.diag +op_diag <- +function (x, k = 0L) +{ + args <- capture_args(list(k = as_integer)) + do.call(keras$ops$diag, args) +} + + +#' Return specified diagonals. +#' +#' @description +#' If `x` is 2-D, returns the diagonal of `x` with the given offset, i.e., the +#' collection of elements of the form `x[i, i+offset]`. +#' +#' If `x` has more than two dimensions, the axes specified by `axis1` +#' and `axis2` are used to determine the 2-D sub-array whose diagonal +#' is returned. +#' +#' The shape of the resulting array can be determined by removing `axis1` +#' and `axis2` and appending an index to the right equal to the size of +#' the resulting diagonals. 
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_arange(4L) |> op_reshape(c(2, 2))
+#' x
+#' op_diagonal(x)
+#' op_diagonal(x, offset = 1)
+#'
+#' x <- op_array(1:8) |> op_reshape(c(2, 2, 2))
+#' x
+#' x |> op_diagonal(0)
+#' x |> op_diagonal(0, 1, 2) # same as above, the default
+#' x |> op_diagonal(0, 2, 3)
+#' ```
+#'
+#' @returns
+#' Tensor of diagonals.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param offset
+#' Offset of the diagonal from the main diagonal.
+#' Can be positive or negative. Defaults to `0` (main diagonal).
+#'
+#' @param axis1
+#' Axis to be used as the first axis of the 2-D sub-arrays.
+#' Defaults to `1` (first axis).
+#'
+#' @param axis2
+#' Axis to be used as the second axis of the 2-D sub-arrays.
+#' Defaults to `2` (second axis).
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.diagonal
+op_diagonal <-
+function (x, offset = 0L, axis1 = 1L, axis2 = 2L)
+{
+    args <- capture_args(list(offset = as_integer, axis1 = as_axis,
+        axis2 = as_axis))
+    do.call(keras$ops$diagonal, args)
+}
+
+
+#' Calculate the n-th discrete difference along the given axis.
+#'
+#' @description
+#' The first difference is given by `out[i] = a[i+1] - a[i]` along
+#' the given axis, higher differences are calculated by using `diff`
+#' recursively.
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_array(c(1, 2, 4, 7, 0))
+#' op_diff(x)
+#' op_diff(x, n = 2)
+#' x <- op_array(rbind(c(1, 3, 6, 10),
+#'                     c(0, 5, 6, 8)))
+#' op_diff(x)
+#' op_diff(x, axis = 1)
+#' ```
+#'
+#' @returns
+#' Tensor of differences. The shape of the output is the same as `x`
+#' except along `axis`, where the dimension is smaller by `n`.
+#'
+#' @param a
+#' Input tensor.
+#'
+#' @param n
+#' The number of times values are differenced. Defaults to `1`.
+#'
+#' @param axis
+#' Axis to compute discrete difference(s) along.
+#' Defaults to `-1` (last axis).
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+# @seealso
+# +
+#' @tether keras.ops.diff
+op_diff <-
+function (a, n = 1L, axis = -1L)
+{
+    args <- capture_args(list(n = as_integer, axis = as_axis))
+    do.call(keras$ops$diff, args)
+}
+
+
+#' Returns the indices of the bins to which each value in `x` belongs.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_array(c(0.0, 1.0, 3.0, 1.6))
+#' bins <- array(c(0.0, 3.0, 4.5, 7.0))
+#' op_digitize(x, bins)
+#' # array([1, 1, 2, 1])
+#' ```
+#'
+#' @returns
+#' Output array of indices, of same shape as `x`.
+#'
+#' @param x
+#' Input array to be binned.
+#'
+#' @param bins
+#' Array of bins. It has to be one-dimensional and monotonically
+#' increasing.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.digitize
+op_digitize <-
+function (x, bins)
+{
+    args <- capture_args(list(bins = as.array))
+    do.call(keras$ops$digitize, args)
+}
+
+
+#' Divide arguments element-wise.
+#'
+#' Note that this function is automatically called when using the R operator `/` with a tensor.
+#' ```{r}
+#' (x <- op_arange(4))
+#' op_divide(x, 2)
+#' x / 2
+#' ```
+#'
+#' @returns
+#' Output tensor, the quotient `x1/x2`, element-wise.
+#'
+#' @param x1
+#' First input tensor.
+#'
+#' @param x2
+#' Second input tensor.
+#'
+#' @details
+#'
+#' # Example
+#' ```{r}
+#' op_divide(3, 2)
+#' ```
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.divide
+op_divide <-
+function (x1, x2)
+keras$ops$divide(x1, x2)
+
+
+#' Dot product of two tensors.
+#'
+#' @description
+#' - If both `x1` and `x2` are 1-D tensors, it is inner product of vectors
+#'   (without complex conjugation).
+#' - If both `x1` and `x2` are 2-D tensors, it is matrix multiplication.
+#' - If either `x1` or `x2` is 0-D (scalar), it is equivalent to `x1 * x2`.
+#' - If `x1` is an N-D tensor and `x2` is a 1-D tensor, it is a sum product
+#'   over the last axis of `x1` and `x2`.
+#' - If `x1` is an N-D tensor and `x2` is an M-D tensor (where `M >= 2`),
+#'   it is a sum product over the last axis of `x1` and the second-to-last
+#'   axis of `x2`: `dot(x1, x2)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])`.
+#'
+#' # Note
+#' Torch backend does not accept 0-D tensors as arguments.
+#'
+#' @returns
+#' Dot product of `x1` and `x2`.
+#'
+#' @param x1
+#' First argument.
+#'
+#' @param x2
+#' Second argument.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.dot
+op_dot <-
+function (x1, x2)
+keras$ops$dot(x1, x2)
+
+
+#' Evaluates the Einstein summation convention on the operands.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' a <- op_arange(25) |> op_reshape(c(5, 5))
+#' b <- op_arange(5)
+#' c <- op_arange(6) |> op_reshape(c(2, 3))
+#' ```
+#'
+#' Trace of a matrix:
+#'
+#' ```{r, results = 'hold'}
+#' op_einsum("ii", a)
+#' op_trace(a)
+#' ```
+#'
+#' Extract the diagonal:
+#'
+#' ```{r, results = 'hold'}
+#' op_einsum("ii -> i", a)
+#' op_diag(a)
+#' ```
+#'
+#' Sum over an axis:
+#'
+#' ```{r, results = 'hold'}
+#' op_einsum("ij -> i", a)
+#' op_sum(a, axis = 2)
+#' ```
+#'
+#' For higher dimensional tensors summing a single axis can be done
+#' with ellipsis:
+#'
+#' ```{r, results = 'hold'}
+#' op_einsum("...j -> ...", a)
+#' op_sum(a, axis = -1)
+#' ```
+#'
+#' Compute a matrix transpose or reorder any number of axes:
+#'
+#' ```{r, results = 'hold'}
+#' op_einsum("ij", c) # return c unchanged
+#' ```
+#' ```{r, results = 'hold'}
+#' op_einsum("ij -> ji", c) # transpose
+#' op_transpose(c) # same as above
+#' ```
+#'
+#' Matrix vector multiplication:
+#'
+#' ```{r, results = 'hold'}
+#' op_einsum("ij, j", a, b)
+#' op_einsum("...j, j", a, b)
+#' a %*% b
+#' op_matmul(a, b)
+#' ```
+#'
+#' @returns
+#' The calculation based on the Einstein summation convention.
+#'
+#' @param subscripts
+#' Specifies the subscripts for summation as a comma separated
+#' list of subscript labels. An implicit (classical Einstein
+#' summation) calculation is performed unless the explicit indicator
+#' `->` is included as well as subscript labels of the precise
+#' output form.
+#'
+#' @param ...
+#' The operands to compute the Einstein sum of.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.einsum
+op_einsum <-
+function (subscripts, ...)
+keras$ops$einsum(subscripts, ...)
+
+
+#' Return a tensor of given shape and type filled with uninitialized data.
+#'
+#' @returns
+#' The empty tensor.
+#'
+#' @param shape
+#' Shape of the empty tensor.
+#'
+#' @param dtype
+#' Desired data type of the empty tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.empty
+op_empty <-
+function (shape, dtype = NULL)
+{
+    args <- capture_args(list(shape = normalize_shape))
+    do.call(keras$ops$empty, args)
+}
+
+
+#' Returns `(x1 == x2)` element-wise.
+#'
+#' Note that this function is automatically called when using the R operator `==` with a tensor.
+#' ```{r}
+#' (x <- op_arange(4))
+#' op_equal(x, 2)
+#' x == 2
+#' ```
+#'
+#' @returns
+#' Output tensor, element-wise comparison of `x1` and `x2`.
+#'
+#' @param x1
+#' Tensor to compare.
+#'
+#' @param x2
+#' Tensor to compare.
+#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.equal +op_equal <- +function (x1, x2) +keras$ops$equal(x1, x2) + + +#' Calculate the exponential of all elements in the input tensor. +#' +#' @returns +#' Output tensor, element-wise exponential of `x`. +#' +#' @param x +#' Input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.exp +op_exp <- +function (x) +keras$ops$exp(x) + + +#' Expand the shape of a tensor. +#' +#' @description +#' Insert a new axis at the `axis` position in the expanded tensor shape. +#' +#' @returns +#' Output tensor with the number of dimensions increased. +#' +#' @param x +#' Input tensor. +#' +#' @param axis +#' Position in the expanded axes where the new axis +#' (or axes) is placed. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.expand_dims +op_expand_dims <- +function (x, axis) +{ + args <- capture_args(list(axis = as_axis)) + do.call(keras$ops$expand_dims, args) +} + + +#' Calculate `exp(x) - 1` for all elements in the tensor. +#' +#' @returns +#' Output tensor, element-wise exponential minus one. +#' +#' @param x +#' Input values. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.expm1 +op_expm1 <- +function (x) +keras$ops$expm1(x) + + +#' Return a 2-D tensor with ones on the diagonal and zeros elsewhere. +#' +#' @returns +#' Tensor with ones on the k-th diagonal and zeros elsewhere. +#' +#' @param N +#' Number of rows in the output. +#' +#' @param M +#' Number of columns in the output. If `NULL`, defaults to `N`. +#' +#' @param k +#' Index of the diagonal: 0 (the default) refers to the main +#' diagonal, a positive value refers to an upper diagonal, +#' and a negative value to a lower diagonal. +#' +#' @param dtype +#' Data type of the returned tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.eye +op_eye <- +function (N, M = NULL, k = 0L, dtype = NULL) +{ + args <- capture_args(list(k = as_integer)) + do.call(keras$ops$eye, args) +} + + +#' Reverse the order of elements in the tensor along the given axis. +#' +#' @description +#' The shape of the tensor is preserved, but the elements are reordered. +#' +#' @returns +#' Output tensor with entries of `axis` reversed. +#' +#' @param x +#' Input tensor. +#' +#' @param axis +#' Axis or axes along which to flip the tensor. The default, +#' `axis = NULL`, will flip over all of the axes of the input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.flip +op_flip <- +function (x, axis = NULL) +{ + args <- capture_args(list(axis = as_axis)) + do.call(keras$ops$flip, args) +} + + +#' Return the floor of the input, element-wise. +#' +#' @description +#' The floor of the scalar `x` is the largest integer `i`, such that `i <= x`. +#' +#' @returns +#' Output tensor, element-wise floor of `x`. +#' +#' @param x +#' Input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.floor +op_floor <- +function (x) +keras$ops$floor(x) + + +#' Returns the largest integer smaller or equal to the division of inputs. +#' +#' Note that this function is automatically called when using the R operator `%/%` with a tensor. 
+#' ```{r}
+#' (x <- op_arange(10))
+#' op_floor_divide(x, 2)
+#' x %/% 2
+#' ```
+#'
+#' @returns
+#' Output tensor, `y <- floor(x1/x2)`
+#'
+#' @param x1
+#' Numerator.
+#'
+#' @param x2
+#' Denominator.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+# @seealso
+# +
+#' @tether keras.ops.floor_divide
+op_floor_divide <-
+function (x1, x2)
+keras$ops$floor_divide(x1, x2)
+
+
+#' Return a new tensor of given shape and type, filled with `fill_value`.
+#'
+#' @returns
+#' Output tensor.
+#'
+#' @param shape
+#' Shape of the new tensor.
+#'
+#' @param fill_value
+#' Fill value.
+#'
+#' @param dtype
+#' Desired data type of the tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.full
+op_full <-
+function (shape, fill_value, dtype = NULL)
+{
+    args <- capture_args(list(shape = normalize_shape))
+    do.call(keras$ops$full, args)
+}
+
+
+#' Return a full tensor with the same shape and type as the given tensor.
+#'
+#' @returns
+#' Tensor of `fill_value` with the same shape and type as `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param fill_value
+#' Fill value.
+#'
+#' @param dtype
+#' Overrides data type of the result.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.full_like
+op_full_like <-
+function (x, fill_value, dtype = NULL)
+keras$ops$full_like(x, fill_value, dtype)
+
+
+#' Return `x[key]`.
+#'
+#' @param x
+#' A dictionary-like object.
+#'
+#' @param key
+#' Generally a string, but most objects with a `__hash__` method are
+#' acceptable.
+#'
+#' @note
+#' Generally, calling `x[[key]]` or `x$key` is preferable.
+#'
+#' @returns The value at `x[key]`.
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.get_item
+op_get_item <-
+function (x, key)
+keras$ops$get_item(x, key)
+
+
+#' Return the truth value of `x1 > x2` element-wise.
+#'
+#' Note that this function is automatically called when using the R operator `>` with a tensor.
+#' ```{r}
+#' (x <- op_arange(4))
+#' op_greater(x, 2)
+#' x > 2
+#' ```
+#'
+#' @returns
+#' Output tensor, element-wise comparison of `x1` and `x2`.
+#'
+#' @param x1
+#' First input tensor.
+#'
+#' @param x2
+#' Second input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.greater
+op_greater <-
+function (x1, x2)
+keras$ops$greater(x1, x2)
+
+
+#' Return the truth value of `x1 >= x2` element-wise.
+#'
+#' Note that this function is automatically called when using the R operator `>=` with a tensor.
+#' ```{r}
+#' (x <- op_arange(4))
+#' op_greater_equal(x, 2)
+#' x >= 2
+#' ```
+#'
+#' @returns
+#' Output tensor, element-wise comparison of `x1` and `x2`.
+#'
+#' @param x1
+#' First input tensor.
+#'
+#' @param x2
+#' Second input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.greater_equal
+op_greater_equal <-
+function (x1, x2)
+keras$ops$greater_equal(x1, x2)
+
+
+#' Stack tensors in sequence horizontally (column wise).
+#'
+#' @description
+#' This is equivalent to concatenation along the first axis for 1-D tensors,
+#' and along the second axis for all other tensors.
+#'
+#' @returns
+#' The tensor formed by stacking the given tensors.
+#'
+#' @param xs
+#' Sequence of tensors.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.hstack
+op_hstack <-
+function (xs)
+keras$ops$hstack(xs)
+
+
+#' Return the identity tensor.
+#' +#' @description +#' The identity tensor is a square tensor with ones on the main diagonal and +#' zeros elsewhere. +#' +#' @returns +#' The identity tensor. +#' +#' @param n +#' Number of rows (and columns) in the `n x n` output tensor. +#' +#' @param dtype +#' Data type of the output tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.identity +op_identity <- +function (n, dtype = NULL) +keras$ops$identity(n, dtype) + + +#' Return the imaginary part of the complex argument. +#' +#' @returns +#' The imaginary component of the complex argument. +#' +#' @param x +#' Input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.imag +op_imag <- +function (x) +keras$ops$imag(x) + + +#' Return whether two tensors are element-wise almost equal. +#' +#' @returns +#' Output boolean tensor. +#' +#' @param x1 +#' First input tensor. +#' +#' @param x2 +#' Second input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.isclose +op_isclose <- +function (x1, x2) +keras$ops$isclose(x1, x2) + + +#' Return whether a tensor is finite, element-wise. +#' +#' @description +#' Real values are finite when they are not NaN, not positive infinity, and +#' not negative infinity. Complex values are finite when both their real +#' and imaginary parts are finite. +#' +#' @returns +#' Output boolean tensor. +#' +#' @param x +#' Input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.isfinite +op_isfinite <- +function (x) +keras$ops$isfinite(x) + + +#' Test element-wise for positive or negative infinity. +#' +#' @returns +#' Output boolean tensor. +#' +#' @param x +#' Input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.isinf +op_isinf <- +function (x) +keras$ops$isinf(x) + + +#' Test element-wise for NaN and return result as a boolean tensor. +#' +#' @returns +#' Output boolean tensor. +#' +#' @param x +#' Input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.isnan +op_isnan <- +function (x) +keras$ops$isnan(x) + + +#' Return the truth value of `x1 < x2` element-wise. +#' +#' Note that this function is automatically called when using the R operator `<` with a tensor. +#' ```{r} +#' (x <- op_arange(4)) +#' op_less(x, 2) +#' x < 2 +#' ``` +#' +#' @returns +#' Output tensor, element-wise comparison of `x1` and `x2`. +#' +#' @param x1 +#' First input tensor. +#' +#' @param x2 +#' Second input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.less +op_less <- +function (x1, x2) +keras$ops$less(x1, x2) + + +#' Return the truth value of `x1 <= x2` element-wise. +#' +#' Note that this function is automatically called when using the R operator `<=` with a tensor. +#' ```{r} +#' (x <- op_arange(4)) +#' op_less_equal(x, 2) +#' x <= 2 +#' ``` +#' +#' @returns +#' Output tensor, element-wise comparison of `x1` and `x2`. +#' +#' @param x1 +#' First input tensor. +#' +#' @param x2 +#' Second input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.less_equal +op_less_equal <- +function (x1, x2) +keras$ops$less_equal(x1, x2) + + +#' Return evenly spaced numbers over a specified interval. 
+#' +#' @description +#' Returns `num` evenly spaced samples, calculated over the interval +#' `[start, stop]`. +#' +#' The endpoint of the interval can optionally be excluded. +#' +#' # Note +#' Torch backend does not support `axis` argument. +#' +#' @returns +#' A tensor of evenly spaced numbers. +#' If `retstep` is `TRUE`, returns `(samples, step)` +#' +#' @param start +#' The starting value of the sequence. +#' +#' @param stop +#' The end value of the sequence, unless `endpoint` is set to +#' `FALSE`. In that case, the sequence consists of all but the last +#' of `num + 1` evenly spaced samples, so that `stop` is excluded. +#' Note that the step size changes when `endpoint` is `FALSE`. +#' +#' @param num +#' Number of samples to generate. Defaults to `50`. Must be +#' non-negative. +#' +#' @param endpoint +#' If `TRUE`, `stop` is the last sample. Otherwise, it is +#' not included. Defaults to `TRUE`. +#' +#' @param retstep +#' If `TRUE`, return `(samples, step)`, where `step` is the +#' spacing between samples. +#' +#' @param dtype +#' The type of the output tensor. +#' +#' @param axis +#' The axis in the result to store the samples. Relevant only if +#' start or stop are array-like. Defaults to `1`, the first axis. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.linspace +op_linspace <- +function (start, stop, num = 50L, endpoint = TRUE, retstep = FALSE, + dtype = NULL, axis = 1L) +{ + args <- capture_args(list(num = as_integer, axis = as_axis)) + do.call(keras$ops$linspace, args) +} + + +#' Natural logarithm, element-wise. +#' +#' @returns +#' Output tensor, element-wise natural logarithm of `x`. +#' +#' @param x +#' Input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.log +op_log <- +function (x) +keras$ops$log(x) + + +#' Return the base 10 logarithm of the input tensor, element-wise. +#' +#' @returns +#' Output tensor, element-wise base 10 logarithm of `x`. +#' +#' @param x +#' Input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.log10 +op_log10 <- +function (x) +keras$ops$log10(x) + + +#' Returns the natural logarithm of one plus the `x`, element-wise. +#' +#' @description +#' Calculates `log(1 + x)`. +#' +#' @returns +#' Output tensor, element-wise natural logarithm of `1 + x`. +#' +#' @param x +#' Input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.log1p +op_log1p <- +function (x) +keras$ops$log1p(x) + + +#' Base-2 logarithm of `x`, element-wise. +#' +#' @returns +#' Output tensor, element-wise base-2 logarithm of `x`. +#' +#' @param x +#' Input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.log2 +op_log2 <- +function (x) +keras$ops$log2(x) + + +#' Logarithm of the sum of exponentiations of the inputs. +#' +#' @description +#' Calculates `log(exp(x1) + exp(x2))`. +#' +#' @returns +#' Output tensor, element-wise logarithm of the sum of exponentiations +#' of the inputs. +#' +#' @param x1 +#' Input tensor. +#' +#' @param x2 +#' Input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.logaddexp +op_logaddexp <- +function (x1, x2) +keras$ops$logaddexp(x1, x2) + + +#' Computes the element-wise logical AND of the given input tensors. +#' +#' Note that this function is automatically called when using the R operator `&` with a tensor. 
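+#' For example (a minimal illustration):
+#' ```{r}
+#' x <- op_array(c(TRUE, TRUE, FALSE, FALSE))
+#' y <- op_array(c(TRUE, FALSE, TRUE, FALSE))
+#' op_logical_and(x, y)
+#' x & y
+#' ```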
+#' +#' @description +#' Zeros are treated as `FALSE` and non-zeros are treated as `TRUE`. +#' +#' @returns +#' Output tensor, element-wise logical AND of the inputs. +#' +#' @param x1 +#' Input tensor. +#' +#' @param x2 +#' Input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.logical_and +op_logical_and <- +function (x1, x2) +keras$ops$logical_and(x1, x2) + + +#' Computes the element-wise NOT of the given input tensor. +#' +#' @description +#' Zeros are treated as `FALSE` and non-zeros are treated as `TRUE`. +#' +#' Note that this function is automatically called when using the R operator `!` with a tensor. +#' +#' @returns +#' Output tensor, element-wise logical NOT of the input. +#' +#' @param x +#' Input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.logical_not +op_logical_not <- +function (x) +keras$ops$logical_not(x) + + +#' Computes the element-wise logical OR of the given input tensors. +#' +#' @description +#' Zeros are treated as `FALSE` and non-zeros are treated as `TRUE`. +#' +#' Note that this function is automatically called when using the R operator `|` with a tensor. +#' +#' @returns +#' Output tensor, element-wise logical OR of the inputs. +#' +#' @param x1 +#' Input tensor. +#' +#' @param x2 +#' Input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.logical_or +op_logical_or <- +function (x1, x2) +keras$ops$logical_or(x1, x2) + + +#' Compute the truth value of `x1 XOR x2`, element-wise. +#' +#' @returns +#' Output boolean tensor. +#' +#' @param x1 +#' First input tensor. +#' +#' @param x2 +#' Second input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +# @seealso +# + +#' @tether keras.ops.logical_xor +op_logical_xor <- +function (x1, x2) +keras$ops$logical_xor(x1, x2) + + +#' Returns numbers spaced evenly on a log scale. +#' +#' @description +#' In linear space, the sequence starts at `base ** start` and ends with +#' `base ** stop` (see `endpoint` below). +#' +#' # Note +#' Torch backend does not support `axis` argument. +#' +#' @returns +#' A tensor of evenly spaced samples on a log scale. +#' +#' @param start +#' The starting value of the sequence. +#' +#' @param stop +#' The final value of the sequence, unless `endpoint` is `FALSE`. +#' In that case, `num + 1` values are spaced over the interval in +#' log-space, of which all but the last (a sequence of length `num`) +#' are returned. +#' +#' @param num +#' Number of samples to generate. Defaults to `50`. +#' +#' @param endpoint +#' If `TRUE`, `stop` is the last sample. Otherwise, it is not +#' included. Defaults to `TRUE`. +#' +#' @param base +#' The base of the log space. Defaults to `10`. +#' +#' @param dtype +#' The type of the output tensor. +#' +#' @param axis +#' The axis in the result to store the samples. Relevant only +#' if start or stop are array-like. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.logspace +op_logspace <- +function (start, stop, num = 50L, endpoint = TRUE, base = 10L, + dtype = NULL, axis = 1L) +{ + args <- capture_args(list(num = as_integer, base = as_integer, + axis = as_axis)) + do.call(keras$ops$logspace, args) +} + + +#' Matrix product of two tensors. +#' +#' @description +#' - If both tensors are 1-dimensional, the dot product (scalar) is returned. 
+#' - If either tensor is N-D, N > 2, it is treated as a stack of matrices
+#'   residing in the last two indexes and broadcast accordingly.
+#' - If the first tensor is 1-D, it is promoted to a matrix by prepending
+#'   a 1 to its dimensions. After matrix multiplication the prepended
+#'   1 is removed.
+#' - If the second tensor is 1-D, it is promoted to a matrix by appending a 1
+#'   to its dimensions. After matrix multiplication the appended 1 is removed.
+#'
+#' @returns
+#' Output tensor, matrix product of the inputs.
+#'
+#' @param x1
+#' First tensor.
+#'
+#' @param x2
+#' Second tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.matmul
+op_matmul <-
+function (x1, x2)
+keras$ops$matmul(x1, x2)
+
+
+#' Return the maximum of a tensor or maximum along an axis.
+#'
+#' @description
+#'
+#' # Examples
+#' ```{r}
+#' (x <- op_convert_to_tensor(rbind(c(1, 3, 5), c(1, 5, 2))))
+#' op_max(x)
+#' op_max(x, axis = 1)
+#' op_max(x, axis = 1, keepdims = TRUE)
+#' ```
+#'
+#' @returns
+#' Maximum of `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param axis
+#' Axis or axes along which to operate. By default, flattened input
+#' is used.
+#'
+#' @param keepdims
+#' If this is set to `TRUE`, the axes which are reduced are left
+#' in the result as dimensions with size one. Defaults to `FALSE`.
+#'
+#' @param initial
+#' The minimum value of an output element. Defaults to `NULL`.
+#'
+#' @export
+#' @aliases op_amax
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.max
+op_max <-
+function (x, axis = NULL, keepdims = FALSE, initial = NULL)
+{
+    args <- capture_args(list(axis = as_axis))
+    do.call(keras$ops$max, args)
+}
+
+
+#' Element-wise maximum of `x1` and `x2`.
+#'
+#' @returns
+#' Output tensor, element-wise maximum of `x1` and `x2`.
+#'
+#' @param x1
+#' First tensor.
+#'
+#' @param x2
+#' Second tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.maximum
+op_maximum <-
+function (x1, x2)
+keras$ops$maximum(x1, x2)
+
+#' @export
+#' @rdname op_maximum
+op_pmax <- op_maximum
+
+
+#' Compute the arithmetic mean along the specified axes.
+#'
+#' @returns
+#' Output tensor containing the mean values.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param axis
+#' Axis or axes along which the means are computed. The default
+#' is to compute the mean of the flattened tensor.
+#'
+#' @param keepdims
+#' If this is set to `TRUE`, the axes which are reduced are left
+#' in the result as dimensions with size one.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.mean
+op_mean <-
+function (x, axis = NULL, keepdims = FALSE)
+{
+    args <- capture_args(list(axis = as_axis))
+    do.call(keras$ops$mean, args)
+}
+
+
+#' Compute the median along the specified axis.
+#'
+#' @returns
+#' The output tensor.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param axis
+#' Axis or axes along which the medians are computed. Defaults to
+#' `axis = NULL`, which computes the median(s) along a flattened
+#' version of the array.
+#'
+#' @param keepdims
+#' If this is set to `TRUE`, the axes which are reduced
+#' are left in the result as dimensions with size one.
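+#'
+#' @details
+#' # Example
+#' A minimal example (values chosen arbitrarily):
+#' ```{r}
+#' x <- op_array(rbind(c(1, 3, 2),
+#'                     c(5, 4, 6)))
+#' op_median(x)
+#' op_median(x, axis = 1)
+#' ```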
+#' +#' @export +#' @family numpy ops +#' @family ops +# @seealso +# + +#' @tether keras.ops.median +op_median <- +function (x, axis = NULL, keepdims = FALSE) +{ + args <- capture_args(list(axis = as_axis)) + do.call(keras$ops$median, args) +} + + +#' Creates grids of coordinates from coordinate vectors. +#' +#' @description +#' Given `N` 1-D tensors `T0, T1, ..., TN-1` as inputs with corresponding +#' lengths `S0, S1, ..., SN-1`, this creates an `N` N-dimensional tensors +#' `G0, G1, ..., GN-1` each with shape `(S0, ..., SN-1)` where the output +#' `Gi` is constructed by expanding `Ti` to the result shape. +#' +#' # Examples +#' ```{r} +#' x <- op_array(c(1, 2, 3), "int32") +#' y <- op_array(c(4, 5, 6), "int32") +#' ``` +#' +#' ```{r} +#' c(grid_x, grid_y) %<-% op_meshgrid(x, y, indexing = "ij") +#' grid_x +#' # array([[1, 1, 1], +#' # [2, 2, 2], +#' # [3, 3, 3])) +#' grid_y +#' # array([[4, 5, 6], +#' # [4, 5, 6], +#' # [4, 5, 6])) +#' ``` +#' +#' @returns +#' Sequence of N tensors. +#' +#' @param ... +#' 1-D tensors representing the coordinates of a grid. +#' +#' @param indexing +#' `"xy"` or `"ij"`. "xy" is cartesian; `"ij"` is matrix +#' indexing of output. Defaults to `"xy"`. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' +#' @tether keras.ops.meshgrid +op_meshgrid <- +function (..., indexing = "xy") +{ + args <- lapply(list(...), function(x) { + if (storage.mode(x) == "double") + np_array(x, "int64") + else x + }) + keras$ops$meshgrid(!!!args, indexing = indexing) +} + + +#' Return the minimum of a tensor or minimum along an axis. +#' +#' @description +#' +#' # Examples +#' ```{r} +#' (x <- op_convert_to_tensor(rbind(c(1, 3, 5), c(1, 5, 2)))) +#' op_min(x) +#' op_min(x, axis = 1) +#' op_min(x, axis = 1, keepdims = TRUE) +#' ``` +#' @returns +#' Minimum of `x`. +#' +#' @param x +#' Input tensor. +#' +#' @param axis +#' Axis or axes along which to operate. By default, flattened input +#' is used. +#' +#' @param keepdims +#' If this is set to `TRUE`, the axes which are reduced are left +#' in the result as dimensions with size one. Defaults to `FALSE`. +#' +#' @param initial +#' The maximum value of an output element. Defaults to `NULL`. +#' +#' @export +#' @aliases op_amin +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.min +op_min <- +function (x, axis = NULL, keepdims = FALSE, initial = NULL) +{ + args <- capture_args(list(axis = as_axis)) + do.call(keras$ops$min, args) +} + + +#' Element-wise minimum of `x1` and `x2`. +#' +#' @returns +#' Output tensor, element-wise minimum of `x1` and `x2`. +#' +#' @param x1 +#' First tensor. +#' +#' @param x2 +#' Second tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.minimum +op_minimum <- +function (x1, x2) +keras$ops$minimum(x1, x2) + +#' @rdname op_minimum +#' @export +op_pmin <- op_minimum + + +#' Returns the element-wise remainder of division. +#' +#' Note that this function is automatically called when using the R operator `%%` with a tensor. +#' ```{r} +#' (x <- op_arange(10)) +#' op_mod(x, 3) +#' x %% 3 +#' ``` +#' +#' @returns +#' Output tensor, element-wise remainder of division. +#' +#' @param x1 +#' First tensor. +#' +#' @param x2 +#' Second tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.mod +op_mod <- +function (x1, x2) +keras$ops$mod(x1, x2) + + +#' Move axes of a tensor to new positions. 
+#'
+#' @description
+#' Other axes remain in their original order.
+#'
+#' @returns
+#' Tensor with moved axes.
+#'
+#' @param x
+#' Tensor whose axes should be reordered.
+#'
+#' @param source
+#' Original positions of the axes to move. These must be unique.
+#'
+#' @param destination
+#' Destination positions for each of the original axes.
+#' These must also be unique.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.moveaxis
+op_moveaxis <-
+function (x, source, destination)
+keras$ops$moveaxis(x, as_axis(source), as_axis(destination))
+
+
+#' Multiply arguments element-wise.
+#'
+#' Note that this function is automatically called when using the R operator `*` with a tensor.
+#' ```{r}
+#' (x <- op_arange(4))
+#' op_multiply(x, x)
+#' x * x
+#' ```
+#'
+#' @returns
+#' Output tensor, element-wise product of `x1` and `x2`.
+#'
+#' @param x1
+#' First input tensor.
+#'
+#' @param x2
+#' Second input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.multiply
+op_multiply <-
+function (x1, x2)
+keras$ops$multiply(x1, x2)
+
+
+#' Replace NaN with zero and infinity with large finite numbers.
+#'
+#' @returns
+#' `x`, with non-finite values replaced.
+#'
+#' @param x
+#' Input data.
+#'
+#' @param nan
+#' Optional float or int. Value to replace `NaN` entries with.
+#'
+#' @param posinf
+#' Optional float or int. Value to replace positive infinity with.
+#'
+#' @param neginf
+#' Optional float or int. Value to replace negative infinity with.
+#'
+#' @details
+#'
+#' # Example
+#' ```{r}
+#' (x <- op_convert_to_tensor(c(1, NaN, -Inf, Inf)))
+#' op_nan_to_num(x)
+#' op_nan_to_num(x, nan = -1, posinf = 2, neginf = -2)
+#' ```
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.nan_to_num
+op_nan_to_num <-
+function (x, nan = 0, posinf = NULL, neginf = NULL) {
+    args <- capture_args()
+    do.call(keras$ops$nan_to_num, args)
+}
+
+
+#' Return the number of dimensions of a tensor.
+#'
+#' @returns
+#' The number of dimensions in `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.ndim
+op_ndim <-
+function (x)
+keras$ops$ndim(x)
+
+
+#' Numerical negative, element-wise.
+#'
+#' Note that this function is automatically called when using the unary R operator `-` with a tensor.
+#' ```{r}
+#' (x <- op_arange(4))
+#' op_negative(x)
+#' -x
+#' ```
+#'
+#' @returns
+#' Output tensor, `y = -x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.negative
+op_negative <-
+function (x)
+keras$ops$negative(x)
+
+
+#' Return the indices of the elements that are non-zero.
+#'
+#' @returns
+#' Indices of elements that are non-zero.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.nonzero
+op_nonzero <-
+function (x)
+keras$ops$nonzero(x)
+
+
+#' Return `(x1 != x2)` element-wise.
+#'
+#' Note that this function is automatically called when using the R operator `!=` with a tensor.
+#' ```{r}
+#' (x <- op_arange(4))
+#' op_not_equal(x, 2)
+#' x != 2
+#' ```
+#'
+#' @returns
+#' Output tensor, element-wise comparison of `x1` and `x2`.
+#'
+#' @param x1
+#' First input tensor.
+#'
+#' @param x2
+#' Second input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.not_equal
+op_not_equal <-
+function (x1, x2)
+keras$ops$not_equal(x1, x2)
+
+
+#' Return a new tensor of given shape and type, filled with ones.
+#'
+#' @returns
+#' Tensor of ones with the given shape and dtype.
+#'
+#' @param shape
+#' Shape of the new tensor.
+#'
+#' @param dtype
+#' Desired data type of the tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.ones
+op_ones <-
+function (shape, dtype = NULL)
+{
+    args <- capture_args(list(shape = normalize_shape))
+    do.call(keras$ops$ones, args)
+}
+
+
+#' Return a tensor of ones with the same shape and type of `x`.
+#'
+#' @returns
+#' A tensor of ones with the same shape and type as `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param dtype
+#' Overrides the data type of the result.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.ones_like
+op_ones_like <-
+function (x, dtype = NULL)
+keras$ops$ones_like(x, dtype)
+
+
+#' Compute the outer product of two vectors.
+#'
+#' @description
+#' Given two vectors `x1` and `x2`, the outer product is:
+#'
+#' ```
+#' out[i, j] = x1[i] * x2[j]
+#' ```
+#'
+#' @returns
+#' Outer product of `x1` and `x2`.
+#'
+#' @param x1
+#' First input tensor.
+#'
+#' @param x2
+#' Second input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.outer
+op_outer <-
+function (x1, x2)
+keras$ops$outer(x1, x2)
+
+
+#' Pad a tensor.
+#'
+#' @description
+#'
+#' # Note
+#' The Torch backend only supports modes `"constant"`, `"reflect"`,
+#' `"symmetric"` and `"circular"`, and is the only backend that
+#' supports `"circular"` mode.
+#'
+#' The TensorFlow backend only supports modes `"constant"`, `"reflect"`
+#' and `"symmetric"`.
+#'
+#' @returns
+#' Padded tensor.
+#'
+#' @param x
+#' Tensor to pad.
+#'
+#' @param pad_width
+#' Number of values padded to the edges of each axis.
+#' `((before_1, after_1), ...(before_N, after_N))` unique pad
+#' widths for each axis.
+#' `((before, after),)` yields same before and after pad for
+#' each axis.
+#' `(pad,)` or `int` is a shortcut for `before = after = pad`
+#' width for all axes.
+#'
+#' @param mode
+#' One of `"constant"`, `"edge"`, `"linear_ramp"`,
+#' `"maximum"`, `"mean"`, `"median"`, `"minimum"`,
+#' `"reflect"`, `"symmetric"`, `"wrap"`, `"empty"`,
+#' `"circular"`. Defaults to `"constant"`.
+#'
+#' @param constant_values
+#' Value to pad with if `mode == "constant"`.
+#' Defaults to `0`. A `ValueError` is raised if not `NULL` and
+#' `mode != "constant"`.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.pad
+op_pad <-
+function (x, pad_width, mode = "constant", constant_values = NULL)
+{
+    args <- capture_args(list(pad_width = as_integer))
+    do.call(keras$ops$pad, args)
+}
+
+
+#' First tensor elements raised to powers from second tensor, element-wise.
+#'
+#' Note that this function is automatically called when using the R operator `^` with a tensor.
+#' ```{r}
+#' (x <- op_arange(4))
+#' op_power(2, x)
+#' 2 ^ x
+#' ```
+#'
+#' @returns
+#' Output tensor, the bases in `x1` raised to the exponents in `x2`.
+#'
+#' @param x1
+#' The bases.
+#'
+#' @param x2
+#' The exponents.
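+#'
+#' @details
+#' # Example
+#' Tensor bases with a scalar exponent also work (a minimal sketch):
+#' ```{r}
+#' (x <- op_arange(4))
+#' op_power(x, 2)
+#' x ^ 2
+#' ```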
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.power
+op_power <-
+function (x1, x2)
+keras$ops$power(x1, x2)
+
+
+#' Return the product of tensor elements over a given axis.
+#'
+#' @returns
+#' Product of elements of `x` over the given axis or axes.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param axis
+#' Axis or axes along which a product is performed. The default,
+#' `axis = NULL`, will compute the product of all elements
+#' in the input tensor.
+#'
+#' @param keepdims
+#' If this is set to `TRUE`, the axes which are reduced
+#' are left in the result as dimensions with size one.
+#'
+#' @param dtype
+#' Data type of the returned tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.prod
+op_prod <-
+function (x, axis = NULL, keepdims = FALSE, dtype = NULL)
+{
+    args <- capture_args(list(axis = as_axis))
+    do.call(keras$ops$prod, args)
+}
+
+
+#' Compute the q-th quantile(s) of the data along the specified axis.
+#'
+#' @returns
+#' The quantile(s). If `q` is a single probability and `axis = NULL`, then
+#' the result is a scalar. If multiple probability levels are given, the
+#' first axis of the result corresponds to the quantiles. The other axes are
+#' the axes that remain after the reduction of `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param q
+#' Probability or sequence of probabilities for the quantiles to
+#' compute. Values must be between 0 and 1 inclusive.
+#'
+#' @param axis
+#' Axis or axes along which the quantiles are computed. Defaults to
+#' `axis = NULL`, which computes the quantile(s) along a flattened
+#' version of the array.
+#'
+#' @param method
+#' A string specifying the method to use for estimating the
+#' quantile. Available methods are `"linear"`, `"lower"`, `"higher"`,
+#' `"midpoint"`, and `"nearest"`. Defaults to `"linear"`.
+#' If the desired quantile lies between two data points `i < j`:
+#' - `"linear"`: `i + (j - i) * fraction`, where fraction is the
+#'   fractional part of the index surrounded by `i` and `j`.
+#' - `"lower"`: `i`.
+#' - `"higher"`: `j`.
+#' - `"midpoint"`: `(i + j) / 2`
+#' - `"nearest"`: `i` or `j`, whichever is nearest.
+#'
+#' @param keepdims
+#' If this is set to `TRUE`, the axes which are reduced
+#' are left in the result as dimensions with size one.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+# @seealso
+# +
+#' @tether keras.ops.quantile
+op_quantile <-
+function (x, q, axis = NULL, method = "linear", keepdims = FALSE)
+{
+    args <- capture_args(list(axis = as_axis))
+    do.call(keras$ops$quantile, args)
+}
+
+
+#' Return a contiguous flattened tensor.
+#'
+#' @description
+#' A 1-D tensor, containing the elements of the input, is returned.
+#'
+#' @returns
+#' Output tensor.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.ravel
+op_ravel <-
+function (x)
+keras$ops$ravel(x)
+
+
+#' Return the real part of the complex argument.
+#'
+#' @returns
+#' The real component of the complex argument.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.real
+op_real <-
+function (x)
+keras$ops$real(x)
+
+
+#' Return the reciprocal of the argument, element-wise.
+#'
+#' @description
+#' Calculates `1/x`.
+#'
+#' @returns
+#' Output tensor, element-wise reciprocal of `x`.
+#'
+#' @param x
+#' Input tensor.
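+#'
+#' @details
+#' # Example
+#' A minimal example (values chosen arbitrarily):
+#' ```{r}
+#' op_reciprocal(op_array(c(1, 2, 4)))
+#' ```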
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.reciprocal
+op_reciprocal <-
+function (x)
+keras$ops$reciprocal(x)
+
+
+#' Repeat each element of a tensor.
+#'
+#' @returns
+#' Output tensor.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param repeats
+#' The number of repetitions for each element.
+#'
+#' @param axis
+#' The axis along which to repeat values. By default, use
+#' the flattened input array, and return a flat output array.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.repeat
+op_repeat <-
+function (x, repeats, axis = NULL)
+{
+    args <- capture_args(list(axis = as_axis))
+    do.call(keras$ops$`repeat`, args)
+}
+
+
+#' Gives a new shape to a tensor without changing its data.
+#'
+#' @returns
+#' The reshaped tensor.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param newshape
+#' The new shape should be compatible with the original shape.
+#' One shape dimension can be `-1`, in which case the value is
+#' inferred from the length of the array and the remaining dimensions.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.reshape
+op_reshape <-
+function (x, newshape)
+{
+    keras$ops$reshape(x, tuple(lapply(shape(newshape),
+        function(d) d %||% -1L)))
+}
+
+
+#' Roll tensor elements along a given axis.
+#'
+#' @description
+#' Elements that roll beyond the last position are re-introduced at the first.
+#'
+#' @returns
+#' Output tensor.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param shift
+#' The number of places by which elements are shifted.
+#'
+#' @param axis
+#' The axis along which elements are shifted. By default, the
+#' array is flattened before shifting, after which the original
+#' shape is restored.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.roll
+op_roll <-
+function (x, shift, axis = NULL)
+{
+    args <- capture_args(list(shift = as_integer, axis = as_axis))
+    do.call(keras$ops$roll, args)
+}
+
+
+#' Evenly round to the given number of decimals.
+#'
+#' @returns
+#' Output tensor.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param decimals
+#' Number of decimal places to round to. Defaults to `0`.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.round
+op_round <-
+function (x, decimals = 0L)
+{
+    args <- capture_args(list(decimals = as_integer))
+    do.call(keras$ops$round, args)
+}
+
+
+#' Returns a tensor with the signs of the elements of `x`.
+#'
+#' @returns
+#' Output tensor of same shape as `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.sign
+op_sign <-
+function (x)
+keras$ops$sign(x)
+
+
+#' Trigonometric sine, element-wise.
+#'
+#' @returns
+#' Output tensor of same shape as `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.sin
+op_sin <-
+function (x)
+keras$ops$sin(x)
+
+
+#' Hyperbolic sine, element-wise.
+#'
+#' @returns
+#' Output tensor of same shape as `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.sinh
+op_sinh <-
+function (x)
+keras$ops$sinh(x)
+
+
+#' Return the number of elements in a tensor.
+#'
+#' @returns
+#' Number of elements in `x`.
+#'
+#' @param x
+#' Input tensor.
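+#'
+#' @details
+#' # Example
+#' A minimal example:
+#' ```{r}
+#' x <- op_ones(c(2, 3))
+#' op_size(x)
+#' ```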
+#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.size +op_size <- +function (x) +keras$ops$size(x) + + +#' Sorts the elements of `x` along a given axis in ascending order. +#' +#' @returns +#' Sorted tensor. +#' +#' @param x +#' Input tensor. +#' +#' @param axis +#' Axis along which to sort. If `NULL`, the tensor is flattened +#' before sorting. Defaults to `-1`; the last axis. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.sort +op_sort <- +function (x, axis = -1L) +{ + args <- capture_args(list(axis = as_axis)) + do.call(keras$ops$sort, args) +} + + +#' Split a tensor into chunks. +#' +#' @description +#' +#' # Note +#' A split does not have to result in equal division when using +#' Torch backend. +#' +#' @returns +#' A list of tensors. +#' +#' @param x +#' Input tensor. +#' +#' @param indices_or_sections +#' If an integer, N, the tensor will be split into N +#' equal sections along `axis`. If a 1-D array of sorted integers, +#' the entries indicate indices at which the tensor will be split +#' along `axis`. +#' +#' @param axis +#' Axis along which to split. Defaults to `1`, the first axis. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.split +op_split <- +function (x, indices_or_sections, axis = 1L) +{ + args <- capture_args(list(indices_or_sections = as_integer, + axis = as_axis)) + do.call(keras$ops$split, args) +} + + +#' Return the non-negative square root of a tensor, element-wise. +#' +#' @returns +#' Output tensor, the non-negative square root of `x`. +#' +#' @param x +#' Input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.sqrt +op_sqrt <- +function (x) +keras$ops$sqrt(x) + + +#' Return the element-wise square of the input. +#' +#' @returns +#' Output tensor, the square of `x`. +#' +#' @param x +#' Input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.square +op_square <- +function (x) +keras$ops$square(x) + + +#' Remove axes of length one from `x`. +#' +#' @returns +#' The input tensor with all or a subset of the dimensions of +#' length 1 removed. +#' +#' @param x +#' Input tensor. +#' +#' @param axis +#' Select a subset of the entries of length one in the shape. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.squeeze +op_squeeze <- +function (x, axis = NULL) +{ + args <- capture_args(list(axis = as_axis)) + do.call(keras$ops$squeeze, args) +} + + +#' Join a sequence of tensors along a new axis. +#' +#' @description +#' The `axis` parameter specifies the index of the new axis in the +#' dimensions of the result. +#' +#' @returns +#' The stacked tensor. +#' +#' @param x +#' A sequence of tensors. +#' +#' @param axis +#' Axis along which to stack. Defaults to `1`, the first axis. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.stack +op_stack <- +function (x, axis = 1L) +{ + args <- capture_args(list(axis = as_axis)) + do.call(keras$ops$stack, args) +} + + +#' Compute the standard deviation along the specified axis. +#' +#' @returns +#' Output tensor containing the standard deviation values. +#' +#' @param x +#' Input tensor. +#' +#' @param axis +#' Axis along which to compute standard deviation. +#' Default is to compute the standard deviation of the +#' flattened tensor. 
+#' +#' @param keepdims +#' If this is set to `TRUE`, the axes which are reduced are left +#' in the result as dimensions with size one. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.std +op_std <- +function (x, axis = NULL, keepdims = FALSE) +{ + args <- capture_args(list(axis = as_axis)) + do.call(keras$ops$std, args) +} + + +#' Subtract arguments element-wise. +#' +#' Note that this function is automatically called when using the R operator `-` with a tensor. +#' ```{r} +#' x <- op_ones(c(3)) +#' op_subtract(x, x) +#' x - x +#' ``` +#' +#' @returns +#' Output tensor, element-wise difference of `x1` and `x2`. +#' +#' @param x1 +#' First input tensor. +#' +#' @param x2 +#' Second input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.subtract +op_subtract <- +function (x1, x2) +keras$ops$subtract(x1, x2) + + +#' Sum of a tensor over the given axes. +#' +#' @returns +#' Output tensor containing the sum. +#' +#' @param x +#' Input tensor. +#' +#' @param axis +#' Axis or axes along which the sum is computed. The default is to +#' compute the sum of the flattened tensor. +#' +#' @param keepdims +#' If this is set to `TRUE`, the axes which are reduced are left +#' in the result as dimensions with size one. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.sum +op_sum <- +function (x, axis = NULL, keepdims = FALSE) +{ + args <- capture_args(list(axis = as_axis)) + do.call(keras$ops$sum, args) +} + + +#' Interchange two axes of a tensor. +#' +#' @returns +#' A tensor with the axes swapped. +#' +#' @param x +#' Input tensor. +#' +#' @param axis1 +#' First axis. +#' +#' @param axis2 +#' Second axis. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.swapaxes +op_swapaxes <- +function (x, axis1, axis2) +keras$ops$swapaxes(x, axis1, axis2) + + +#' Take elements from a tensor along an axis. +#' +#' @returns +#' The corresponding tensor of values. +#' +#' @param x +#' Source tensor. +#' +#' @param indices +#' The indices of the values to extract. +#' +#' @param axis +#' The axis over which to select values. By default, the +#' flattened input tensor is used. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.take +op_take <- +function (x, indices, axis = NULL) +{ + args <- capture_args(list(indices = as_index, axis = as_axis)) + do.call(keras$ops$take, args) +} + + +#' Select values from `x` at the 1-D `indices` along the given axis. +#' +#' @returns +#' The corresponding tensor of values. +#' +#' @param x +#' Source tensor. +#' +#' @param indices +#' The indices of the values to extract. +#' +#' @param axis +#' The axis over which to select values. By default, the flattened +#' input tensor is used. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.take_along_axis +op_take_along_axis <- +function (x, indices, axis = NULL) +{ + args <- capture_args(list(indices = as_index, axis = as_axis)) + do.call(keras$ops$take_along_axis, args) +} + + +#' Compute tangent, element-wise. +#' +#' @returns +#' Output tensor of same shape as `x`. +#' +#' @param x +#' Input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.tan +op_tan <- +function (x) +keras$ops$tan(x) + + +#' Hyperbolic tangent, element-wise. 
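+#'
+#' # Example
+#' A small sketch (printed values are approximate and depend on the backend):
+#' ```{r, eval=FALSE}
+#' x <- op_convert_to_tensor(c(-1, 0, 1))
+#' op_tanh(x)  # approximately c(-0.762, 0, 0.762)
+#' ```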
+#'
+#' @returns
+#' Output tensor of same shape as `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.tanh
+op_tanh <-
+function (x)
+keras$ops$tanh(x)
+
+
+#' Compute the tensor dot product along specified axes.
+#'
+#' @returns
+#' The tensor dot product of the inputs.
+#'
+#' @param x1
+#' First tensor.
+#'
+#' @param x2
+#' Second tensor.
+#'
+#' @param axes
+#' - If an integer, N, sum over the last N axes of `x1` and the
+#'   first N axes of `x2` in order. The sizes of the corresponding
+#'   axes must match.
+#' - Or, a list of axes to be summed over, first sequence applying
+#'   to `x1`, second to `x2`. Both sequences must be of the
+#'   same length.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.tensordot
+op_tensordot <-
+function (x1, x2, axes = 3L)
+{
+    args <- capture_args(list(axes = as_axis))
+    do.call(keras$ops$tensordot, args)
+}
+
+
+#' Repeat `x` the number of times given by `repeats`.
+#'
+#' @description
+#' If `repeats` has length `d`, the result will have dimension of
+#' `max(d, x.ndim)`.
+#'
+#' If `x.ndim < d`, `x` is promoted to be d-dimensional by prepending
+#' new axes.
+#'
+#' If `x.ndim > d`, `repeats` is promoted to `x.ndim` by prepending 1's to it.
+#'
+#' @returns
+#' The tiled output tensor.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param repeats
+#' The number of repetitions of `x` along each axis.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.tile
+op_tile <-
+function (x, repeats)
+keras$ops$tile(x, repeats)
+
+
+#' Return the sum along diagonals of the tensor.
+#'
+#' @description
+#' If `x` is 2-D, the sum along its diagonal with the given offset is
+#' returned, i.e., the sum of elements `x[i, i+offset]` for all `i`.
+#'
+#' If `x` has more than two dimensions, then the axes specified by `axis1`
+#' and `axis2` are used to determine the 2-D sub-arrays whose traces are
+#' returned.
+#'
+#' The shape of the resulting tensor is the same as that of `x` with `axis1`
+#' and `axis2` removed.
+#'
+#' @returns
+#' If `x` is 2-D, the sum of the diagonal is returned. If `x` has
+#' larger dimensions, then a tensor of sums along diagonals is
+#' returned.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param offset
+#' Offset of the diagonal from the main diagonal. Can be
+#' both positive and negative. Defaults to `0`.
+#'
+#' @param axis1
+#' Axis to be used as the first axis of the 2-D sub-arrays.
+#' Defaults to `1` (the first axis).
+#'
+#' @param axis2
+#' Axis to be used as the second axis of the 2-D sub-arrays.
+#' Defaults to `2` (the second axis).
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.trace
+op_trace <-
+function (x, offset = 0L, axis1 = 1L, axis2 = 2L)
+{
+    args <- capture_args(list(offset = as_integer, axis1 = as_integer,
+        axis2 = as_integer))
+    do.call(keras$ops$trace, args)
+}
+
+
+#' Returns a tensor with `axes` transposed.
+#'
+#' @returns
+#' `x` with its axes permuted.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param axes
+#' Sequence of integers. Permutation of the dimensions of `x`.
+#' By default, the order of the axes is reversed.
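+#'
+#' # Example
+#' A minimal sketch of a 2-D transpose (the shapes are illustrative):
+#' ```{r, eval=FALSE}
+#' x <- op_reshape(op_arange(6), c(2, 3))
+#' op_shape(op_transpose(x))           # shape(3, 2)
+#' op_shape(op_transpose(x, c(2, 1)))  # the same permutation, made explicit
+#' ```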
+#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.transpose +op_transpose <- +function (x, axes = NULL) +{ + args <- capture_args(list(axes = as_axis)) + do.call(keras$ops$transpose, args) +} + + +#' Return a tensor with ones at and below a diagonal and zeros elsewhere. +#' +#' @returns +#' Tensor with its lower triangle filled with ones and zeros elsewhere. +#' `T[i, j] == 1` for `j <= i + k`, 0 otherwise. +#' +#' @param N +#' Number of rows in the tensor. +#' +#' @param M +#' Number of columns in the tensor. +#' +#' @param k +#' The sub-diagonal at and below which the array is filled. +#' `k = 0` is the main diagonal, while `k < 0` is below it, and +#' `k > 0` is above. The default is 0. +#' +#' @param dtype +#' Data type of the returned tensor. The default is "float32". +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.tri +op_tri <- +function (N, M = NULL, k = 0L, dtype = NULL) +{ + args <- capture_args(list(k = as_integer)) + do.call(keras$ops$tri, args) +} + + +#' Return lower triangle of a tensor. +#' +#' @description +#' For tensors with `ndim` exceeding 2, `tril` will apply to the +#' final two axes. +#' +#' @returns +#' Lower triangle of `x`, of same shape and data type as `x`. +#' +#' @param x +#' Input tensor. +#' +#' @param k +#' Diagonal above which to zero elements. Defaults to `0`. the +#' main diagonal. `k < 0` is below it, and `k > 0` is above it. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.tril +op_tril <- +function (x, k = 0L) +{ + args <- capture_args(list(k = as_integer)) + do.call(keras$ops$tril, args) +} + + +#' Return upper triangle of a tensor. +#' +#' @description +#' For tensors with `ndim` exceeding 2, `triu` will apply to the +#' final two axes. +#' +#' @returns +#' Upper triangle of `x`, of same shape and data type as `x`. +#' +#' @param x +#' Input tensor. +#' +#' @param k +#' Diagonal below which to zero elements. Defaults to `0`. the +#' main diagonal. `k < 0` is below it, and `k > 0` is above it. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.triu +op_triu <- +function (x, k = 0L) +{ + args <- capture_args(list(k = as_integer)) + do.call(keras$ops$triu, args) +} + + +# ' Alias for `keras.ops.divide`. +# ' +# ' @param x1 +# ' see description +# ' +# ' @param x2 +# ' see description +# ' +# ' @export +# ' @family numpy ops +# ' @family ops +# ' @seealso +# ' + +# ' + +# ' @tether keras.ops.true_divide +# op_true_divide <- +# function (x1, x2) +# keras$ops$true_divide(x1, x2) + + +#' Compute the variance along the specified axes. +#' +#' @returns +#' Output tensor containing the variance. +#' +#' @param x +#' Input tensor. +#' +#' @param axis +#' Axis or axes along which the variance is computed. The default +#' is to compute the variance of the flattened tensor. +#' +#' @param keepdims +#' If this is set to `TRUE`, the axes which are reduced are left +#' in the result as dimensions with size one. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.var +op_var <- +function (x, axis = NULL, keepdims = FALSE) +{ + args <- capture_args(list(axis = as_axis)) + do.call(keras$ops$var, args) +} + + +#' Return the dot product of two vectors. +#' +#' @description +#' If the first argument is complex, the complex conjugate of the first +#' argument is used for the calculation of the dot product. 
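+#'
+#' For example, with real-valued inputs (a sketch; both inputs are converted
+#' to tensors):
+#' ```{r, eval=FALSE}
+#' op_vdot(c(1, 2, 3), c(4, 5, 6))  # 1*4 + 2*5 + 3*6 = 32
+#' ```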
+#' +#' Multidimensional tensors are flattened before the dot product is taken. +#' +#' @returns +#' Output tensor. +#' +#' @param x1 +#' First input tensor. If complex, its complex conjugate is taken +#' before calculation of the dot product. +#' +#' @param x2 +#' Second input tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.vdot +op_vdot <- +function (x1, x2) +keras$ops$vdot(x1, x2) + + +#' Stack tensors in sequence vertically (row wise). +#' +#' @returns +#' Tensor formed by stacking the given tensors. +#' +#' @param xs +#' Sequence of tensors. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.vstack +op_vstack <- +function (xs) +keras$ops$vstack(xs) + +#' Turn a function into a vectorized function. +#' +#' @description +#' +#' # Examples +#' +#' ```{r} +#' # currently does not work w/ tensorflow backend +#' if(config_backend() != "tensorflow") { +#' +#' myfunc <- function(a, b) a + b +#' +#' vfunc <- op_vectorize(myfunc) +#' y <- vfunc(c(1, 2, 3, 4), 2) +#' print(y) +#' # with Jax backend, y is: +#' # Array([3., 4., 5., 6.], dtype=float32) +#' } +#' ``` +#' +#' @returns +#' A new function that applies `func` to every element +#' of its input along axis 1 (the batch axis, the first axis). +#' +#' @param func +#' Callable of a single tensor argument. +#' +#' @param excluded +#' Optional set of integers representing +#' positional arguments for which the function +#' will not be vectorized. +#' These will be passed directly to `func` unmodified. +#' +#' @param signature +#' Optional generalized universal function signature, +#' e.g., `"(m,n),(n)->(m)"` for vectorized +#' matrix-vector multiplication. If provided, +#' `func` will be called with (and expected to return) +#' arrays with shapes given by the size of corresponding +#' core dimensions. By default, `func` is assumed +#' to take scalar tensors as input and output. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @tether keras.ops.vectorize +op_vectorize <- +function (func, ..., excluded = NULL, signature = NULL) +keras$ops$vectorize(func, ..., excluded = excluded, signature = signature) + + +#' Return elements chosen from `x1` or `x2` depending on `condition`. +#' +#' @returns +#' A tensor with elements from `x1` where `condition` is `TRUE`, and +#' elements from `x2` where `condition` is `FALSE`. +#' +#' @param condition +#' Where `TRUE`, yield `x1`, otherwise yield `x2`. +#' +#' @param x1 +#' Values from which to choose when `condition` is `TRUE`. +#' +#' @param x2 +#' Values from which to choose when `condition` is `FALSE`. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.where +op_where <- +function (condition, x1 = NULL, x2 = NULL) +keras$ops$where(condition, x1, x2) + + +#' Return a new tensor of given shape and type, filled with zeros. +#' +#' @returns +#' Tensor of zeros with the given shape and dtype. +#' +#' @param shape +#' Shape of the new tensor. +#' +#' @param dtype +#' Desired data type of the tensor. +#' +#' @export +#' @family numpy ops +#' @family ops +#' @seealso +#' + +# + +#' @tether keras.ops.zeros +op_zeros <- +function (shape, dtype = NULL) +{ + args <- capture_args(list(shape = normalize_shape)) + do.call(keras$ops$zeros, args) +} + + +#' Return a tensor of zeros with the same shape and type as `x`. +#' +#' @returns +#' A tensor of zeros with the same shape and type as `x`. 
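+#'
+#' # Example
+#' A minimal sketch:
+#' ```{r, eval=FALSE}
+#' x <- op_ones(c(2, 3))
+#' op_zeros_like(x)  # a 2 x 3 tensor of zeros, with the dtype of `x`
+#' ```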
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @param dtype
+#' Overrides the data type of the result.
+#'
+#' @export
+#' @family numpy ops
+#' @family ops
+#' @seealso
+#' +
+# +
+#' @tether keras.ops.zeros_like
+op_zeros_like <-
+function (x, dtype = NULL)
+keras$ops$zeros_like(x, dtype)
+
+
+#' CTC (Connectionist Temporal Classification) loss.
+#'
+#' @param target
+#' A tensor of shape `(batch_size, max_length)` containing
+#' the true labels in integer format.
+#'
+#' @param output
+#' A tensor of shape `(batch_size, max_length, num_classes)`
+#' containing logits (the output of your model).
+#'
+#' @param target_length
+#' A tensor of shape `(batch_size)` containing the
+#' true label lengths.
+#'
+#' @param output_length
+#' A tensor of shape `(batch_size)` containing the
+#' output lengths.
+#'
+#' @param mask_index
+#' The index of the mask character in the vocabulary.
+#' Defaults to `0`.
+#'
+#' @returns A tensor, shape `(batch_size)`, of loss values.
+#' @export
+#' @family nn ops
+#' @family ops
+#' @tether keras.ops.ctc_loss
+# @seealso
+# +
+op_ctc_loss <-
+function (target, output, target_length, output_length, mask_index = 0L)
+{
+    args <- capture_args(list(target = as_integer_array, mask_index = as_integer))
+    do.call(keras$ops$ctc_loss, args)
+}
+
+
+#' Hard SiLU activation function, also known as Hard Swish.
+#'
+#' @description
+#' It is defined as:
+#'
+#' - `0` if `x < -3`
+#' - `x` if `x > 3`
+#' - `x * (x + 3) / 6` if `-3 <= x <= 3`
+#'
+#' It's a faster, piecewise linear approximation of the silu activation.
+#'
+#' # Examples
+#' ```{r}
+#' x <- op_convert_to_tensor(c(-3.0, -1.0, 0.0, 1.0, 3.0))
+#' op_hard_silu(x)
+#' ```
+#'
+#' @returns
+#' A tensor with the same shape as `x`.
+#'
+#' @param x
+#' Input tensor.
+#'
+#' @export
+#' @family nn ops
+#' @family ops
+#' @tether keras.ops.hard_silu
+# @seealso
+# +
+op_hard_silu <-
+function (x)
+keras$ops$hard_silu(x)
+
+#' @rdname op_hard_silu
+#' @export
+op_hard_swish <-
+function (x)
+keras$ops$hard_swish(x)
+
+
+#' Decorator to define a function with a custom gradient.
+#'
+#' @description
+#' This decorator allows fine-grained control over the gradients of a sequence
+#' of operations. This may be useful for multiple reasons, including providing
+#' a more efficient or numerically stable gradient for a sequence of
+#' operations.
+#'
+#' # Example
+#'
+#' Backend-agnostic example.
+#' ```{r}
+#' log1pexp <- op_custom_gradient(\(x) {
+#'
+#'   e <- op_exp(x)
+#'
+#'   grad <- function(..., upstream = NULL) {
+#'     upstream <- upstream %||% ..1
+#'     op_multiply(upstream, 1.0 - 1.0 / op_add(1, e))
+#'   }
+#'
+#'   tuple(op_log(1 + e), grad)
+#'
+#' })
+#'
+#' if(config_backend() == "tensorflow") {
+#'   tf <- tensorflow::tf
+#'   x <- op_convert_to_tensor(100.0)
+#'   with(tf$GradientTape() %as% tape, {
+#'     tape$watch(x)
+#'     y <- log1pexp(x)
+#'   })
+#'   dy_dx <- tape$gradient(y, x)
+#'   stopifnot(as.numeric(dy_dx) == 1)
+#' }
+#' ```
+#'
+#' @returns
+#' A function `h(...)` which returns the same value as `f(...)[[1]]` and whose
+#' gradient is determined by `f(...)[[2]]`.
+#'
+#' @param f
+#' Function `f(...)` that returns a tuple `(output, grad_fn)` where:
+#' - `...` is a sequence of unnamed arguments,
+#'   each a tensor input or nested structure of tensor inputs to the
+#'   function.
+#' - `output` is a (potentially nested structure of) tensor outputs of applying
+#'   operations in forward_fn `f()` to `...`.
+#' - `grad_fn` is a function with the signature `grad_fn(..., upstream)` which +#' returns a list of tensors the same size as (flattened) `...`: the +#' derivatives of tensors in `output` with respect to the tensors in +#' `...`. `upstream` is a tensor or +#' sequence of tensors holding the initial value gradients for each +#' tensor in `output`. +#' +#' @note +#' +#' Note that the `grad` function that returns gradient computation +#' requires `...` as well as an `upstream` named argument, depending +#' on the backend being set. With the JAX and TensorFlow backends, +#' it requires only one argument, whereas it might use the `upstream` +#' argument in the case of the PyTorch backend. +#' +#' When working with TensorFlow/JAX backend, `grad(upstream)` +#' is sufficient. With PyTorch, the `grad` function requires +#' `...` as well as `upstream`, e.g. `grad <- \(..., upstream)`. +#' Follow the example above to use `op_custom_gradient()` in +#' a way that is compatible with all backends. +#' +#' @export +#' @family core ops +#' @family ops +#' @tether keras.ops.custom_gradient +#' @seealso +#' + +op_custom_gradient <- +function (f) +keras$ops$custom_gradient(f) diff --git a/R/optimizers-schedules.R b/R/optimizers-schedules.R new file mode 100644 index 0000000000..231897c25b --- /dev/null +++ b/R/optimizers-schedules.R @@ -0,0 +1,563 @@ + + +#' A `LearningRateSchedule` that uses a cosine decay with optional warmup. +#' +#' @description +#' See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983), +#' SGDR: Stochastic Gradient Descent with Warm Restarts. +#' +#' For the idea of a linear warmup of our learning rate, +#' see [Goyal et al.](https://arxiv.org/pdf/1706.02677.pdf). +#' +#' When we begin training a model, we often want an initial increase in our +#' learning rate followed by a decay. If `warmup_target` is an int, this +#' schedule applies a linear increase per optimizer step to our learning rate +#' from `initial_learning_rate` to `warmup_target` for a duration of +#' `warmup_steps`. Afterwards, it applies a cosine decay function taking our +#' learning rate from `warmup_target` to `alpha` for a duration of +#' `decay_steps`. If `warmup_target` is NULL we skip warmup and our decay +#' will take our learning rate from `initial_learning_rate` to `alpha`. +#' It requires a `step` value to compute the learning rate. You can +#' just pass a backend variable that you increment at each training step. +#' +#' The schedule is a 1-arg callable that produces a warmup followed by a +#' decayed learning rate when passed the current optimizer step. This can be +#' useful for changing the learning rate value across different invocations of +#' optimizer functions. 
+#'
+#' Our warmup is computed as:
+#'
+#' ```{r}
+#' warmup_learning_rate <- function(step) {
+#'   completed_fraction <- step / warmup_steps
+#'   total_delta <- warmup_target - initial_learning_rate
+#'   completed_fraction * total_delta
+#' }
+#' ```
+#'
+#' And our decay is computed as:
+#'
+#' ```{r, eval=FALSE}
+#' if (is.null(warmup_target)) {
+#'   initial_decay_lr <- initial_learning_rate
+#' } else {
+#'   initial_decay_lr <- warmup_target
+#' }
+#'
+#' decayed_learning_rate <- function(step) {
+#'   step <- min(step, decay_steps)
+#'   cosine_decay <- 0.5 * (1 + cos(pi * step / decay_steps))
+#'   decayed <- (1 - alpha) * cosine_decay + alpha
+#'   initial_decay_lr * decayed
+#' }
+#' ```
+#'
+#' Example usage without warmup:
+#'
+#' ```{r}
+#' decay_steps <- 1000
+#' initial_learning_rate <- 0.1
+#' lr_decayed_fn <- learning_rate_schedule_cosine_decay(
+#'   initial_learning_rate, decay_steps)
+#' ```
+#'
+#' Example usage with warmup:
+#'
+#' ```{r}
+#' decay_steps <- 1000
+#' initial_learning_rate <- 0
+#' warmup_steps <- 1000
+#' target_learning_rate <- 0.1
+#' lr_warmup_decayed_fn <- learning_rate_schedule_cosine_decay(
+#'   initial_learning_rate, decay_steps, warmup_target = target_learning_rate,
+#'   warmup_steps = warmup_steps
+#' )
+#' ```
+#'
+#' You can pass this schedule directly into an optimizer
+#' as the learning rate. The learning rate schedule is also serializable and
+#' deserializable using `keras$optimizers$schedules$serialize` and
+#' `keras$optimizers$schedules$deserialize`.
+#'
+#' @returns
+#' A 1-arg callable learning rate schedule that takes the current optimizer
+#' step and outputs the decayed learning rate, a scalar tensor of the
+#' same type as `initial_learning_rate`.
+#'
+#' @param initial_learning_rate
+#' A float. The initial learning rate.
+#'
+#' @param decay_steps
+#' An integer. Number of steps to decay over.
+#'
+#' @param alpha
+#' A float. Minimum learning rate value for decay as a
+#' fraction of `initial_learning_rate`.
+#'
+#' @param name
+#' String. Optional name of the operation. Defaults to
+#' `"CosineDecay"`.
+#'
+#' @param warmup_target
+#' A float. The target learning rate for our
+#' warmup phase. It will be cast to the `initial_learning_rate` datatype.
+#' Setting it to `NULL` skips the warmup and begins the decay phase from
+#' `initial_learning_rate`. Otherwise the scheduler will warm up from
+#' `initial_learning_rate` to `warmup_target`.
+#'
+#' @param warmup_steps
+#' An integer. Number of steps to warm up over.
+#'
+#' @export
+#' @family optimizer learning rate schedules
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.optimizers.schedules.CosineDecay
+learning_rate_schedule_cosine_decay <-
+function (initial_learning_rate, decay_steps, alpha = 0, name = "CosineDecay",
+    warmup_target = NULL, warmup_steps = 0L)
+{
+    args <- capture_args(list(decay_steps = as_integer, warmup_steps = as_integer))
+    do.call(keras$optimizers$schedules$CosineDecay, args)
+}
+
+
+#' A `LearningRateSchedule` that uses a cosine decay schedule with restarts.
+#'
+#' @description
+#' See [Loshchilov & Hutter, ICLR2016](https://arxiv.org/abs/1608.03983),
+#' SGDR: Stochastic Gradient Descent with Warm Restarts.
+#'
+#' When training a model, it is often useful to lower the learning rate as
+#' the training progresses. This schedule applies a cosine decay function with
+#' restarts to an optimizer step, given a provided initial learning rate.
+#' It requires a `step` value to compute the decayed learning rate. You can
+#' just pass a backend variable that you increment at each training step.
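+#'
+#' For example, a sketch of querying the schedule directly (the step values
+#' are illustrative):
+#' ```{r, eval=FALSE}
+#' lr_fn <- learning_rate_schedule_cosine_decay_restarts(
+#'   initial_learning_rate = 0.1, first_decay_steps = 10)
+#' lr_fn(0)   # 0.1 at the start of the first period
+#' lr_fn(10)  # after a warm restart: roughly 0.1 * m_mul
+#' ```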
+#' +#' The schedule is a 1-arg callable that produces a decayed learning +#' rate when passed the current optimizer step. This can be useful for changing +#' the learning rate value across different invocations of optimizer functions. +#' +#' The learning rate multiplier first decays +#' from 1 to `alpha` for `first_decay_steps` steps. Then, a warm +#' restart is performed. Each new warm restart runs for `t_mul` times more +#' steps and with `m_mul` times initial learning rate as the new learning rate. +#' +#' # Example +#' ```{r, eval = TRUE} +#' first_decay_steps <- 1000 +#' lr_decayed_fn <- learning_rate_schedule_cosine_decay_restarts( +#' 0.001, +#' first_decay_steps) +#' ``` +#' +#' You can pass this schedule directly into a `optimizer` +#' as the learning rate. The learning rate schedule is also serializable and +#' deserializable using `keras$optimizers$schedules$serialize` and +#' `keras$optimizers$schedules$deserialize`. +#' +#' @returns +#' A 1-arg callable learning rate schedule that takes the current optimizer +#' step and outputs the decayed learning rate, a scalar tensor of the +#' same type as `initial_learning_rate`. +#' +#' @param initial_learning_rate +#' A float. The initial learning rate. +#' +#' @param first_decay_steps +#' An integer. Number of steps to decay over. +#' +#' @param t_mul +#' A float. Used to derive the number of iterations in +#' the i-th period. +#' +#' @param m_mul +#' A float. Used to derive the initial learning rate of +#' the i-th period. +#' +#' @param alpha +#' A float. Minimum learning rate value as a fraction of +#' the `initial_learning_rate`. +#' +#' @param name +#' String. Optional name of the operation. Defaults to +#' `"SGDRDecay"`. +#' +#' @export +#' @family optimizer learning rate schedules +#' @seealso +#' + +# + +#' +#' @tether keras.optimizers.schedules.CosineDecayRestarts +learning_rate_schedule_cosine_decay_restarts <- +function (initial_learning_rate, first_decay_steps, t_mul = 2, + m_mul = 1, alpha = 0, name = "SGDRDecay") +{ + args <- capture_args(list(first_decay_steps = as_integer)) + do.call(keras$optimizers$schedules$CosineDecayRestarts, args) +} + + +#' A `LearningRateSchedule` that uses an exponential decay schedule. +#' +#' @description +#' When training a model, it is often useful to lower the learning rate as +#' the training progresses. This schedule applies an exponential decay function +#' to an optimizer step, given a provided initial learning rate. +#' +#' The schedule is a 1-arg callable that produces a decayed learning +#' rate when passed the current optimizer step. This can be useful for changing +#' the learning rate value across different invocations of optimizer functions. +#' It is computed as: +#' +#' ```{r} +#' decayed_learning_rate <- function(step) { +#' initial_learning_rate * decay_rate ^ (step / decay_steps) +#' } +#' ``` +#' +#' If the argument `staircase` is `TRUE`, then `step / decay_steps` is +#' an integer division and the decayed learning rate follows a +#' staircase function. +#' +#' You can pass this schedule directly into a `optimizer` +#' as the learning rate. 
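+#'
+#' A quick numeric sketch of the formula above (the values are illustrative):
+#' ```{r, eval=FALSE}
+#' initial_learning_rate <- 0.1
+#' decay_rate <- 0.96
+#' decay_steps <- 100000
+#' step <- 100000
+#' initial_learning_rate * decay_rate ^ (step / decay_steps)  # 0.096
+#' ```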
+#'
+#' # Examples
+#' When fitting a Keras model, decay every 100000 steps with a base
+#' of 0.96:
+#'
+#' ```{r, eval=FALSE}
+#' initial_learning_rate <- 0.1
+#' lr_schedule <- learning_rate_schedule_exponential_decay(
+#'   initial_learning_rate,
+#'   decay_steps = 100000,
+#'   decay_rate = 0.96,
+#'   staircase = TRUE)
+#'
+#' model %>% compile(
+#'   optimizer = optimizer_sgd(learning_rate = lr_schedule),
+#'   loss = 'sparse_categorical_crossentropy',
+#'   metrics = c('accuracy'))
+#'
+#' model %>% fit(data, labels, epochs = 5)
+#' ```
+#'
+#' The learning rate schedule is also serializable and deserializable using
+#' `keras$optimizers$schedules$serialize` and
+#' `keras$optimizers$schedules$deserialize`.
+#'
+#' @returns
+#' A 1-arg callable learning rate schedule that takes the current optimizer
+#' step and outputs the decayed learning rate, a scalar tensor of the
+#' same type as `initial_learning_rate`.
+#'
+#' @param initial_learning_rate
+#' A float. The initial learning rate.
+#'
+#' @param decay_steps
+#' An integer. Must be positive. See the decay
+#' computation above.
+#'
+#' @param decay_rate
+#' A float. The decay rate.
+#'
+#' @param staircase
+#' Boolean. If `TRUE`, decay the learning rate at discrete
+#' intervals.
+#'
+#' @param name
+#' String. Optional name of the operation. Defaults to
+#' `"ExponentialDecay"`.
+#'
+#' @export
+#' @family optimizer learning rate schedules
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.optimizers.schedules.ExponentialDecay
+learning_rate_schedule_exponential_decay <-
+function (initial_learning_rate, decay_steps, decay_rate, staircase = FALSE,
+    name = "ExponentialDecay")
+{
+    args <- capture_args(list(decay_steps = as_integer))
+    do.call(keras$optimizers$schedules$ExponentialDecay, args)
+}
+
+
+#' A `LearningRateSchedule` that uses an inverse time decay schedule.
+#'
+#' @description
+#' When training a model, it is often useful to lower the learning rate as
+#' the training progresses. This schedule applies the inverse decay function
+#' to an optimizer step, given a provided initial learning rate.
+#' It requires a `step` value to compute the decayed learning rate. You can
+#' just pass a backend variable that you increment at each training step.
+#'
+#' The schedule is a 1-arg callable that produces a decayed learning
+#' rate when passed the current optimizer step. This can be useful for changing
+#' the learning rate value across different invocations of optimizer functions.
+#' It is computed as:
+#'
+#' ```{r}
+#' decayed_learning_rate <- function(step) {
+#'   initial_learning_rate / (1 + decay_rate * step / decay_steps)
+#' }
+#' ```
+#'
+#' or, if `staircase` is `TRUE`, as:
+#'
+#' ```{r}
+#' decayed_learning_rate <- function(step) {
+#'   initial_learning_rate /
+#'     (1 + decay_rate * floor(step / decay_steps))
+#' }
+#' ```
+#'
+#' You can pass this schedule directly into an `optimizer_*`
+#' as the learning rate.
+#'
+#' # Examples
+#' Fit a Keras model when decaying 1/t with a rate of 0.5:
+#'
+#' ```{r, eval=FALSE}
+#' ...
+#' initial_learning_rate <- 0.1
+#' decay_steps <- 1.0
+#' decay_rate <- 0.5
+#' learning_rate_fn <- learning_rate_schedule_inverse_time_decay(
+#'   initial_learning_rate, decay_steps, decay_rate)
+#'
+#' model %>% compile(
+#'   optimizer = optimizer_sgd(learning_rate = learning_rate_fn),
+#'   loss = 'sparse_categorical_crossentropy',
+#'   metrics = 'accuracy'
+#' )
+#'
+#' model %>% fit(data, labels, epochs = 5)
+#' ```
+#'
+#' @returns
+#' A 1-arg callable learning rate schedule that takes the current optimizer
+#' step and outputs the decayed learning rate, a scalar tensor of the
+#' same type as `initial_learning_rate`.
+#'
+#' @param initial_learning_rate
+#' A float. The initial learning rate.
+#'
+#' @param decay_steps
+#' How often to apply decay.
+#'
+#' @param decay_rate
+#' A number. The decay rate.
+#'
+#' @param staircase
+#' Whether to apply decay in a discrete staircase, as opposed
+#' to continuous, fashion.
+#'
+#' @param name
+#' String. Optional name of the operation. Defaults to
+#' `"InverseTimeDecay"`.
+#'
+#' @export
+#' @family optimizer learning rate schedules
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.optimizers.schedules.InverseTimeDecay
+learning_rate_schedule_inverse_time_decay <-
+function (initial_learning_rate, decay_steps, decay_rate, staircase = FALSE,
+    name = "InverseTimeDecay")
+{
+    args <- capture_args()
+    do.call(keras$optimizers$schedules$InverseTimeDecay, args)
+}
+
+
+#' A `LearningRateSchedule` that uses a piecewise constant decay schedule.
+#'
+#' @description
+#' The function returns a 1-arg callable to compute the piecewise constant
+#' when passed the current optimizer step. This can be useful for changing the
+#' learning rate value across different invocations of optimizer functions.
+#'
+#' # Examples
+#' Use a learning rate that's 1.0 for the first 100001 steps, 0.5
+#' for the next 10000 steps, and 0.1 for any additional steps.
+#'
+#' ```{r}
+#' step <- 0
+#' boundaries <- c(100000, 110000)
+#' values <- c(1.0, 0.5, 0.1)
+#' learning_rate_fn <- learning_rate_schedule_piecewise_constant_decay(
+#'   boundaries, values)
+#'
+#' # Later, whenever we perform an optimization step, we pass in the step.
+#' learning_rate <- learning_rate_fn(step)
+#' ```
+#'
+#' You can pass this schedule directly into an optimizer
+#' as the learning rate. The learning rate schedule is also serializable and
+#' deserializable using `keras$optimizers$schedules$serialize` and
+#' `keras$optimizers$schedules$deserialize`.
+#'
+#' # Raises
+#' ValueError: if the number of elements in the `boundaries` and `values`
+#' lists does not match.
+#'
+#' @returns
+#' A 1-arg callable learning rate schedule that takes the current optimizer
+#' step and outputs the decayed learning rate, a scalar tensor of the
+#' same type as the boundary tensors.
+#'
+#' The output of the 1-arg function that takes the `step`
+#' is the first element of `values` when `step <= boundaries[1]`,
+#' the second element when `step > boundaries[1]` and `step <= boundaries[2]`,
+#' ..., and the last element of `values` when
+#' `step > boundaries[length(boundaries)]`.
+#'
+#' @param boundaries
+#' A list of numbers with strictly increasing
+#' entries, and with all elements having the same type as the
+#' optimizer step.
+#'
+#' @param values
+#' A list of numbers that specifies the values for the
+#' intervals defined by `boundaries`. It should have one more
+#' element than `boundaries`, and all elements should have the same
+#' type.
+#'
+#' @param name
+#' A string. Optional name of the operation. Defaults to
+#' `"PiecewiseConstant"`.
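+#'
+#' # Example
+#' A sketch of the three regimes implied by the `boundaries`/`values` pairing
+#' above (the step values are illustrative):
+#' ```{r, eval=FALSE}
+#' lr_fn <- learning_rate_schedule_piecewise_constant_decay(
+#'   boundaries = c(100000, 110000),
+#'   values = c(1.0, 0.5, 0.1))
+#' lr_fn(50000)   # 1.0  (step <= 100000)
+#' lr_fn(105000)  # 0.5  (100000 < step <= 110000)
+#' lr_fn(200000)  # 0.1  (step > 110000)
+#' ```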
+#' +#' @export +#' @family optimizer learning rate schedules +#' @seealso +#' + +# + +#' +#' @tether keras.optimizers.schedules.PiecewiseConstantDecay +learning_rate_schedule_piecewise_constant_decay <- +function (boundaries, values, name = "PiecewiseConstant") +{ + args <- capture_args() + do.call(keras$optimizers$schedules$PiecewiseConstantDecay, + args) +} + + +#' A `LearningRateSchedule` that uses a polynomial decay schedule. +#' +#' @description +#' It is commonly observed that a monotonically decreasing learning rate, whose +#' degree of change is carefully chosen, results in a better performing model. +#' This schedule applies a polynomial decay function to an optimizer step, +#' given a provided `initial_learning_rate`, to reach an `end_learning_rate` +#' in the given `decay_steps`. +#' +#' It requires a `step` value to compute the decayed learning rate. You +#' can just pass a backend variable that you increment at each training +#' step. +#' +#' The schedule is a 1-arg callable that produces a decayed learning rate +#' when passed the current optimizer step. This can be useful for changing the +#' learning rate value across different invocations of optimizer functions. +#' It is computed as: +#' +#' ```{r} +#' decayed_learning_rate <- function(step) { +#' step = min(step, decay_steps) +#' ((initial_learning_rate - end_learning_rate) * +#' (1 - step / decay_steps) ^ (power)) + +#' end_learning_rate +#' } +#' ``` +#' +#' If `cycle` is TRUE then a multiple of `decay_steps` is used, the first one +#' that is bigger than `step`. +#' +#' ```{r} +#' decayed_learning_rate <- function(step) { +#' decay_steps = decay_steps * ceil(step / decay_steps) +#' ((initial_learning_rate - end_learning_rate) * +#' (1 - step / decay_steps) ^ (power)) + +#' end_learning_rate +#' } +#' ``` +#' +#' You can pass this schedule directly into a `Optimizer` +#' as the learning rate. +#' +#' # Examples +#' Fit a model while decaying from 0.1 to 0.01 in 10000 steps using +#' sqrt (i.e. power=0.5): +#' +#' ```{r, eval=FALSE} +#' ... +#' starter_learning_rate <- 0.1 +#' end_learning_rate <- 0.01 +#' decay_steps <- 10000 +#' learning_rate_fn <- learning_rate_schedule_polynomial_decay( +#' starter_learning_rate, +#' decay_steps, +#' end_learning_rate, +#' power=0.5) +#' +#' model %>% compile( +#' optimizer = optimizer_sgd(learning_rate=learning_rate_fn), +#' loss = 'sparse_categorical_crossentropy', +#' metrics = 'accuracy' +#' ) +#' +#' model %>% fit(data, labels, epochs=5) +#' ``` +#' +#' The learning rate schedule is also serializable and deserializable using +#' `keras$optimizers$schedules$serialize` and +#' `keras$optimizers$schedules$deserialize`. +#' +#' @returns +#' A 1-arg callable learning rate schedule that takes the current optimizer +#' step and outputs the decayed learning rate, a scalar tensor of the +#' same type as `initial_learning_rate`. +#' +#' @param initial_learning_rate +#' A float. The initial learning rate. +#' +#' @param decay_steps +#' A integer. Must be positive. See the decay +#' computation above. +#' +#' @param end_learning_rate +#' A float. The minimal end learning rate. +#' +#' @param power +#' A float. The power of the polynomial. Defaults to +#' `1.0`. +#' +#' @param cycle +#' A boolean, whether it should cycle beyond decay_steps. +#' +#' @param name +#' String. Optional name of the operation. Defaults to +#' `"PolynomialDecay"`. 
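+#'
+#' # Example
+#' A quick sketch of the endpoints of the decay (the values are illustrative):
+#' ```{r, eval=FALSE}
+#' lr_fn <- learning_rate_schedule_polynomial_decay(
+#'   initial_learning_rate = 0.1,
+#'   decay_steps = 10000,
+#'   end_learning_rate = 0.01,
+#'   power = 0.5)
+#' lr_fn(0)      # 0.1, the initial learning rate
+#' lr_fn(10000)  # 0.01, the end learning rate
+#' ```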
+#' +#' @export +#' @family optimizer learning rate schedules +#' @seealso +#' + +# + +#' +#' @tether keras.optimizers.schedules.PolynomialDecay +learning_rate_schedule_polynomial_decay <- +function (initial_learning_rate, decay_steps, end_learning_rate = 1e-04, + power = 1, cycle = FALSE, name = "PolynomialDecay") +{ + args <- capture_args(list(decay_steps = as_integer)) + do.call(keras$optimizers$schedules$PolynomialDecay, args) +} diff --git a/R/optimizers.R b/R/optimizers.R index 691335a9b2..c6d6f86ed6 100644 --- a/R/optimizers.R +++ b/R/optimizers.R @@ -1,277 +1,1511 @@ -#' Stochastic gradient descent optimizer +#' Optimizer that implements the Adadelta algorithm. #' -#' Stochastic gradient descent optimizer with support for momentum, learning -#' rate decay, and Nesterov momentum. +#' @description +#' Adadelta optimization is a stochastic gradient descent method that is based +#' on adaptive learning rate per dimension to address two drawbacks: #' -#' @param learning_rate float >= 0. Learning rate. -#' @param momentum float >= 0. Parameter that accelerates SGD in the relevant -#' direction and dampens oscillations. -#' @param decay float >= 0. Learning rate decay over each update. -#' @param nesterov boolean. Whether to apply Nesterov momentum. -#' @param clipnorm Gradients will be clipped when their L2 norm exceeds this -#' value. -#' @param clipvalue Gradients will be clipped when their absolute value exceeds -#' this value. -#' @param ... Unused, present only for backwards compatability +#' - The continual decay of learning rates throughout training. +#' - The need for a manually selected global learning rate. #' -#' @return Optimizer for use with \code{\link{compile.keras.engine.training.Model}}. +#' Adadelta is a more robust extension of Adagrad that adapts learning rates +#' based on a moving window of gradient updates, instead of accumulating all +#' past gradients. This way, Adadelta continues learning even when many updates +#' have been done. Compared to Adagrad, in the original version of Adadelta you +#' don't have to set an initial learning rate. In this version, the initial +#' learning rate can be set, as in most other Keras optimizers. #' -#' @family optimizers +#' # Reference +#' - [Zeiler, 2012](https://arxiv.org/abs/1212.5701) +#' +#' @param learning_rate +#' A float, a +#' [`LearningRateSchedule()]` instance, or +#' a callable that takes no arguments and returns the actual value to +#' use. The learning rate. Defaults to `0.001`. Note that `Adadelta` +#' tends to benefit from higher initial learning rate values compared +#' to other optimizers. To match the exact form in the original paper, +#' use 1.0. +#' +#' @param rho +#' A floating point value. The decay rate. Defaults to `0.95`. +#' +#' @param epsilon +#' Small floating point value for maintaining numerical stability. +#' +#' @param name +#' String. The name to use +#' for momentum accumulator weights created by +#' the optimizer. +#' +#' @param weight_decay +#' Float. If set, weight decay is applied. +#' +#' @param clipnorm +#' Float. If set, the gradient of each weight is individually +#' clipped so that its norm is no higher than this value. +#' +#' @param clipvalue +#' Float. If set, the gradient of each weight is clipped to be +#' no higher than this value. +#' +#' @param global_clipnorm +#' Float. If set, the gradient of all weights is clipped +#' so that their global norm is no higher than this value. +#' +#' @param use_ema +#' Boolean, defaults to `FALSE`. 
+#' If `TRUE`, exponential moving average +#' (EMA) is applied. EMA consists of computing an exponential moving +#' average of the weights of the model (as the weight values change +#' after each training batch), and periodically overwriting the +#' weights with their moving average. +#' +#' @param ema_momentum +#' Float, defaults to 0.99. Only used if `use_ema = TRUE`. +#' This is the momentum to use when computing +#' the EMA of the model's weights: +#' `new_average = ema_momentum * old_average + (1 - ema_momentum) * +#' current_variable_value`. +#' +#' @param ema_overwrite_frequency +#' Int or `NULL`, defaults to `NULL`. Only used if +#' `use_ema = TRUE`. Every `ema_overwrite_frequency` steps of iterations, +#' we overwrite the model variable by its moving average. +#' If `NULL`, the optimizer +#' does not overwrite model variables in the middle of training, +#' and you need to explicitly overwrite the variables +#' at the end of training by calling +#' `optimizer$finalize_variable_values()` +#' (which updates the model +#' variables in-place). When using the built-in `fit()` training loop, +#' this happens automatically after the last epoch, +#' and you don't need to do anything. +#' +#' @param loss_scale_factor +#' Float or `NULL`. If a float, the scale factor will +#' be multiplied the loss before computing gradients, and the inverse of +#' the scale factor will be multiplied by the gradients before +#' updating variables. Useful for preventing underflow during +#' mixed precision training. Alternately, +#' [`optimizer_loss_scale()`] will +#' automatically set a loss scale factor. +#' +#' @param gradient_accumulation_steps +#' Int or `NULL`. If an int, model & optimizer +#' variables will not be updated at every step; instead they will be +#' updated every `gradient_accumulation_steps` steps, using the average +#' value of the gradients since the last update. This is known as +#' "gradient accumulation". This can be useful +#' when your batch size is very small, in order to reduce gradient +#' noise at each update step. +#' +#' @param ... +#' For forward/backward compatability. #' #' @export -optimizer_sgd <- function(learning_rate = 0.01, momentum = 0.0, decay = 0.0, nesterov = FALSE, - clipnorm = NULL, clipvalue = NULL, ...) { - - backcompat_fix_rename_lr_to_learning_rate(...) - - # compose args using list so that clipnorm and clipvalue are excluded - # from the call when they aren't sepcified - args <- list( - learning_rate = learning_rate, - momentum = momentum, - decay = decay, - nesterov = nesterov - ) - args$clipnorm <- clipnorm - args$clipvalue <- clipvalue - do.call(keras$optimizers$SGD, args) +#' @returns an `Optimizer` instance +#' @family optimizers +#' @seealso +#' + +# + +#' @tether keras.optimizers.Adadelta +optimizer_adadelta <- +function (learning_rate = 0.001, rho = 0.95, epsilon = 1e-07, + weight_decay = NULL, clipnorm = NULL, clipvalue = NULL, global_clipnorm = NULL, + use_ema = FALSE, ema_momentum = 0.99, ema_overwrite_frequency = NULL, + name = "adadelta", ..., loss_scale_factor = NULL, + gradient_accumulation_steps = NULL) +{ + args <- capture_args(list( + ema_overwrite_frequency = as_integer, + gradient_accumulation_steps = as_integer)) + do.call(keras$optimizers$Adadelta, args) } -#' RMSProp optimizer + +#' Optimizer that implements the Adafactor algorithm. #' -#' @inheritParams optimizer_sgd -#' @param rho float >= 0. Decay factor. -#' @param epsilon float >= 0. Fuzz factor. If `NULL`, defaults to `k_epsilon()`. 
+#' @description +#' Adafactor is commonly used in NLP tasks, and has the advantage +#' of taking less memory because it only saves partial information of previous +#' gradients. #' -#' @note It is recommended to leave the parameters of this optimizer at their -#' default values (except the learning rate, which can be freely tuned). +#' The default argument setup is based on the original paper (see reference). +#' When gradients are of dimension > 2, Adafactor optimizer will delete the +#' last 2 dimensions separately in its accumulator variables. #' -#' This optimizer is usually a good choice for recurrent neural networks. +#' # Reference +#' - [Shazeer, Noam et al., 2018](https://arxiv.org/abs/1804.04235). #' -#' @family optimizers +#' @param learning_rate +#' A float, a +#' [`LearningRateSchedule()`] instance, or +#' a callable that takes no arguments and returns the actual value to +#' use. The learning rate. Defaults to `0.001`. +#' +#' @param beta_2_decay +#' float, defaults to -0.8. The decay rate of `beta_2`. +#' +#' @param epsilon_1 +#' float, defaults to 1e-30. A small offset to keep denominator +#' away from 0. +#' +#' @param epsilon_2 +#' float, defaults to 1e-3. A small offset to avoid learning +#' rate becoming too small by time. +#' +#' @param clip_threshold +#' float, defaults to 1.0. Clipping threshold. This is a +#' part of Adafactor algorithm, independent from `clipnorm`, +#' `clipvalue`, and `global_clipnorm`. +#' +#' @param relative_step +#' bool, defaults to `TRUE`. If `learning_rate` is a +#' constant and `relative_step=TRUE`, learning rate will be adjusted +#' based on current iterations. This is a default learning rate decay +#' in Adafactor. +#' +#' @param name +#' String. The name to use +#' for momentum accumulator weights created by +#' the optimizer. +#' +#' @param weight_decay +#' Float. If set, weight decay is applied. +#' +#' @param clipnorm +#' Float. If set, the gradient of each weight is individually +#' clipped so that its norm is no higher than this value. +#' +#' @param clipvalue +#' Float. If set, the gradient of each weight is clipped to be +#' no higher than this value. +#' +#' @param global_clipnorm +#' Float. If set, the gradient of all weights is clipped +#' so that their global norm is no higher than this value. +#' +#' @param use_ema +#' Boolean, defaults to `FALSE`. +#' If `TRUE`, exponential moving average +#' (EMA) is applied. EMA consists of computing an exponential moving +#' average of the weights of the model (as the weight values change +#' after each training batch), and periodically overwriting the +#' weights with their moving average. +#' +#' @param ema_momentum +#' Float, defaults to 0.99. Only used if `use_ema = TRUE`. +#' This is the momentum to use when computing +#' the EMA of the model's weights: +#' `new_average = ema_momentum * old_average + (1 - ema_momentum) * +#' current_variable_value`. +#' +#' @param ema_overwrite_frequency +#' Int or `NULL`, defaults to `NULL`. Only used if +#' `use_ema=TRUE`. Every `ema_overwrite_frequency` steps of iterations, +#' we overwrite the model variable by its moving average. +#' If `NULL`, the optimizer +#' does not overwrite model variables in the middle of training, +#' and you need to explicitly overwrite the variables +#' at the end of training by calling +#' `optimizer$finalize_variable_values()` +#' (which updates the model +#' variables in-place). 
When using the built-in `fit()` training loop, +#' this happens automatically after the last epoch, +#' and you don't need to do anything. +#' +#' @param loss_scale_factor +#' Float or `NULL`. If a float, the scale factor will +#' be multiplied the loss before computing gradients, and the inverse of +#' the scale factor will be multiplied by the gradients before updating +#' variables. Useful for preventing underflow during mixed precision +#' training. Alternately, [`optimizer_loss_scale()`] will +#' automatically set a loss scale factor. +#' +#' @param ... +#' For forward/backward compatability. +#' +#' @inheritParams optimizer_adadelta #' #' @export -optimizer_rmsprop <- function(learning_rate = 0.001, rho = 0.9, epsilon = NULL, decay = 0.0, - clipnorm = NULL, clipvalue = NULL, ...) { - - backcompat_fix_rename_lr_to_learning_rate(...) - - # compose args using list so that clipnorm and clipvalue are excluded - # from the call when they aren't sepcified - - args <- list( - learning_rate = learning_rate, - rho = rho, - epsilon = resolve_epsilon(epsilon), - decay = decay - ) - args$clipnorm <- clipnorm - args$clipvalue <- clipvalue - do.call(keras$optimizers$RMSprop, args) +#' @family optimizers +#' @returns an `Optimizer` instance +#' @seealso +#' + +# + +#' @tether keras.optimizers.Adafactor +optimizer_adafactor <- +function (learning_rate = 0.001, beta_2_decay = -0.8, epsilon_1 = 1e-30, + epsilon_2 = 0.001, clip_threshold = 1, relative_step = TRUE, + weight_decay = NULL, clipnorm = NULL, clipvalue = NULL, global_clipnorm = NULL, + use_ema = FALSE, ema_momentum = 0.99, ema_overwrite_frequency = NULL, + name = "adafactor", ..., loss_scale_factor = NULL, gradient_accumulation_steps = NULL) +{ + args <- capture_args(list( + ema_overwrite_frequency = as_integer, + gradient_accumulation_steps = as_integer)) + do.call(keras$optimizers$Adafactor, args) } -#' Adagrad optimizer. +#' Optimizer that implements the Adagrad algorithm. #' -#' Adagrad optimizer as described in [Adaptive Subgradient Methods for Online -#' Learning and Stochastic -#' Optimization](https://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf). +#' @description +#' Adagrad is an optimizer with parameter-specific learning rates, +#' which are adapted relative to how frequently a parameter gets +#' updated during training. The more updates a parameter receives, +#' the smaller the updates. #' -#' @inheritParams optimizer_rmsprop +#' # Reference +#' - [Duchi et al., 2011]( +#' https://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf). #' -#' @note It is recommended to leave the parameters of this optimizer at their -#' default values. +#' @param learning_rate +#' A float, a +#' [`LearningRateSchedule()`] instance, or +#' a callable that takes no arguments and returns the actual value to +#' use. The learning rate. Defaults to `0.001`. Note that `Adagrad` +#' tends to benefit from higher initial learning rate values compared +#' to other optimizers. To match the exact form in the original paper, +#' use `1.0`. #' -#' @family optimizers +#' @param initial_accumulator_value +#' Floating point value. Starting value for the +#' accumulators (per-parameter momentum values). Must be non-negative. +#' +#' @param epsilon +#' Small floating point value for maintaining numerical stability. +#' +#' @param name +#' String. The name to use +#' for momentum accumulator weights created by +#' the optimizer. +#' +#' @param weight_decay +#' Float. If set, weight decay is applied. +#' +#' @param clipnorm +#' Float. 
If set, the gradient of each weight is individually +#' clipped so that its norm is no higher than this value. +#' +#' @param clipvalue +#' Float. If set, the gradient of each weight is clipped to be +#' no higher than this value. +#' +#' @param global_clipnorm +#' Float. If set, the gradient of all weights is clipped +#' so that their global norm is no higher than this value. +#' +#' @param use_ema +#' Boolean, defaults to `FALSE`. +#' If `TRUE`, exponential moving average +#' (EMA) is applied. EMA consists of computing an exponential moving +#' average of the weights of the model (as the weight values change after +#' each training batch), and periodically overwriting the weights with +#' their moving average. +#' +#' @param ema_momentum +#' Float, defaults to 0.99. Only used if `use_ema=TRUE`. +#' This is the momentum to use when computing +#' the EMA of the model's weights: +#' `new_average = ema_momentum * old_average + (1 - ema_momentum) * +#' current_variable_value`. +#' +#' @param ema_overwrite_frequency +#' Int or `NULL`, defaults to `NULL`. Only used if +#' `use_ema=TRUE`. Every `ema_overwrite_frequency` steps of iterations, +#' we overwrite the model variable by its moving average. +#' If `NULL`, the optimizer +#' does not overwrite model variables in the middle of training, and you +#' need to explicitly overwrite the variables at the end of training +#' by calling `optimizer$finalize_variable_values()` +#' (which updates the model +#' variables in-place). When using the built-in `fit()` training loop, +#' this happens automatically after the last epoch, +#' and you don't need to do anything. +#' +#' @param loss_scale_factor +#' Float or `NULL`. If a float, the scale factor will +#' be multiplied the loss before computing gradients, and the inverse of +#' the scale factor will be multiplied by the gradients before updating +#' variables. Useful for preventing underflow during mixed precision +#' training. Alternately, [`optimizer_loss_scale()`] will +#' automatically set a loss scale factor. +#' +#' @inheritParams optimizer_adadelta +#' +#' @param ... +#' For forward/backward compatability. #' #' @export -optimizer_adagrad <- function(learning_rate = 0.01, epsilon = NULL, decay = 0.0, - clipnorm = NULL, clipvalue = NULL, ...) { - - backcompat_fix_rename_lr_to_learning_rate(...) - - # compose args using list so that clipnorm and clipvalue are excluded - # from the call when they aren't sepcified - args <- list( - learning_rate = learning_rate, - epsilon = resolve_epsilon(epsilon), - decay = decay - ) - args$clipnorm <- clipnorm - args$clipvalue <- clipvalue - do.call(keras$optimizers$Adagrad, args) +#' @family optimizers +#' @returns an `Optimizer` instance +#' @seealso +#' + +# + +#' @tether keras.optimizers.Adagrad +optimizer_adagrad <- +function (learning_rate = 0.001, initial_accumulator_value = 0.1, + epsilon = 1e-07, weight_decay = NULL, clipnorm = NULL, clipvalue = NULL, + global_clipnorm = NULL, use_ema = FALSE, ema_momentum = 0.99, + ema_overwrite_frequency = NULL, name = "adagrad", ..., loss_scale_factor = NULL, + gradient_accumulation_steps = NULL) +{ + args <- capture_args(list(ema_overwrite_frequency = as_integer, + gradient_accumulation_steps = as_integer)) + do.call(keras$optimizers$Adagrad, args) } -#' Adadelta optimizer. + +#' Optimizer that implements the Adam algorithm. #' -#' Adadelta optimizer as described in [ADADELTA: An Adaptive Learning Rate -#' Method](https://arxiv.org/abs/1212.5701). 
+#' @description +#' Adam optimization is a stochastic gradient descent method that is based on +#' adaptive estimation of first-order and second-order moments. #' -#' @inheritParams optimizer_rmsprop +#' According to +#' [Kingma et al., 2014](https://arxiv.org/abs/1412.6980), +#' the method is "*computationally +#' efficient, has little memory requirement, invariant to diagonal rescaling of +#' gradients, and is well suited for problems that are large in terms of +#' data/parameters*". #' -#' @note It is recommended to leave the parameters of this optimizer at their -#' default values. +#' @param learning_rate +#' A float, a +#' [`LearningRateSchedule()`] instance, or +#' a callable that takes no arguments and returns the actual value to +#' use. The learning rate. Defaults to `0.001`. #' -#' @family optimizers +#' @param beta_1 +#' A float value or a constant float tensor, or a callable +#' that takes no arguments and returns the actual value to use. The +#' exponential decay rate for the 1st moment estimates. Defaults to +#' `0.9`. +#' +#' @param beta_2 +#' A float value or a constant float tensor, or a callable +#' that takes no arguments and returns the actual value to use. The +#' exponential decay rate for the 2nd moment estimates. Defaults to +#' `0.999`. +#' +#' @param epsilon +#' A small constant for numerical stability. This epsilon is +#' "epsilon hat" in the Kingma and Ba paper (in the formula just before +#' Section 2.1), not the epsilon in Algorithm 1 of the paper. Defaults +#' to `1e-7`. +#' +#' @param amsgrad +#' Boolean. Whether to apply AMSGrad variant of this algorithm +#' from the paper "On the Convergence of Adam and beyond". Defaults +#' to `FALSE`. +#' +#' @param name +#' String. The name to use +#' for momentum accumulator weights created by +#' the optimizer. +#' +#' @param weight_decay +#' Float. If set, weight decay is applied. +#' +#' @param clipnorm +#' Float. If set, the gradient of each weight is individually +#' clipped so that its norm is no higher than this value. +#' +#' @param clipvalue +#' Float. If set, the gradient of each weight is clipped to be +#' no higher than this value. +#' +#' @param global_clipnorm +#' Float. If set, the gradient of all weights is clipped +#' so that their global norm is no higher than this value. +#' +#' @param use_ema +#' Boolean, defaults to `FALSE`. +#' If `TRUE`, exponential moving average +#' (EMA) is applied. EMA consists of computing an exponential moving +#' average of the weights of the model (as the weight values change after +#' each training batch), and periodically overwriting the weights with +#' their moving average. +#' +#' @param ema_momentum +#' Float, defaults to 0.99. Only used if `use_ema=TRUE`. +#' This is the momentum to use when computing +#' the EMA of the model's weights: +#' `new_average = ema_momentum * old_average + (1 - ema_momentum) * +#' current_variable_value`. +#' +#' @param ema_overwrite_frequency +#' Int or `NULL`, defaults to `NULL`. Only used if +#' `use_ema=TRUE`. Every `ema_overwrite_frequency` steps of iterations, +#' we overwrite the model variable by its moving average. +#' If `NULL`, the optimizer +#' does not overwrite model variables in the middle of training, and you +#' need to explicitly overwrite the variables at the end of training +#' by calling `optimizer$finalize_variable_values()` +#' (which updates the model +#' variables in-place). When using the built-in `fit()` training loop, +#' this happens automatically after the last epoch, +#' and you don't need to do anything. 
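+#'
+#' # Example
+#' A typical compile-time usage sketch (assumes a `model` created elsewhere):
+#' ```{r, eval=FALSE}
+#' model %>% compile(
+#'   optimizer = optimizer_adam(learning_rate = 1e-3),
+#'   loss = 'sparse_categorical_crossentropy',
+#'   metrics = 'accuracy'
+#' )
+#' ```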
+#' +#' @param loss_scale_factor +#' Float or `NULL`. If a float, the scale factor will +#' be multiplied with the loss before computing gradients, and the inverse of +#' the scale factor will be multiplied by the gradients before updating +#' variables. Useful for preventing underflow during mixed precision +#' training. Alternately, [`optimizer_loss_scale()`] will +#' automatically set a loss scale factor. +#' +#' @param ... +#' For forward/backward compatibility. +#' +#' @inheritParams optimizer_adadelta #' #' @export -optimizer_adadelta <- function(learning_rate = 1.0, rho = 0.95, epsilon = NULL, decay = 0.0, - clipnorm = NULL, clipvalue = NULL, ...) { - - backcompat_fix_rename_lr_to_learning_rate(...) - - # compose args using list so that clipnorm and clipvalue are excluded - # from the call when they aren't sepcified - args <- list( - learning_rate = learning_rate, - rho = rho, - epsilon = resolve_epsilon(epsilon), - decay = decay - ) - args$clipnorm <- clipnorm - args$clipvalue <- clipvalue - do.call(keras$optimizers$Adadelta, args) +#' @family optimizers +#' @returns an `Optimizer` instance +#' @seealso +#' + # + +#' @tether keras.optimizers.Adam optimizer_adam <- function (learning_rate = 0.001, beta_1 = 0.9, beta_2 = 0.999, + epsilon = 1e-07, amsgrad = FALSE, weight_decay = NULL, clipnorm = NULL, + clipvalue = NULL, global_clipnorm = NULL, use_ema = FALSE, + ema_momentum = 0.99, ema_overwrite_frequency = NULL, name = "adam", + ..., loss_scale_factor = NULL, + gradient_accumulation_steps = NULL) +{ + args <- capture_args(list(ema_overwrite_frequency = as_integer, + gradient_accumulation_steps = as_integer)) + do.call(keras$optimizers$Adam, args) } -#' Adam optimizer + +#' Optimizer that implements the Adamax algorithm. #' -#' Adam optimizer as described in [Adam - A Method for Stochastic -#' Optimization](https://arxiv.org/abs/1412.6980v8). +#' @description +#' Adamax, a variant of Adam based on the infinity norm, is a first-order +#' gradient-based optimization method. Due to its capability of adjusting the +#' learning rate based on data characteristics, it is suited to learning +#' time-variant processes, e.g., speech data with dynamically changing noise +#' conditions. Default parameters follow those provided in the paper (see +#' references below). #' -#' @inheritParams optimizer_rmsprop -#' @param beta_1 The exponential decay rate for the 1st moment estimates. float, -#' 0 < beta < 1. Generally close to 1. -#' @param beta_2 The exponential decay rate for the 2nd moment estimates. float, -#' 0 < beta < 1. Generally close to 1. -#' @param amsgrad Whether to apply the AMSGrad variant of this algorithm from -#' the paper "On the Convergence of Adam and Beyond". +#' Initialization: #' -#' @note Default parameters follow those provided in the original paper.
+#' ```{r} +#' m <- 0 # Initialize the 1st moment vector +#' u <- 0 # Initialize the exponentially weighted infinity norm +#' t <- 0 # Initialize timestep +#' ``` #' -#' @section References: -#' - [Adam - A Method for Stochastic Optimization](https://arxiv.org/abs/1412.6980v8) -#' - [On the Convergence of Adam and Beyond](https://openreview.net/forum?id=ryQu7f-RZ) +#' The update rule for parameter `w` with gradient `g` is described at the end +#' of section 7.1 of the paper (see the reference section): #' -#' @family optimizers +#' ```{r, eval=FALSE} +#' t <- t + 1 +#' m <- beta1 * m + (1 - beta1) * g +#' u <- max(beta2 * u, abs(g)) +#' current_lr <- learning_rate / (1 - beta1^t) +#' w <- w - current_lr * m / (u + epsilon) +#' ``` +#' +#' # Reference +#' - [Kingma et al., 2014](https://arxiv.org/abs/1412.6980) +#' +#' @param learning_rate +#' A float, a +#' [`LearningRateSchedule()`] instance, or +#' a callable that takes no arguments and returns the actual value to +#' use. The learning rate. Defaults to `0.001`. +#' +#' @param beta_1 +#' A float value or a constant float tensor. The exponential decay +#' rate for the 1st moment estimates. +#' +#' @param beta_2 +#' A float value or a constant float tensor. The exponential decay +#' rate for the exponentially weighted infinity norm. +#' +#' @param epsilon +#' A small constant for numerical stability. +#' +#' @param weight_decay +#' Float. If set, weight decay is applied. +#' +#' @param clipnorm +#' Float. If set, the gradient of each weight is individually +#' clipped so that its norm is no higher than this value. +#' +#' @param clipvalue +#' Float. If set, the gradient of each weight is clipped to be +#' no higher than this value. +#' +#' @param global_clipnorm +#' Float. If set, the gradient of all weights is clipped +#' so that their global norm is no higher than this value. +#' +#' @param use_ema +#' Boolean, defaults to `FALSE`. +#' If `TRUE`, exponential moving average +#' (EMA) is applied. EMA consists of computing an exponential moving +#' average of the weights of the model (as the weight values change after +#' each training batch), and periodically overwriting the weights with +#' their moving average. +#' +#' @param ema_momentum +#' Float, defaults to 0.99. Only used if `use_ema=TRUE`. +#' This is the momentum to use when computing +#' the EMA of the model's weights: +#' `new_average = ema_momentum * old_average + (1 - ema_momentum) * +#' current_variable_value`. +#' +#' @param ema_overwrite_frequency +#' Int or `NULL`, defaults to `NULL`. Only used if +#' `use_ema=TRUE`. Every `ema_overwrite_frequency` steps of iterations, +#' we overwrite the model variable by its moving average. +#' If `NULL`, the optimizer +#' does not overwrite model variables in the middle of training, and you +#' need to explicitly overwrite the variables at the end of training +#' by calling `optimizer$finalize_variable_values()` +#' (which updates the model +#' variables in-place). When using the built-in `fit()` training loop, +#' this happens automatically after the last epoch, +#' and you don't need to do anything. +#' +#' @param loss_scale_factor +#' Float or `NULL`. If a float, the scale factor will +#' be multiplied with the loss before computing gradients, and the inverse of +#' the scale factor will be multiplied by the gradients before updating +#' variables. Useful for preventing underflow during mixed precision +#' training.
Alternately, [`optimizer_loss_scale()`] will +#' automatically set a loss scale factor. +#' +#' @param name +#' String, name for the object +#' +#' @param ... +#' For forward/backward compatibility. +#' +#' @inheritParams optimizer_adadelta #' #' @export -optimizer_adam <- function(learning_rate = 0.001, beta_1 = 0.9, beta_2 = 0.999, epsilon = NULL, decay = 0.0, - amsgrad = FALSE, clipnorm = NULL, clipvalue = NULL, ...) { +#' @family optimizers +#' @returns an `Optimizer` instance +#' @seealso +#' + # + +#' +#' @tether keras.optimizers.Adamax optimizer_adamax <- function (learning_rate = 0.001, beta_1 = 0.9, beta_2 = 0.999, + epsilon = 1e-07, weight_decay = NULL, clipnorm = NULL, clipvalue = NULL, + global_clipnorm = NULL, use_ema = FALSE, ema_momentum = 0.99, + ema_overwrite_frequency = NULL, name = "adamax", ..., loss_scale_factor = NULL, + gradient_accumulation_steps = NULL) +{ + args <- capture_args(list(ema_overwrite_frequency = as_integer, + gradient_accumulation_steps = as_integer)) + do.call(keras$optimizers$Adamax, args) +} - backcompat_fix_rename_lr_to_learning_rate(...) - # compose args using list so that clipnorm and clipvalue are excluded - # from the call when they aren't sepcified - args <- list( - learning_rate = learning_rate, - beta_1 = beta_1, - beta_2 = beta_2, - epsilon = resolve_epsilon(epsilon), - decay = decay - ) - args$clipnorm <- clipnorm - args$clipvalue <- clipvalue +#' Optimizer that implements the AdamW algorithm. +#' +#' @description +#' AdamW optimization is a stochastic gradient descent method that is based on +#' adaptive estimation of first-order and second-order moments with an added +#' method to decay weights per the techniques discussed in the paper, +#' 'Decoupled Weight Decay Regularization' by +#' [Loshchilov & Hutter, 2019](https://arxiv.org/abs/1711.05101). +#' +#' According to +#' [Kingma et al., 2014](https://arxiv.org/abs/1412.6980), +#' the underlying Adam method is "*computationally +#' efficient, has little memory requirement, invariant to diagonal rescaling of +#' gradients, and is well suited for problems that are large in terms of +#' data/parameters*". +#' +#' # References +#' - [Loshchilov et al., 2019](https://arxiv.org/abs/1711.05101) +#' - [Kingma et al., 2014](https://arxiv.org/abs/1412.6980) for `adam` +#' - [Reddi et al., 2018]( +#' https://openreview.net/pdf?id=ryQu7f-RZ) for `amsgrad`. +#' +#' @param learning_rate +#' A float, a +#' [`LearningRateSchedule()`] instance, or +#' a callable that takes no arguments and returns the actual value to +#' use. The learning rate. Defaults to `0.001`. +#' +#' @param beta_1 +#' A float value or a constant float tensor, or a callable +#' that takes no arguments and returns the actual value to use. The +#' exponential decay rate for the 1st moment estimates. +#' Defaults to `0.9`. +#' +#' @param beta_2 +#' A float value or a constant float tensor, or a callable +#' that takes no arguments and returns the actual value to use. The +#' exponential decay rate for the 2nd moment estimates. +#' Defaults to `0.999`. +#' +#' @param epsilon +#' A small constant for numerical stability. This epsilon is +#' "epsilon hat" in the Kingma and Ba paper (in the formula just +#' before Section 2.1), not the epsilon in Algorithm 1 of the paper. +#' Defaults to `1e-7`. +#' +#' @param amsgrad +#' Boolean. Whether to apply AMSGrad variant of this algorithm +#' from the paper "On the Convergence of Adam and beyond". +#' Defaults to `FALSE`. +#' +#' @param name +#' String.
The name to use +#' for momentum accumulator weights created by +#' the optimizer. +#' +#' @param weight_decay +#' Float. If set, weight decay is applied. +#' +#' @param clipnorm +#' Float. If set, the gradient of each weight is individually +#' clipped so that its norm is no higher than this value. +#' +#' @param clipvalue +#' Float. If set, the gradient of each weight is clipped to be +#' no higher than this value. +#' +#' @param global_clipnorm +#' Float. If set, the gradient of all weights is clipped +#' so that their global norm is no higher than this value. +#' +#' @param use_ema +#' Boolean, defaults to `FALSE`. +#' If `TRUE`, exponential moving average +#' (EMA) is applied. EMA consists of computing an exponential moving +#' average of the weights of the model (as the weight values change after +#' each training batch), and periodically overwriting the weights with +#' their moving average. +#' +#' @param ema_momentum +#' Float, defaults to 0.99. Only used if `use_ema=TRUE`. +#' This is the momentum to use when computing +#' the EMA of the model's weights: +#' `new_average = ema_momentum * old_average + (1 - ema_momentum) * current_variable_value`. +#' +#' @param ema_overwrite_frequency +#' Int or `NULL`, defaults to `NULL`. Only used if +#' `use_ema=TRUE`. Every `ema_overwrite_frequency` steps of iterations, +#' we overwrite the model variable by its moving average. +#' If `NULL`, the optimizer +#' does not overwrite model variables in the middle of training, and you +#' need to explicitly overwrite the variables at the end of training +#' by calling `optimizer$finalize_variable_values()` +#' (which updates the model +#' variables in-place). When using the built-in `fit()` training loop, +#' this happens automatically after the last epoch, +#' and you don't need to do anything. +#' +#' @param loss_scale_factor +#' Float or `NULL`. If a float, the scale factor will +#' be multiplied with the loss before computing gradients, and the inverse of +#' the scale factor will be multiplied by the gradients before updating +#' variables. Useful for preventing underflow during mixed precision +#' training. Alternately, [`optimizer_loss_scale()`] will +#' automatically set a loss scale factor. +#' +#' @param ... +#' For forward/backward compatibility. +#' +#' @inheritParams optimizer_adadelta +#' +#' @export +#' @family optimizers +#' @returns an `Optimizer` instance +#' @seealso +#' + # + +#' @tether keras.optimizers.AdamW optimizer_adam_w <- function (learning_rate = 0.001, weight_decay = 0.004, beta_1 = 0.9, + beta_2 = 0.999, epsilon = 1e-07, amsgrad = FALSE, clipnorm = NULL, + clipvalue = NULL, global_clipnorm = NULL, use_ema = FALSE, + ema_momentum = 0.99, ema_overwrite_frequency = NULL, name = "adamw", + ..., loss_scale_factor = NULL, + gradient_accumulation_steps = NULL) +{ + args <- capture_args(list(ema_overwrite_frequency = as_integer, + gradient_accumulation_steps = as_integer)) + do.call(keras$optimizers$AdamW, args) +} - if (keras_version() >= "2.1.3") - args$amsgrad <- amsgrad - do.call(keras$optimizers$Adam, args) +#' Optimizer that implements the FTRL algorithm. +#' +#' @description +#' "Follow The Regularized Leader" (FTRL) is an optimization algorithm +#' developed at Google for click-through rate prediction in the early 2010s. It +#' is most suitable for shallow models with large and sparse feature spaces. +#' The algorithm is described by +#' [McMahan et al., 2013](https://research.google.com/pubs/archive/41159.pdf).
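+#' +#' A minimal construction sketch (the hyperparameter values here are illustrative only, not recommendations): +#' +#' ```{r} +#' opt <- optimizer_ftrl(learning_rate = 0.05, l1_regularization_strength = 0.01) +#' ``` +#'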
+#' The Keras version has support for both online L2 regularization +#' (the L2 regularization described in the paper +#' above) and shrinkage-type L2 regularization +#' (which is the addition of an L2 penalty to the loss function). +#' +#' Initialization: +#' +#' ```{r} +#' n <- 0 +#' sigma <- 0 +#' z <- 0 +#' ``` +#' +#' Update rule for one variable `w`: +#' +#' ```{r, eval=FALSE} +#' prev_n <- n +#' n <- n + g^2 +#' sigma <- (n^(-lr_power) - prev_n^(-lr_power)) / lr +#' z <- z + g - sigma * w +#' if (abs(z) < lambda_1) { +#' w <- 0 +#' } else { +#' w <- (sign(z) * lambda_1 - z) / ((beta + sqrt(n)) / alpha + lambda_2) +#' } +#' ``` +#' +#' Notation: +#' +#' - `lr` is the learning rate +#' - `g` is the gradient for the variable +#' - `lambda_1` is the L1 regularization strength +#' - `lambda_2` is the L2 regularization strength +#' - `lr_power` is the power to scale n. +#' +#' Check the documentation for the `l2_shrinkage_regularization_strength` +#' parameter for more details when shrinkage is enabled, in which case the +#' gradient is replaced with a gradient with shrinkage. +#' +#' @param learning_rate +#' A float, a +#' [`LearningRateSchedule()`] instance, or +#' a callable that takes no arguments and returns the actual value to +#' use. The learning rate. Defaults to `0.001`. +#' +#' @param learning_rate_power +#' A float value, must be less than or equal to zero. +#' Controls how the learning rate decreases during training. Use zero +#' for a fixed learning rate. +#' +#' @param initial_accumulator_value +#' The starting value for accumulators. Only +#' zero or positive values are allowed. +#' +#' @param l1_regularization_strength +#' A float value, must be greater than or equal +#' to zero. Defaults to `0.0`. +#' +#' @param l2_regularization_strength +#' A float value, must be greater than or equal +#' to zero. Defaults to `0.0`. +#' +#' @param l2_shrinkage_regularization_strength +#' A float value, must be greater +#' than or equal to zero. This differs from L2 above in that the L2 +#' above is a stabilization penalty, whereas this L2 shrinkage is a +#' magnitude penalty. When input is sparse, shrinkage will only happen +#' on the active weights. +#' +#' @param beta +#' A float value, representing the beta value from the paper. +#' Defaults to `0.0`. +#' +#' @param name +#' String. The name to use +#' for momentum accumulator weights created by +#' the optimizer. +#' +#' @param weight_decay +#' Float. If set, weight decay is applied. +#' +#' @param clipnorm +#' Float. If set, the gradient of each weight is individually +#' clipped so that its norm is no higher than this value. +#' +#' @param clipvalue +#' Float. If set, the gradient of each weight is clipped to be +#' no higher than this value. +#' +#' @param global_clipnorm +#' Float. If set, the gradient of all weights is clipped +#' so that their global norm is no higher than this value. +#' +#' @param use_ema +#' Boolean, defaults to `FALSE`. +#' If `TRUE`, exponential moving average +#' (EMA) is applied. EMA consists of computing an exponential moving +#' average of the weights of the model (as the weight values change after +#' each training batch), and periodically overwriting the weights with +#' their moving average. +#' +#' @param ema_momentum +#' Float, defaults to 0.99. Only used if `use_ema=TRUE`. +#' This is the momentum to use when computing +#' the EMA of the model's weights: +#' `new_average = ema_momentum * old_average + (1 - ema_momentum) * +#' current_variable_value`.
+#' +#' @param ema_overwrite_frequency +#' Int or `NULL`, defaults to `NULL`. Only used if +#' `use_ema=TRUE`. Every `ema_overwrite_frequency` steps of iterations, +#' we overwrite the model variable by its moving average. +#' If `NULL`, the optimizer +#' does not overwrite model variables in the middle of training, and you +#' need to explicitly overwrite the variables at the end of training +#' by calling `optimizer$finalize_variable_values()` +#' (which updates the model +#' variables in-place). When using the built-in `fit()` training loop, +#' this happens automatically after the last epoch, +#' and you don't need to do anything. +#' +#' @param loss_scale_factor +#' Float or `NULL`. If a float, the scale factor will +#' be multiplied with the loss before computing gradients, and the inverse of +#' the scale factor will be multiplied by the gradients before updating +#' variables. Useful for preventing underflow during mixed precision +#' training. Alternately, [`optimizer_loss_scale()`] will +#' automatically set a loss scale factor. +#' +#' @param ... +#' For forward/backward compatibility. +#' +#' @inheritParams optimizer_adadelta +#' +#' +#' @export +#' @family optimizers +#' @returns an `Optimizer` instance +#' @seealso +#' + # + +#' +#' @tether keras.optimizers.Ftrl optimizer_ftrl <- function (learning_rate = 0.001, learning_rate_power = -0.5, + initial_accumulator_value = 0.1, l1_regularization_strength = 0, + l2_regularization_strength = 0, l2_shrinkage_regularization_strength = 0, + beta = 0, weight_decay = NULL, clipnorm = NULL, clipvalue = NULL, + global_clipnorm = NULL, use_ema = FALSE, ema_momentum = 0.99, + ema_overwrite_frequency = NULL, name = "ftrl", ..., loss_scale_factor = NULL, + gradient_accumulation_steps = NULL) +{ + args <- capture_args(list(ema_overwrite_frequency = as_integer, + gradient_accumulation_steps = as_integer)) + do.call(keras$optimizers$Ftrl, args) } -# TODO: decay position moved -# tf.keras.optimizers.Adam( -# learning_rate=0.001, -# beta_1=0.9, -# beta_2=0.999, -# epsilon=1e-07, -# amsgrad=False, -# name='Adam', -# **kwargs -# ) -#' Adamax optimizer +#' Optimizer that implements the Lion algorithm. #' -#' Adamax optimizer from Section 7 of the [Adam paper](https://arxiv.org/abs/1412.6980v8). -#' It is a variant of Adam based on the infinity norm. +#' @description +#' The Lion optimizer is a stochastic-gradient-descent method that uses the +#' sign operator to control the magnitude of the update, unlike other adaptive +#' optimizers such as Adam that rely on second-order moments. This makes +#' Lion more memory-efficient as it only keeps track of the momentum. According +#' to the authors (see reference), its performance gain over Adam grows with +#' the batch size. Because the update of Lion is produced through the sign +#' operation, resulting in a larger norm, a suitable learning rate for Lion is +#' typically 3-10x smaller than that for AdamW. The weight decay for Lion +#' should be in turn 3-10x larger than that for AdamW to maintain a +#' similar strength (lr * wd). #' -#' @inheritParams optimizer_adam +#' # References +#' - [Chen et al., 2023](https://arxiv.org/abs/2302.06675) +#' - [Authors' implementation]( +#' https://github.com/google/automl/tree/master/lion) +#' +#' @param learning_rate +#' A float, a +#' [`LearningRateSchedule()`] instance, or +#' a callable that takes no arguments and returns the actual value to +#' use. The learning rate. Defaults to `0.001`.
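+#' As a rough sketch of the 3-10x guidance in the description (the exact values are hypothetical): where AdamW might use `learning_rate = 1e-3` with `weight_decay = 0.004`, a comparable Lion configuration might be: +#' +#' ```{r, eval = FALSE} +#' opt <- optimizer_lion(learning_rate = 1e-4, weight_decay = 0.04) +#' ```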
+#' +#' @param beta_1 +#' A float value or a constant float tensor, or a callable +#' that takes no arguments and returns the actual value to use. The +#' rate to combine the current gradient and the 1st moment estimate. +#' Defaults to `0.9`. +#' +#' @param beta_2 +#' A float value or a constant float tensor, or a callable +#' that takes no arguments and returns the actual value to use. The +#' exponential decay rate for the 1st moment estimate. Defaults to +#' `0.99`. +#' +#' @param name +#' String. The name to use +#' for momentum accumulator weights created by +#' the optimizer. +#' +#' @param weight_decay +#' Float. If set, weight decay is applied. +#' +#' @param clipnorm +#' Float. If set, the gradient of each weight is individually +#' clipped so that its norm is no higher than this value. +#' +#' @param clipvalue +#' Float. If set, the gradient of each weight is clipped to be +#' no higher than this value. +#' +#' @param global_clipnorm +#' Float. If set, the gradient of all weights is clipped +#' so that their global norm is no higher than this value. +#' +#' @param use_ema +#' Boolean, defaults to `FALSE`. +#' If `TRUE`, exponential moving average +#' (EMA) is applied. EMA consists of computing an exponential moving +#' average of the weights of the model (as the weight values change after +#' each training batch), and periodically overwriting the weights with +#' their moving average. +#' +#' @param ema_momentum +#' Float, defaults to 0.99. Only used if `use_ema=TRUE`. +#' This is the momentum to use when computing +#' the EMA of the model's weights: +#' `new_average = ema_momentum * old_average + (1 - ema_momentum) * +#' current_variable_value`. +#' +#' @param ema_overwrite_frequency +#' Int or `NULL`, defaults to `NULL`. Only used if +#' `use_ema=TRUE`. Every `ema_overwrite_frequency` steps of iterations, +#' we overwrite the model variable by its moving average. +#' If `NULL`, the optimizer +#' does not overwrite model variables in the middle of training, and you +#' need to explicitly overwrite the variables at the end of training +#' by calling `optimizer$finalize_variable_values()` +#' (which updates the model +#' variables in-place). When using the built-in `fit()` training loop, +#' this happens automatically after the last epoch, +#' and you don't need to do anything. +#' +#' @param loss_scale_factor +#' Float or `NULL`. If a float, the scale factor will +#' be multiplied with the loss before computing gradients, and the inverse of +#' the scale factor will be multiplied by the gradients before updating +#' variables. Useful for preventing underflow during mixed precision +#' training. Alternately, [`optimizer_loss_scale()`] will +#' automatically set a loss scale factor. +#' +#' @param ... +#' For forward/backward compatibility. +#' +#' @inheritParams optimizer_adadelta #' -#' @family optimizers #' #' @export -optimizer_adamax <- function(learning_rate = 0.002, beta_1 = 0.9, beta_2 = 0.999, epsilon = NULL, decay = 0.0, - clipnorm = NULL, clipvalue = NULL, ...) { - - backcompat_fix_rename_lr_to_learning_rate(...)
+#' @family optimizers +#' @returns an `Optimizer` instance # @seealso # + +#' @tether keras.optimizers.Lion optimizer_lion <- function (learning_rate = 0.001, beta_1 = 0.9, beta_2 = 0.99, + weight_decay = NULL, clipnorm = NULL, clipvalue = NULL, global_clipnorm = NULL, + use_ema = FALSE, ema_momentum = 0.99, ema_overwrite_frequency = NULL, + name = "lion", ..., loss_scale_factor = NULL, + gradient_accumulation_steps = NULL) +{ + args <- capture_args(list(ema_overwrite_frequency = as_integer, + gradient_accumulation_steps = as_integer)) + do.call(keras$optimizers$Lion, args) +} - # compose args using list so that clipnorm and clipvalue are excluded - # from the call when they aren't sepcified - args <- list( - learning_rate = learning_rate, - beta_1 = beta_1, - beta_2 = beta_2, - epsilon = resolve_epsilon(epsilon), - decay = decay - ) - args$clipnorm <- clipnorm - args$clipvalue <- clipvalue - do.call(keras$optimizers$Adamax, args) +#' An optimizer that dynamically scales the loss to prevent underflow. +#' +#' @description +#' Loss scaling is a technique to prevent numeric underflow in intermediate +#' gradients when float16 is used. To prevent underflow, the loss is multiplied +#' (or "scaled") by a certain factor called the "loss scale", which causes +#' intermediate gradients to be scaled by the loss scale as well. The final +#' gradients are divided (or "unscaled") by the loss scale to bring them back +#' to their original value. +#' +#' `LossScaleOptimizer` wraps another optimizer and applies dynamic loss +#' scaling to it. This loss scale is dynamically updated over time as follows: +#' - On any train step, if a nonfinite gradient is encountered, the loss scale +#' is halved, and the train step is skipped. +#' - If `dynamic_growth_steps` steps have occurred since the last time the loss scale +#' was updated, and no nonfinite gradients have occurred, the loss scale +#' is doubled. +#' +#' @param inner_optimizer +#' The keras `Optimizer` instance to wrap. +#' +#' @param initial_scale +#' Float. The initial loss scale. This scale will be updated +#' during training. It is recommended for this to be a very high +#' number, because a loss scale that is too high gets lowered far more +#' quickly than a loss scale that is too low gets raised. +#' +#' @param dynamic_growth_steps +#' Int. How often to update the scale upwards. After +#' every `dynamic_growth_steps` steps with finite gradients, the +#' loss scale is doubled. +#' +#' @param name +#' String. The name to use +#' for momentum accumulator weights created by +#' the optimizer. +#' +#' @param weight_decay +#' Float. If set, weight decay is applied. +#' +#' @param clipnorm +#' Float. If set, the gradient of each weight is individually +#' clipped so that its norm is no higher than this value. +#' +#' @param clipvalue +#' Float. If set, the gradient of each weight is clipped to be +#' no higher than this value. +#' +#' @param global_clipnorm +#' Float. If set, the gradient of all weights is clipped +#' so that their global norm is no higher than this value. +#' +#' @param use_ema +#' Boolean, defaults to `FALSE`. +#' If `TRUE`, exponential moving average +#' (EMA) is applied. EMA consists of computing an exponential moving +#' average of the weights of the model (as the weight values change after +#' each training batch), and periodically overwriting the weights with +#' their moving average. +#' +#' @param ema_momentum +#' Float, defaults to 0.99. Only used if `use_ema=TRUE`.
+#' This is the momentum to use when computing +#' the EMA of the model's weights: +#' `new_average = ema_momentum * old_average + (1 - ema_momentum) * +#' current_variable_value`. +#' +#' @param ema_overwrite_frequency +#' Int or `NULL`, defaults to `NULL`. Only used if +#' `use_ema=TRUE`. Every `ema_overwrite_frequency` steps of iterations, +#' we overwrite the model variable by its moving average. +#' If `NULL`, the optimizer +#' does not overwrite model variables in the middle of training, and you +#' need to explicitly overwrite the variables at the end of training +#' by calling `optimizer$finalize_variable_values()` +#' (which updates the model +#' variables in-place). When using the built-in `fit()` training loop, +#' this happens automatically after the last epoch, +#' and you don't need to do anything. +#' +#' @param loss_scale_factor +#' Float or `NULL`. If a float, the scale factor will +#' be multiplied with the loss before computing gradients, and the inverse of +#' the scale factor will be multiplied by the gradients before updating +#' variables. Useful for preventing underflow during mixed precision +#' training. Alternately, [`optimizer_loss_scale()`] will +#' automatically set a loss scale factor. +#' +#' @param ... +#' For forward/backward compatibility. +#' +#' @inheritParams optimizer_adadelta +#' +#' +#' @export +#' @family optimizers +#' @returns an `Optimizer` instance # @seealso # + +#' @tether keras.optimizers.LossScaleOptimizer optimizer_loss_scale <- function (inner_optimizer, initial_scale = 32768, dynamic_growth_steps = 2000L, + ..., name = NULL, weight_decay = NULL, clipnorm = NULL, clipvalue = NULL, + global_clipnorm = NULL, use_ema = NULL, ema_momentum = NULL, + ema_overwrite_frequency = NULL, loss_scale_factor = NULL, + gradient_accumulation_steps = NULL) +{ + args <- capture_args(list(dynamic_growth_steps = as_integer, + ema_overwrite_frequency = as_integer, + gradient_accumulation_steps = as_integer)) + do.call(keras$optimizers$LossScaleOptimizer, args) } -#' Nesterov Adam optimizer + +#' Optimizer that implements the Nadam algorithm. #' -#' Much like Adam is essentially RMSprop with momentum, Nadam is Adam RMSprop -#' with Nesterov momentum. +#' @description +#' Much like Adam is essentially RMSprop with momentum, Nadam is Adam with +#' Nesterov momentum. #' -#' @inheritParams optimizer_adam -#' @param schedule_decay Schedule deacy. +#' # Reference +#' - [Dozat, 2015](https://cs229.stanford.edu/proj2015/054_report.pdf). #' -#' @details Default parameters follow those provided in the paper. It is -#' recommended to leave the parameters of this optimizer at their default -#' values. +#' @param learning_rate +#' A float, a +#' [`LearningRateSchedule()`] instance, or +#' a callable that takes no arguments and returns the actual value to +#' use. The learning rate. Defaults to `0.001`. #' -#' @seealso [On the importance of initialization and momentum in deep -#' learning](https://www.cs.toronto.edu/~fritz/absps/momentum.pdf). +#' @param beta_1 +#' A float value or a constant float tensor, or a callable +#' that takes no arguments and returns the actual value to use. The +#' exponential decay rate for the 1st moment estimates. +#' Defaults to `0.9`. +#' +#' @param beta_2 +#' A float value or a constant float tensor, or a callable +#' that takes no arguments and returns the actual value to use. The +#' exponential decay rate for the 2nd moment estimates. Defaults to +#' `0.999`. +#' +#' @param epsilon +#' A small constant for numerical stability.
This epsilon is +#' "epsilon hat" in the Kingma and Ba paper (in the formula just before +#' Section 2.1), not the epsilon in Algorithm 1 of the paper. +#' Defaults to `1e-7`. +#' +#' @param name +#' String. The name to use +#' for momentum accumulator weights created by +#' the optimizer. +#' +#' @param weight_decay +#' Float. If set, weight decay is applied. +#' +#' @param clipnorm +#' Float. If set, the gradient of each weight is individually +#' clipped so that its norm is no higher than this value. +#' +#' @param clipvalue +#' Float. If set, the gradient of each weight is clipped to be +#' no higher than this value. +#' +#' @param global_clipnorm +#' Float. If set, the gradient of all weights is clipped +#' so that their global norm is no higher than this value. +#' +#' @param use_ema +#' Boolean, defaults to `FALSE`. +#' If `TRUE`, exponential moving average +#' (EMA) is applied. EMA consists of computing an exponential moving +#' average of the weights of the model (as the weight values change after +#' each training batch), and periodically overwriting the weights with +#' their moving average. +#' +#' @param ema_momentum +#' Float, defaults to 0.99. Only used if `use_ema=TRUE`. +#' This is the momentum to use when computing +#' the EMA of the model's weights: +#' `new_average = ema_momentum * old_average + (1 - ema_momentum) * +#' current_variable_value`. +#' +#' @param ema_overwrite_frequency +#' Int or `NULL`, defaults to `NULL`. Only used if +#' `use_ema=TRUE`. Every `ema_overwrite_frequency` steps of iterations, +#' we overwrite the model variable by its moving average. +#' If `NULL`, the optimizer +#' does not overwrite model variables in the middle of training, and you +#' need to explicitly overwrite the variables at the end of training +#' by calling `optimizer$finalize_variable_values()` +#' (which updates the model +#' variables in-place). When using the built-in `fit()` training loop, +#' this happens automatically after the last epoch, +#' and you don't need to do anything. +#' +#' @param loss_scale_factor +#' Float or `NULL`. If a float, the scale factor will +#' be multiplied with the loss before computing gradients, and the inverse of +#' the scale factor will be multiplied by the gradients before updating +#' variables. Useful for preventing underflow during mixed precision +#' training. Alternately, [`optimizer_loss_scale()`] will +#' automatically set a loss scale factor. +#' +#' @param ... +#' For forward/backward compatibility. +#' +#' @inheritParams optimizer_adadelta #' #' @export -optimizer_nadam <- function(learning_rate = 0.002, beta_1 = 0.9, beta_2 = 0.999, epsilon = NULL, - schedule_decay = 0.004, clipnorm = NULL, clipvalue = NULL, ...) { - - backcompat_fix_rename_lr_to_learning_rate(...)
+#' @family optimizers +#' @returns an `Optimizer` instance +#' @seealso +#' + # + +#' @tether keras.optimizers.Nadam optimizer_nadam <- function (learning_rate = 0.001, beta_1 = 0.9, beta_2 = 0.999, + epsilon = 1e-07, weight_decay = NULL, clipnorm = NULL, clipvalue = NULL, + global_clipnorm = NULL, use_ema = FALSE, ema_momentum = 0.99, + ema_overwrite_frequency = NULL, name = "nadam", ..., loss_scale_factor = NULL, + gradient_accumulation_steps = NULL) +{ + args <- capture_args(list(ema_overwrite_frequency = as_integer, + gradient_accumulation_steps = as_integer)) + do.call(keras$optimizers$Nadam, args) +} - # compose args using list so that clipnorm and clipvalue are excluded - # from the call when they aren't sepcified - args <- list( - learning_rate = learning_rate, - beta_1 = beta_1, - beta_2 = beta_2, - epsilon = resolve_epsilon(epsilon), - schedule_decay = schedule_decay - ) - args$clipnorm <- clipnorm - args$clipvalue <- clipvalue - do.call(keras$optimizers$Nadam, args) +#' Optimizer that implements the RMSprop algorithm. +#' +#' @description +#' The gist of RMSprop is to: +#' +#' - Maintain a moving (discounted) average of the square of gradients +#' - Divide the gradient by the root of this average +#' +#' This implementation of RMSprop uses plain momentum, not Nesterov momentum. +#' +#' The centered version additionally maintains a moving average of the +#' gradients, and uses that average to estimate the variance. +#' +#' # Usage +#' ```{r} +#' opt <- optimizer_rmsprop(learning_rate = 0.1) +#' ``` +#' +#' # Reference +#' - [Hinton, 2012]( +#' https://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) +#' +#' @param learning_rate +#' A float, a +#' `learning_rate_schedule_*` instance, or +#' a callable that takes no arguments and returns the actual value to +#' use. The learning rate. Defaults to `0.001`. +#' +#' @param rho +#' float, defaults to 0.9. Discounting factor for the old gradients. +#' +#' @param momentum +#' float, defaults to 0.0. If not 0.0, the optimizer tracks the +#' momentum value, with a decay rate equal to `1 - momentum`. +#' +#' @param epsilon +#' A small constant for numerical stability. This epsilon is +#' "epsilon hat" in the Kingma and Ba paper (in the formula just before +#' Section 2.1), not the epsilon in Algorithm 1 of the paper. Defaults +#' to `1e-7`. +#' +#' @param centered +#' Boolean. If `TRUE`, gradients are normalized by the estimated +#' variance of the gradient; if `FALSE`, by the uncentered second moment. +#' Setting this to `TRUE` may help with training, but is slightly more +#' expensive in terms of computation and memory. Defaults to `FALSE`. +#' +#' @param name +#' String. The name to use +#' for momentum accumulator weights created by +#' the optimizer. +#' +#' @param weight_decay +#' Float. If set, weight decay is applied. +#' +#' @param clipnorm +#' Float. If set, the gradient of each weight is individually +#' clipped so that its norm is no higher than this value. +#' +#' @param clipvalue +#' Float. If set, the gradient of each weight is clipped to be +#' no higher than this value. +#' +#' @param global_clipnorm +#' Float. If set, the gradient of all weights is clipped +#' so that their global norm is no higher than this value. +#' +#' @param use_ema +#' Boolean, defaults to `FALSE`. +#' If `TRUE`, exponential moving average +#' (EMA) is applied.
EMA consists of computing an exponential moving +#' average of the weights of the model (as the weight values change after +#' each training batch), and periodically overwriting the weights with +#' their moving average. +#' +#' @param ema_momentum +#' Float, defaults to 0.99. Only used if `use_ema=TRUE`. +#' This is the momentum to use when computing +#' the EMA of the model's weights: +#' `new_average = ema_momentum * old_average + (1 - ema_momentum) * +#' current_variable_value`. +#' +#' @param ema_overwrite_frequency +#' Int or `NULL`, defaults to `NULL`. Only used if +#' `use_ema=TRUE`. Every `ema_overwrite_frequency` steps of iterations, +#' we overwrite the model variable by its moving average. +#' If `NULL`, the optimizer +#' does not overwrite model variables in the middle of training, and you +#' need to explicitly overwrite the variables at the end of training +#' by calling `optimizer$finalize_variable_values()` +#' (which updates the model +#' variables in-place). When using the built-in `fit()` training loop, +#' this happens automatically after the last epoch, +#' and you don't need to do anything. +#' +#' @param loss_scale_factor +#' Float or `NULL`. If a float, the scale factor will +#' be multiplied with the loss before computing gradients, and the inverse of +#' the scale factor will be multiplied by the gradients before updating +#' variables. Useful for preventing underflow during mixed precision +#' training. Alternately, [`optimizer_loss_scale()`] will +#' automatically set a loss scale factor. +#' +#' @param ... +#' For forward/backward compatibility. +#' +#' @inheritParams optimizer_adadelta +#' +#' +#' @export +#' @family optimizers +#' @returns an `Optimizer` instance +#' @seealso +#' + # + +#' +#' @tether keras.optimizers.RMSprop optimizer_rmsprop <- function (learning_rate = 0.001, rho = 0.9, momentum = 0, epsilon = 1e-07, + centered = FALSE, weight_decay = NULL, clipnorm = NULL, clipvalue = NULL, + global_clipnorm = NULL, use_ema = FALSE, ema_momentum = 0.99, + ema_overwrite_frequency = NULL, name = "rmsprop", ..., loss_scale_factor = NULL, + gradient_accumulation_steps = NULL) +{ + args <- capture_args(list(ema_overwrite_frequency = as_integer, + gradient_accumulation_steps = as_integer)) + do.call(keras$optimizers$RMSprop, args) } -resolve_epsilon <- function(epsilon) { - if (is.null(epsilon) && keras_version() < "2.1.3") - k_epsilon() - else - epsilon -} -backcompat_fix_rename_lr_to_learning_rate <- function(..., lr) { - if (!missing(lr)) { - warning("the `lr` argument has been renamed to `learning_rate`.") - if (!eval.parent(quote(missing(learning_rate)))) - stop("You can't supply both `lr` and `learning_rate`") - assign("learning_rate", lr, parent.frame()) - } - ellipsis::check_dots_empty() +#' Gradient descent (with momentum) optimizer. +#' +#' @description +#' Update rule for parameter `w` with gradient `g` when `momentum` is 0: +#' +#' ```{r, eval = FALSE} +#' w <- w - learning_rate * g +#' ``` +#' +#' Update rule when `momentum` is larger than 0: +#' +#' ```{r, eval = FALSE} +#' velocity <- momentum * velocity - learning_rate * g +#' w <- w + velocity +#' ``` +#' +#' When `nesterov=TRUE`, this rule becomes: +#' +#' ```{r, eval = FALSE} +#' velocity <- momentum * velocity - learning_rate * g +#' w <- w + momentum * velocity - learning_rate * g +#' ``` +#' +#' @param learning_rate +#' A float, a +#' `learning_rate_schedule_*` instance, or +#' a callable that takes no arguments and returns the actual value to +#' use. The learning rate. Defaults to `0.01`.
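+#' For example, a common construction sketch (argument values are illustrative only): +#' +#' ```{r} +#' opt <- optimizer_sgd(learning_rate = 0.01, momentum = 0.9, nesterov = TRUE) +#' ```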
+#' +#' @param momentum +#' float hyperparameter >= 0 that accelerates gradient descent in +#' the relevant direction and dampens oscillations. 0 is vanilla +#' gradient descent. Defaults to `0.0`. +#' +#' @param nesterov +#' boolean. Whether to apply Nesterov momentum. +#' Defaults to `FALSE`. +#' +#' @param name +#' String. The name to use +#' for momentum accumulator weights created by +#' the optimizer. +#' +#' @param weight_decay +#' Float. If set, weight decay is applied. +#' +#' @param clipnorm +#' Float. If set, the gradient of each weight is individually +#' clipped so that its norm is no higher than this value. +#' +#' @param clipvalue +#' Float. If set, the gradient of each weight is clipped to be +#' no higher than this value. +#' +#' @param global_clipnorm +#' Float. If set, the gradient of all weights is clipped +#' so that their global norm is no higher than this value. +#' +#' @param use_ema +#' Boolean, defaults to `FALSE`. +#' If `TRUE`, exponential moving average +#' (EMA) is applied. EMA consists of computing an exponential moving +#' average of the weights of the model (as the weight values change after +#' each training batch), and periodically overwriting the weights with +#' their moving average. +#' +#' @param ema_momentum +#' Float, defaults to 0.99. Only used if `use_ema=TRUE`. +#' This is the momentum to use when computing +#' the EMA of the model's weights: +#' `new_average = ema_momentum * old_average + (1 - ema_momentum) * +#' current_variable_value`. +#' +#' @param ema_overwrite_frequency +#' Int or `NULL`, defaults to `NULL`. Only used if +#' `use_ema=TRUE`. Every `ema_overwrite_frequency` steps of iterations, +#' we overwrite the model variable by its moving average. +#' If `NULL`, the optimizer +#' does not overwrite model variables in the middle of training, and you +#' need to explicitly overwrite the variables at the end of training +#' by calling `optimizer$finalize_variable_values()` +#' (which updates the model +#' variables in-place). When using the built-in `fit()` training loop, +#' this happens automatically after the last epoch, +#' and you don't need to do anything. +#' +#' @param loss_scale_factor +#' Float or `NULL`. If a float, the scale factor will +#' be multiplied with the loss before computing gradients, and the inverse of +#' the scale factor will be multiplied by the gradients before updating +#' variables. Useful for preventing underflow during mixed precision +#' training. Alternately, [`optimizer_loss_scale()`] will +#' automatically set a loss scale factor. +#' +#' @param ... +#' For forward/backward compatibility. +#' +#' @inheritParams optimizer_adadelta +#' +#' +#' @export +#' @family optimizers +#' @returns an `Optimizer` instance +#' @seealso +#' + # + +#' +#' @tether keras.optimizers.SGD optimizer_sgd <- function (learning_rate = 0.01, momentum = 0, nesterov = FALSE, + weight_decay = NULL, clipnorm = NULL, clipvalue = NULL, global_clipnorm = NULL, + use_ema = FALSE, ema_momentum = 0.99, ema_overwrite_frequency = NULL, + name = "SGD", ..., loss_scale_factor = NULL, + gradient_accumulation_steps = NULL) +{ + args <- capture_args(list(ema_overwrite_frequency = as_integer, + gradient_accumulation_steps = as_integer)) + do.call(keras$optimizers$SGD, args) } diff --git a/R/package.R b/R/package.R index 6054c92a3e..f70b48eea3 100644 --- a/R/package.R +++ b/R/package.R @@ -13,91 +13,49 @@ #' Turing machine.
#' - Is capable of running on top of multiple back-ends including #' [TensorFlow](https://github.com/tensorflow/tensorflow), -#' [CNTK](https://github.com/Microsoft/cntk), -#' or [Theano](https://github.com/Theano/Theano). +#' [JAX](https://github.com/google/jax), +#' or [PyTorch](https://github.com/pytorch/pytorch). #' -#' See the package website at for complete documentation. +#' See the package website at for complete documentation. #' -#' @import methods -#' @import R6 -#' @importFrom reticulate import dict iterate import_from_path py_iterator py_call py_capture_output py_get_attr py_has_attr py_is_null_xptr py_to_r r_to_py tuple +#' @importFrom reticulate +#' import import_from_path py_install +#' dict tuple +#' iterate py_iterator iter_next +#' py_call py_eval +#' py_capture_output py_is_null_xptr +#' py_get_attr py_has_attr +#' py_to_r r_to_py +#' np_array #' @importFrom graphics par plot points -#' @importFrom tensorflow tf_version tf_config install_tensorflow -#' @aliases keras-package +#' @importFrom tensorflow tf_version tf_config install_tensorflow all_dims +#' @aliases keras3-package "_PACKAGE" + # package level global state .globals <- new.env(parent = emptyenv()) -#' Select a Keras implementation and backend -#' -#' @param implementation One of "keras" or "tensorflow" (defaults to "keras"). -#' @param backend One of "tensorflow", "cntk", or "theano" (defaults -#' to "tensorflow") -#' -#' @details -#' Keras has multiple implementations (the original keras implementation -#' and the implementation native to TensorFlow) and supports multiple -#' backends ("tensorflow", "cntk", "theano", and "plaidml"). These functions allow -#' switching between the various implementations and backends. -#' -#' The functions should be called after `library(keras)` and before calling -#' other functions within the package (see below for an example). -#' -#' The default implementation and backend should be suitable for most -#' use cases. The "tensorflow" implementation is useful when using Keras -#' in conjunction with TensorFlow Estimators (the \pkg{tfestimators} -#' R package). -#' -#' @examples \dontrun{ -#' # use the tensorflow implementation -#' library(keras) -#' use_implementation("tensorflow") -#' -#' # use the cntk backend -#' library(keras) -#' use_backend("theano") -#' } -#' -#' @export -use_implementation <- function(implementation = c("keras", "tensorflow")) { - Sys.setenv(KERAS_IMPLEMENTATION = match.arg(implementation)) -} - - -#' @rdname use_implementation -#' @export -use_backend <- function(backend = c("tensorflow", "cntk", "theano", "plaidml")) { - backend <- match.arg(backend) - if (backend == "plaidml") { - pml_keras <- import("plaidml.keras", delay_load = list( - priority = 20 - )) - pml_keras$install_backend() - } else { - Sys.setenv(KERAS_BACKEND = match.arg(backend)) - } - if (backend != "tensorflow") use_implementation("keras") -} - - #' Main Keras module #' -#' #' The `keras` module object is the equivalent of -#' `keras <- tensorflow::tf$keras` and provided mainly as a convenience. +#' `reticulate::import("keras")` and is provided mainly as a convenience.
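+#' +#' For example, a quick sketch of reaching a Python-side Keras API directly through it (assuming Keras is installed): +#' +#' ```{r, eval = FALSE} +#' keras$config$backend() # name of the active backend, e.g. "tensorflow" +#' ```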
#' -#' @return the keras Python module +#' @returns the keras Python module #' @export +#' @usage NULL +#' @format An object of class `python.builtin.module` keras <- NULL - .onLoad <- function(libname, pkgname) { - # resolve the implementation module (might be keras proper or might be tensorflow) - implementation_module <- resolve_implementation_module() + # TODO: remove this requireNamespace() + # tensorflow:::.onLoad() registers some reticulate class filter hooks + # we need to identify tensors reliably. + requireNamespace("tensorflow", quietly = TRUE) + maybe_register_S3_methods() # if KERAS_PYTHON is defined then forward it to RETICULATE_PYTHON keras_python <- get_keras_python() @@ -105,26 +63,25 @@ keras <- NULL Sys.setenv(RETICULATE_PYTHON = keras_python) # delay load keras - keras <<- import(implementation_module, delay_load = list( + try(keras <<- import("keras", delay_load = list( - priority = 10, + priority = 10, # tensorflow priority == 5 - environment = "r-reticulate", + environment = "r-keras", - get_module = function() { - resolve_implementation_module() - }, + # get_module = function() { + # resolve_implementation_module() + # }, on_load = function() { # check version - check_implementation_version() + # check_implementation_version() - # patch progress bar for interactive/tty sessions - if ((interactive() || isatty(stdout())) && keras_version() >= "2.0.9") { - python_path <- system.file("python", package = "keras") - tools <- import_from_path("kerastools", path = python_path) - tools$progbar$apply_patch() - } + tryCatch( + import("tensorflow")$experimental$numpy$experimental_enable_numpy_behavior(), + error = function(e) { + warning("failed setting experimental_enable_numpy_behavior") + }) }, @@ -139,34 +96,37 @@ keras <- NULL } } } - )) + ))) # register class filter to alias classes to 'keras' - reticulate::register_class_filter(function(classes) { - - module <- resolve_implementation_module() - - if (identical(module, "tensorflow.keras")) - module <- "tensorflow.python.keras" - - classes <- sub(paste0("^", module), "keras", classes) - - - # let KerasTensor inherit all the S3 methods of tf.Tensor, but - # KerasTensor methods take precedence. - if("keras.engine.keras_tensor.KerasTensor" %in% classes) - classes <- unique(c("keras.engine.keras_tensor.KerasTensor", - "tensorflow.tensor", - classes)) - classes - }) + # reticulate::register_class_filter(function(classes) { + # + # module <- resolve_implementation_module() + # + # if (identical(module, "tensorflow.keras")) + # module <- "tensorflow.python.keras" + # + # # replace "tensorflow.python.keras.*" with "keras.*" + # classes <- sub(paste0("^", module), "keras", classes) + # + # # All python symbols moved in v2.13 under .src + # classes <- sub("^keras\\.src\\.", "keras.", classes) + # + # # let KerasTensor inherit all the S3 methods of tf.Tensor, but + # # KerasTensor methods take precedence. 
+ # if(any("keras.engine.keras_tensor.KerasTensor" %in% classes)) + # classes <- unique(c("keras.engine.keras_tensor.KerasTensor", + # "tensorflow.tensor", + # classes)) + # classes + # }) # tensorflow use_session hooks setHook("tensorflow.on_before_use_session", tensorflow_on_before_use_session) setHook("tensorflow.on_use_session", tensorflow_on_use_session) - registerS3method("[[", "python_class_super", active_extract2) - registerS3method("$", "python_class_super", active_extract2) + # on_load_make_as_activation() } @@ -175,40 +135,42 @@ keras_not_found_message <- function(error_message) { message("Use the install_keras() function to install the core Keras library") } -active_extract2 <- function(x, name) { - name <- switch(name, - "initialize" = "__init__", - "finalize" = "__del__", - name) - cl <- substitute(x()$name, list(x = substitute(x), name = as.symbol(name))) - eval.parent(cl) -} +maybe_register_S3_methods <- function() { + # Tensorflow 2.16 exports these methods, but we don't need to + # take a dep on TF>=2.16. So we conditionally export them if the installed + # tensorflow package is older. This is to avoid a warning about + # overwritten S3 methods on package load. + .register_no_overwrite <- function(class) { + if (is.null(utils::getS3method("py_to_r", class, optional = TRUE, + envir = asNamespace("reticulate")))) { + + # __ instead of . to avoid a roxygen warning about unexported S3 methods + method <- get(paste0("py_to_r__", class)) + registerS3method("py_to_r", class, method, + envir = asNamespace("reticulate")) + } + } + .register_no_overwrite("keras.src.utils.tracking.TrackedDict") + .register_no_overwrite("keras.src.utils.tracking.TrackedList") + .register_no_overwrite("keras.src.utils.tracking.TrackedSet") } resolve_implementation_module <- function() { # determine implementation to use - implementation <- get_keras_implementation() + module <- get_keras_implementation() # set the implementation module - if (identical(implementation, "tensorflow")) - implementation_module <- "tensorflow.keras" - else - implementation_module <- implementation + if (identical(module, "tensorflow")) + module <- "tensorflow.keras" # return implementation_module - implementation_module + module } -get_keras_implementation <- function(default = "tensorflow") { - out <- get_keras_option("KERAS_IMPLEMENTATION", default = default) - if (out != "tensorflow") - rlang::warn(c( - paste0("We no longer support the '", out, "' Keras implementation."), - "Use Sys.setenv(KERAS_IMPLEMENTATION='tensorflow') or unset that environment variable."
- ), .frequency = "once", .frequency_id = "implementation") - out +get_keras_implementation <- function(default = "keras") { + get_keras_option("KERAS_IMPLEMENTATION", default = default) } get_keras_python <- function(default = NULL) { @@ -260,7 +222,7 @@ check_implementation_version <- function() { name <- "Keras" ver <- keras_version() required_ver <- "2.0.0" - update_with <- "keras::install_keras()" + update_with <- "keras3::install_keras()" } # check version if we can @@ -276,7 +238,58 @@ check_implementation_version <- function() { # Current version of Keras keras_version <- function() { - ver <- keras$`__version__` - ver <- regmatches(ver, regexec("^([0-9\\.]+).*$", ver))[[1]][[2]] + if(keras$`__name__` == "keras_core") + return(package_version("3.0.0")) + ver <- + as_r_value(py_get_attr(keras, "__version__", TRUE)) %||% + tensorflow::tf_config()$version_str + ver <- gsub("[^0-9.-]+", ".", as.character(ver), perl = TRUE) + ver <- gsub("[.-]+", ".", ver, perl = TRUE) package_version(ver) } + + + +#' Check if Keras is Available +#' +#' Probe to see whether the Keras Python package is available in the current +#' system environment. +#' +#' @param version Minimum required version of Keras (defaults to `NULL`, no +#' required version). +#' +#' @returns Logical indicating whether Keras (or the specified minimum version of +#' Keras) is available. +#' +#' @examples +#' \dontrun{ +#' # testthat utility for skipping tests when Keras isn't available +#' skip_if_no_keras <- function(version = NULL) { +#' if (!is_keras_available(version)) +#' skip("Required keras version not available for testing") +#' } +#' +#' # use the function within a test +#' test_that("keras function works correctly", { +#' skip_if_no_keras() +#' # test code here +#' }) +#' } +#' +#' @noRd +# @export +is_keras_available <- function(version = NULL) { + implementation_module <- resolve_implementation_module() + if (reticulate::py_module_available(implementation_module)) { + if (!is.null(version)) + keras_version() >= version + else + TRUE + } else { + FALSE + } +} + +# TODO: add option in `is_keras_available()` to avoid initializing Python +# (maybe in a callr call?), reexport. +# TODO: add func `is_backend_available()`, usage `is_backend_available("tensorflow")` diff --git a/R/preprocessing-image.R b/R/preprocessing-image.R new file mode 100644 index 0000000000..83014ef62c --- /dev/null +++ b/R/preprocessing-image.R @@ -0,0 +1,87 @@ + + +#' Resize images to a target size without aspect ratio distortion. +#' +#' @description +#' Image datasets typically yield images that each have a different +#' size. However, these images need to be batched before they can be +#' processed by Keras layers. To be batched, images need to share the same +#' height and width. +#' +#' You could simply do, in TF (or JAX equivalent): +#' +#' ```{r, eval = FALSE} +#' size <- c(200, 200) +#' ds <- ds$map(\(img) tf$image$resize(img, size)) +#' ``` +#' +#' However, if you do this, you distort the aspect ratio of your images, since +#' in general they do not all have the same aspect ratio as `size`. This is +#' fine in many cases, but not always (e.g. for image generation models +#' this can be a problem). +#' +#' Note that passing the argument `preserve_aspect_ratio = TRUE` to `tf$image$resize()` +#' will preserve the aspect ratio, but at the cost of no longer respecting the +#' provided target size.
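+#' For instance (sizes purely illustrative): with `preserve_aspect_ratio = TRUE`, +#' resizing a `(340, 500)` image to `size = c(200, 200)` returns a `(136, 200)` +#' image that fits within the target size instead of matching it exactly.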
+#' +#' This calls for: +#' +#' ```{r, eval = FALSE} +#' size <- c(200, 200) +#' ds <- ds$map(\(img) image_smart_resize(img, size)) +#' ``` +#' +#' Your output images will actually be `(200, 200)`, and will not be distorted. +#' Instead, the parts of the image that do not fit within the target size +#' get cropped out. +#' +#' The resizing process is: +#' +#' 1. Take the largest centered crop of the image that has the same aspect +#' ratio as the target size. For instance, if `size = c(200, 200)` and the input +#' image has size `(340, 500)`, we take a crop of `(340, 340)` centered along +#' the width. +#' 2. Resize the cropped image to the target size. In the example above, +#' we resize the `(340, 340)` crop to `(200, 200)`. +#' +#' @returns +#' Array with shape `(size[1], size[2], channels)`. +#' If the input image was an array, the output is an array, +#' and if it was a backend-native tensor, +#' the output is a backend-native tensor. +#' +#' @param x +#' Input image or batch of images (as a tensor or array). +#' Must be in format `(height, width, channels)` +#' or `(batch_size, height, width, channels)`. +#' +#' @param size +#' Tuple of `(height, width)` integers. Target size. +#' +#' @param interpolation +#' String, interpolation to use for resizing. +#' Defaults to `'bilinear'`. +#' Supports `bilinear`, `nearest`, `bicubic`, +#' `lanczos3`, `lanczos5`. +#' +#' @param data_format +#' `"channels_last"` or `"channels_first"`. +#' +#' @param backend_module +#' Backend module to use (if different from the default +#' backend). +#' +#' @export +#' @family image utils +#' @family utils +#' @family preprocessing # @seealso # + +#' @tether keras.preprocessing.image.smart_resize image_smart_resize <- function (x, size, interpolation = "bilinear", data_format = "channels_last", + backend_module = NULL) { + args <- capture_args(list(size = as_integer)) + do.call(keras$preprocessing$image$smart_resize, args) } diff --git a/R/preprocessing.R b/R/preprocessing.R deleted file mode 100644 index 9ec6f31a02..0000000000 --- a/R/preprocessing.R +++ /dev/null @@ -1,1434 +0,0 @@ - -#' Pads sequences to the same length -#' -#' @details This function transforms a list of `num_samples` sequences (lists -#' of integers) into a matrix of shape `(num_samples, num_timesteps)`. -#' `num_timesteps` is either the `maxlen` argument if provided, or the length -#' of the longest sequence otherwise. -#' -#' Sequences that are shorter than `num_timesteps` are padded with `value` at -#' the end. -#' -#' Sequences longer than `num_timesteps` are truncated so that they fit the -#' desired length. The position where padding or truncation happens is -#' determined by the arguments `padding` and `truncating`, respectively. -#' -#' Pre-padding is the default. -#' -#' @param sequences List of lists where each element is a sequence -#' @param maxlen int, maximum length of all sequences -#' @param dtype type of the output sequences -#' @param padding 'pre' or 'post', pad either before or after each sequence.
-#' @param truncating 'pre' or 'post', remove values from sequences larger than -#' maxlen either in the beginning or in the end of the sequence -#' @param value float, padding value -#' -#' @return Matrix with dimensions (number_of_sequences, maxlen) -#' -#' @family text preprocessing -#' -#' @export -pad_sequences <- function(sequences, maxlen = NULL, dtype = "int32", padding = "pre", - truncating = "pre", value = 0.0) { - - # force length-1 sequences to list (so they aren't treated as scalars) - if (is.list(sequences)) { - sequences <- lapply(sequences, function(seq) { - if (length(seq) == 1) - as.list(seq) - else - seq - }) - } - - keras$preprocessing$sequence$pad_sequences( - sequences = sequences, - maxlen = as_nullable_integer(maxlen), - dtype = dtype, - padding = padding, - truncating = truncating, - value = value - ) -} - -#' Generates skipgram word pairs. -#' -#' @details -#' This function transforms a list of word indexes (lists of integers) -#' into lists of words of the form: -#' -#' - (word, word in the same window), with label 1 (positive samples). -#' - (word, random word from the vocabulary), with label 0 (negative samples). -#' -#' Read more about Skipgram in this gnomic paper by Mikolov et al.: -#' [Efficient Estimation of Word Representations in Vector Space](https://arxiv.org/pdf/1301.3781v3.pdf) -#' -#' @param sequence A word sequence (sentence), encoded as a list of word indices -#' (integers). If using a `sampling_table`, word indices are expected to match -#' the rank of the words in a reference dataset (e.g. 10 would encode the -#' 10-th most frequently occuring token). Note that index 0 is expected to be -#' a non-word and will be skipped. -#' @param vocabulary_size Int, maximum possible word index + 1 -#' @param window_size Int, size of sampling windows (technically half-window). -#' The window of a word `w_i` will be `[i-window_size, i+window_size+1]` -#' @param negative_samples float >= 0. 0 for no negative (i.e. random) samples. 1 -#' for same number as positive samples. -#' @param shuffle whether to shuffle the word couples before returning them. -#' @param categorical bool. if `FALSE`, labels will be integers (eg. `[0, 1, 1 .. ]`), -#' if `TRUE` labels will be categorical eg. `[[1,0],[0,1],[0,1] .. ]` -#' @param sampling_table 1D array of size `vocabulary_size` where the entry i -#' encodes the probabibily to sample a word of rank i. -#' @param seed Random seed -#' -#' @return List of `couples`, `labels` where: -#' - `couples` is a list of 2-element integer vectors: `[word_index, other_word_index]`. -#' - `labels` is an integer vector of 0 and 1, where 1 indicates that `other_word_index` -#' was found in the same window as `word_index`, and 0 indicates that `other_word_index` -#' was random. -#' - if `categorical` is set to `TRUE`, the labels are categorical, ie. 1 becomes `[0,1]`, -#' and 0 becomes `[1, 0]`. 
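For reference, the removed `skipgrams()` documented above was typically driven as follows. This is a minimal sketch of the legacy (pre-keras3) API; the encoded sentence and vocabulary size are illustrative, not taken from this diff:

```r
library(keras)  # the legacy package being retired here, not keras3

# a sentence encoded as word ranks; index 0 is reserved for non-words
sentence <- c(1L, 3L, 2L, 7L, 5L)

sg <- skipgrams(sentence,
                vocabulary_size = 10,
                window_size = 2,
                negative_samples = 1)

# sg$couples: 2-element integer vectors (word_index, other_word_index)
# sg$labels:  1 = true context pair, 0 = negative (random) sample
str(sg$couples)
table(unlist(sg$labels))
```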
-#' -#' @family text preprocessing -#' -#' @export -skipgrams <- function(sequence, vocabulary_size, window_size = 4, negative_samples = 1.0, - shuffle = TRUE, categorical = FALSE, sampling_table = NULL, seed = NULL) { - - args <- list( - sequence = as.integer(sequence), - vocabulary_size = as.integer(vocabulary_size), - window_size = as.integer(window_size), - negative_samples = negative_samples, - shuffle = shuffle, - categorical = categorical, - sampling_table = sampling_table - ) - - if (keras_version() >= "2.0.7") - args$seed <- as_nullable_integer(seed) - - sg <- do.call(keras$preprocessing$sequence$skipgrams, args) - - sg <- list( - couples = sg[[1]], - labels = sg[[2]] - ) -} - - -#' Generates a word rank-based probabilistic sampling table. -#' -#' @details -#' -#' Used for generating the `sampling_table` argument for [skipgrams()]. -#' `sampling_table[[i]]` is the probability of sampling the word i-th most common -#' word in a dataset (more common words should be sampled less frequently, for balance). -#' -#' The sampling probabilities are generated according to the sampling distribution used in word2vec: -#' -#' `p(word) = min(1, sqrt(word_frequency / sampling_factor) / (word_frequency / sampling_factor))` -#' -#' We assume that the word frequencies follow Zipf's law (s=1) to derive a -#' numerical approximation of frequency(rank): -#' -#' `frequency(rank) ~ 1/(rank * (log(rank) + gamma) + 1/2 - 1/(12*rank))` -#' -#' where `gamma` is the Euler-Mascheroni constant. -#' -#' @param size Int, number of possible words to sample. -#' @param sampling_factor The sampling factor in the word2vec formula. -#' -#' @return An array of length `size` where the ith entry is the -#' probability that a word of rank i should be sampled. -#' -#' @note The word2vec formula is: p(word) = min(1, -#' sqrt(word.frequency/sampling_factor) / (word.frequency/sampling_factor)) -#' -#' @family text preprocessing -#' -#' @export -make_sampling_table <- function(size, sampling_factor = 1e-05) { - keras$preprocessing$sequence$make_sampling_table( - size = as.integer(size), - sampling_factor = sampling_factor - ) -} - -#' Convert text to a sequence of words (or tokens). -#' -#' @param text Input text (string). -#' @param filters Sequence of characters to filter out such as -#' punctuation. Default includes basic punctuation, tabs, and newlines. -#' @param lower Whether to convert the input to lowercase. -#' @param split Sentence split marker (string). -#' -#' @return Words (or tokens) -#' -#' @family text preprocessing -#' -#' @export -text_to_word_sequence <- function(text, filters = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', - lower = TRUE, split=' ') { - keras$preprocessing$text$text_to_word_sequence( - text = text, - filters = filters, - lower = lower, - split = split - ) -} - -#' One-hot encode a text into a list of word indexes in a vocabulary of size n. -#' -#' @param n Size of vocabulary (integer) -#' @param input_text Input text (string). -#' @inheritParams text_to_word_sequence -#' @param text for compatibility purpose. use `input_text` instead. -#' -#' @return List of integers in `[1, n]`. Each integer encodes a word (unicity -#' non-guaranteed). -#' -#' @family text preprocessing -#' -#' @export -text_one_hot <- function(input_text, n, filters = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', - lower = TRUE, split = ' ', text = NULL) { - - if (tensorflow::tf_version() >= "2.3" && !is.null(text)) { - warning("text is deprecated as of TF 2.3. 
use input_text instead") - if (!missing(input_text)) - stop("input_text and text must not be bopth specified") - input_text <- text - } - - keras$preprocessing$text$one_hot( - input_text, - n = as.integer(n), - filters = filters, - lower = lower, - split = split - ) -} - -#' Converts a text to a sequence of indexes in a fixed-size hashing space. -#' -#' @param text Input text (string). -#' @param n Dimension of the hashing space. -#' @param hash_function if `NULL` uses the Python `hash()` function. Otherwise can be `'md5'` or -#' any function that takes in input a string and returns an int. Note that -#' `hash` is not a stable hashing function, so it is not consistent across -#' different runs, while `'md5'` is a stable hashing function. -#' @param filters Sequence of characters to filter out such as -#' punctuation. Default includes basic punctuation, tabs, and newlines. -#' @param lower Whether to convert the input to lowercase. -#' @param split Sentence split marker (string). -#' -#' @return A list of integer word indices (unicity non-guaranteed). -#' -#' @details -#' Two or more words may be assigned to the same index, due to possible -#' collisions by the hashing function. -#' -#' @family text preprocessing -#' -#' @export -text_hashing_trick <- function(text, n, - hash_function = NULL, - filters = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', - lower = TRUE, split = ' ') { - if (length(text) != 1) { - stop("`text` should be length 1.") - } - if (is.na(text)) { - return(NA_integer_) - } - keras$preprocessing$text$hashing_trick( - text = text, - n = as.integer(n), - hash_function = hash_function, - filters = filters, - lower = lower, - split = split - ) -} - - - -#' Text tokenization utility -#' -#' Vectorize a text corpus, by turning each text into either a sequence of -#' integers (each integer being the index of a token in a dictionary) or into a -#' vector where the coefficient for each token could be binary, based on word -#' count, based on tf-idf... -#' -#' @details By default, all punctuation is removed, turning the texts into -#' space-separated sequences of words (words maybe include the ' character). -#' These sequences are then split into lists of tokens. They will then be -#' indexed or vectorized. `0` is a reserved index that won't be assigned to any -#' word. -#' -#' @param num_words the maximum number of words to keep, based on word -#' frequency. Only the most common `num_words` words will be kept. -#' @param filters a string where each element is a character that will be -#' filtered from the texts. The default is all punctuation, plus tabs and line -#' breaks, minus the ' character. -#' @param lower boolean. Whether to convert the texts to lowercase. -#' @param split character or string to use for token splitting. -#' @param char_level if `TRUE`, every character will be treated as a token -#' @param oov_token `NULL` or string If given, it will be added to `word_index`` -#' and used to replace out-of-vocabulary words during text_to_sequence calls. -#' -#' @section Attributes: -#' The tokenizer object has the following attributes: -#' - `word_counts` --- named list mapping words to the number of times they appeared -#' on during fit. Only set after `fit_text_tokenizer()` is called on the tokenizer. -#' - `word_docs` --- named list mapping words to the number of documents/texts they -#' appeared on during fit. Only set after `fit_text_tokenizer()` is called on the tokenizer. -#' - `word_index` --- named list mapping words to their rank/index (int). 
Only set -#' after `fit_text_tokenizer()` is called on the tokenizer. -#' - `document_count` --- int. Number of documents (texts/sequences) the tokenizer -#' was trained on. Only set after `fit_text_tokenizer()` is called on the tokenizer. -#' -#' @family text tokenization -#' -#' @export -text_tokenizer <- function(num_words = NULL, filters = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', - lower = TRUE, split = ' ', char_level = FALSE, oov_token = NULL) { - args <- list( - num_words = as_nullable_integer(num_words), - filters = filters, - lower = lower, - split = split, - char_level = char_level - ) - - if (keras_version() >= "2.1.3") - args$oov_token <- oov_token - - do.call(keras$preprocessing$text$Tokenizer, args) -} - -#' Update tokenizer internal vocabulary based on a list of texts or list of -#' sequences. -#' -#' @param object Tokenizer returned by [text_tokenizer()] -#' @param x Vector/list of strings, or a generator of strings (for -#' memory-efficiency); Alternatively a list of "sequence" (a sequence is a -#' list of integer word indices). -#' -#' @note Required before using [texts_to_sequences()], [texts_to_matrix()], or -#' [sequences_to_matrix()]. -#' -#' @family text tokenization -#' -#' @export -fit_text_tokenizer <- function(object, x) { - tokenizer <- object - if (is.list(x)) - tokenizer$fit_on_sequences(x) - else { - tokenizer$fit_on_texts(if (is.function(x)) reticulate::py_iterator(x) else as_texts(x)) - } - invisible(tokenizer) -} - - -#' Save a text tokenizer to an external file -#' -#' Enables persistence of text tokenizers alongside saved models. -#' -#' @details -#' You should always use the same text tokenizer for training and -#' prediction. In many cases however prediction will occur in another -#' session with a version of the model loaded via [load_model_hdf5()]. -#' -#' In this case you need to save the text tokenizer object after training -#' and then reload it prior to prediction. -#' -#' @param object Text tokenizer fit with [fit_text_tokenizer()] -#' @param filename File to save/load -#' -#' @family text tokenization -#' -#' @examples \dontrun{ -#' -#' # vectorize texts then save for use in prediction -#' tokenizer <- text_tokenizer(num_words = 10000) %>% -#' fit_text_tokenizer(tokenizer, texts) -#' save_text_tokenizer(tokenizer, "tokenizer") -#' -#' # (train model, etc.) -#' -#' # ...later in another session -#' tokenizer <- load_text_tokenizer("tokenizer") -#' -#' # (use tokenizer to preprocess data for prediction) -#' -#' } -#' -#' @importFrom reticulate py_save_object -#' @export -save_text_tokenizer <- function(object, filename) { - py_save_object(object, filename) - invisible(object) -} - - -#' @importFrom reticulate py_load_object -#' @rdname save_text_tokenizer -#' @export -load_text_tokenizer <- function(filename) { - py_load_object(filename) -} - - - - - - -#' Transform each text in texts in a sequence of integers. -#' -#' Only top "num_words" most frequent words will be taken into account. -#' Only words known by the tokenizer will be taken into account. -#' -#' @param tokenizer Tokenizer -#' @param texts Vector/list of texts (strings). -#' -#' @family text tokenization -#' -#' @export -texts_to_sequences <- function(tokenizer, texts) { - tokenizer$texts_to_sequences(as_texts(texts)) -} - -#' Transforms each text in texts in a sequence of integers. -#' -#' Only top "num_words" most frequent words will be taken into account. -#' Only words known by the tokenizer will be taken into account. 
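Taken together, the tokenizer helpers deleted in this file composed into a single pipeline. Below is a minimal sketch of that legacy (pre-keras3) workflow with made-up texts. Note that with `%>%` the tokenizer is already supplied as the first argument of `fit_text_tokenizer()`, so it must not be passed a second time, as the `save_text_tokenizer()` example above mistakenly does:

```r
library(keras)  # legacy (pre-keras3) API removed by this diff

texts <- c("the cat sat on the mat",
           "the dog ate my homework")

tokenizer <- text_tokenizer(num_words = 1000) %>%
  fit_text_tokenizer(texts)

# one integer sequence per input text
seqs <- texts_to_sequences(tokenizer, texts)

# pad/truncate to a common length so the sequences can be batched
x <- pad_sequences(seqs, maxlen = 8)

# persist the fitted tokenizer so prediction reuses the same vocabulary
save_text_tokenizer(tokenizer, "tokenizer")
tokenizer2 <- load_text_tokenizer("tokenizer")
```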
-#' -#' @inheritParams texts_to_sequences -#' -#' @return Generator which yields individual sequences -#' -#' @family text tokenization -#' -#' @export -texts_to_sequences_generator <- function(tokenizer, texts) { - tokenizer$texts_to_sequences_generator(as_texts(texts)) -} - - -#' Convert a list of texts to a matrix. -#' -#' @inheritParams texts_to_sequences -#' -#' @param mode one of "binary", "count", "tfidf", "freq". -#' -#' @return A matrix -#' -#' @family text tokenization -#' -#' @export -texts_to_matrix <- function(tokenizer, texts, mode = c("binary", "count", "tfidf", "freq")) { - tokenizer$texts_to_matrix( - texts = as_texts(texts), - mode = mode - ) -} - -as_texts <- function(texts) { - if (is.character(texts) && length(texts) == 1) - as.array(texts) - else - texts -} - - -#' Convert a list of sequences into a matrix. -#' -#' @inheritParams texts_to_matrix -#' -#' @param sequences List of sequences (a sequence is a list of integer word indices). -#' -#' @return A matrix -#' -#' @family text tokenization -#' -#' @export -sequences_to_matrix <- function(tokenizer, sequences, mode = c("binary", "count", "tfidf", "freq")) { - - # force length-1 sequences to list (so they aren't treated as scalars) - if (is.list(sequences)) { - sequences <- lapply(sequences, function(seq) { - if (length(seq) == 1) - as.list(seq) - else - seq - }) - } - - tokenizer$sequences_to_matrix( - sequences = sequences, - mode = mode - ) -} - - -#' Loads an image into PIL format. -#' -#' @param path Path to image file -#' @param grayscale DEPRECATED use `color_mode="grayscale"` -#' @param color_mode One of {`"grayscale"`, `"rgb"`, `"rgba"`}. -#' Default: `"rgb"`. The desired image format. -#' @param target_size Either `NULL` (default to original size) or integer vector -#' `(img_height, img_width)`. -#' @param interpolation Interpolation method used to resample the image if the -#' target size is different from that of the loaded image. Supported methods -#' are "nearest", "bilinear", and "bicubic". If PIL version 1.1.3 or newer is -#' installed, "lanczos" is also supported. If PIL version 3.4.0 or newer is -#' installed, "box" and "hamming" are also supported. By default, "nearest" -#' is used. -#' -#' @return A PIL Image instance. -#' -#' @family image preprocessing -#' -#' @export -image_load <- function(path, grayscale = FALSE, color_mode='rgb', - target_size = NULL, - interpolation = "nearest") { - - if (!have_pillow()) - stop("The Pillow Python package is required to load images") - - # normalize target_size - if (!is.null(target_size)) { - if (length(target_size) != 2) - stop("target_size must be 2 element integer vector") - target_size <- as.integer(target_size) - target_size <- tuple(target_size[[1]], target_size[[2]]) - } - - args <- list( - path = normalize_path(path), - color_mode = color_mode, - grayscale = grayscale, - target_size = target_size - ) - - if (keras_version() >= "2.0.9") - args$interpolation <- interpolation - - do.call(keras$preprocessing$image$load_img, args) -} - - - -#' 3D array representation of images -#' -#' 3D array that represents an image with dimensions (height,width,channels) or -#' (channels,height,width) depending on the data_format. -#' -#' @param img Image -#' @param path Path to save image to -#' @param width Width to resize to -#' @param height Height to resize to -#' @param data_format Image data format ("channels_last" or "channels_first") -#' @param file_format Optional file format override. 
If omitted, the format to -#' use is determined from the filename extension. If a file object was used -#' instead of a filename, this parameter should always be used. -#' @param scale Whether to rescale image values to be within 0,255 -#' -#' @family image preprocessing -#' -#' @export -image_to_array <- function(img, data_format = c("channels_last", "channels_first")) { - keras$preprocessing$image$img_to_array( - img = img, - data_format = match.arg(data_format) - ) -} - -#' @rdname image_to_array -#' @export -image_array_resize <- function(img, height, width, - data_format = c("channels_last", "channels_first")) { - - # imports - np <- import("numpy") - scipy <- import("scipy") - - # make copy as necessary - img <- np$copy(img) - - # capture dimensions and reduce to 3 if necessary - dims <- dim(img) - is_4d_array <- FALSE - if (length(dims) == 4 && dims[[1]] == 1) { - is_4d_array <- TRUE - img <- array_reshape(img, dims[-1]) - } - - # calculate zoom factors (invert the dimensions to reflect height,width - # order of numpy/scipy array represenations of images) - data_format <- match.arg(data_format) - if (data_format == "channels_last") { - factors <- tuple( - height / dim(img)[[1]], - width / dim(img)[[2]], - 1 - ) - } else { - factors <- tuple( - 1, - height / dim(img)[[1]], - width / dim(img)[[2]], - ) - } - - # zoom - img <- scipy$ndimage$zoom(img, factors, order = 1L) - - # reshape if necessary - if (is_4d_array) - img <- array_reshape(img, dim = c(1, dim(img))) - - # return - img -} - -#' @rdname image_to_array -#' @export -image_array_save <- function(img, path, data_format = NULL, file_format = NULL, scale = TRUE) { - if (keras_version() >= "2.2.0") { - keras$preprocessing$image$save_img( - path, img, - data_format = data_format, - file_format = file_format, - scale = scale - ) - } else { - pil <- import("PIL") - pil$Image$fromarray(reticulate::r_to_py(img)$astype("uint8"))$save(path) - } -} - - - - -#' Generate batches of image data with real-time data augmentation. The data will be -#' looped over (in batches). -#' -#' @param featurewise_center Set input mean to 0 over the dataset, feature-wise. -#' @param samplewise_center Boolean. Set each sample mean to 0. -#' @param featurewise_std_normalization Divide inputs by std of the dataset, feature-wise. -#' @param samplewise_std_normalization Divide each input by its std. -#' @param zca_whitening apply ZCA whitening. -#' @param zca_epsilon Epsilon for ZCA whitening. Default is 1e-6. -#' @param rotation_range degrees (0 to 180). -#' @param width_shift_range fraction of total width. -#' @param height_shift_range fraction of total height. -#' @param brightness_range the range of brightness to apply -#' @param shear_range shear intensity (shear angle in radians). -#' @param zoom_range amount of zoom. if scalar z, zoom will be randomly picked -#' in the range `[1-z, 1+z]`. A sequence of two can be passed instead to select -#' this range. -#' @param channel_shift_range shift range for each channels. -#' @param fill_mode One of "constant", "nearest", "reflect" or "wrap". -#' Points outside the boundaries of the input are filled according to -#' the given mode: -#' - "constant": `kkkkkkkk|abcd|kkkkkkkk` (`cval=k`) -#' - "nearest": `aaaaaaaa|abcd|dddddddd` -#' - "reflect": `abcddcba|abcd|dcbaabcd` -#' - "wrap": `abcdabcd|abcd|abcdabcd` -#' @param cval value used for points outside the boundaries when fill_mode is -#' 'constant'. Default is 0. -#' @param horizontal_flip whether to randomly flip images horizontally. 
-#' @param vertical_flip whether to randomly flip images vertically. -#' @param rescale rescaling factor. If NULL or 0, no rescaling is applied, -#' otherwise we multiply the data by the value provided (before applying any -#' other transformation). -#' @param preprocessing_function function that will be implied on each input. -#' The function will run before any other modification on it. The function -#' should take one argument: one image (tensor with rank 3), and should -#' output a tensor with the same shape. -#' @param data_format 'channels_first' or 'channels_last'. In 'channels_first' -#' mode, the channels dimension (the depth) is at index 1, in 'channels_last' -#' mode it is at index 3. It defaults to the `image_data_format` value found -#' in your Keras config file at `~/.keras/keras.json`. If you never set it, -#' then it will be "channels_last". -#' @param validation_split fraction of images reserved for validation (strictly between 0 and 1). -#' -#' @export -image_data_generator <- function(featurewise_center = FALSE, samplewise_center = FALSE, - featurewise_std_normalization = FALSE, samplewise_std_normalization = FALSE, - zca_whitening = FALSE, zca_epsilon = 1e-6, rotation_range = 0.0, width_shift_range = 0.0, - height_shift_range = 0.0, brightness_range = NULL, shear_range = 0.0, zoom_range = 0.0, channel_shift_range = 0.0, - fill_mode = "nearest", cval = 0.0, horizontal_flip = FALSE, vertical_flip = FALSE, - rescale = NULL, preprocessing_function = NULL, data_format = NULL, validation_split=0.0) { - args <- list( - featurewise_center = featurewise_center, - samplewise_center = samplewise_center, - featurewise_std_normalization = featurewise_std_normalization, - samplewise_std_normalization = samplewise_std_normalization, - zca_whitening = zca_whitening, - rotation_range = rotation_range, - width_shift_range = width_shift_range, - height_shift_range = height_shift_range, - shear_range = shear_range, - zoom_range = zoom_range, - channel_shift_range = channel_shift_range, - fill_mode = fill_mode, - cval = cval, - horizontal_flip = horizontal_flip, - vertical_flip = vertical_flip, - rescale = rescale, - preprocessing_function = preprocessing_function, - data_format = data_format - ) - if (keras_version() >= "2.0.4") - args$zca_epsilon <- zca_epsilon - if (keras_version() >= "2.1.5") { - args$brightness_range <- brightness_range - args$validation_split <- validation_split - } - - do.call(keras$preprocessing$image$ImageDataGenerator, args) - -} - - -#' Retrieve the next item from a generator -#' -#' Use to retrieve items from generators (e.g. [image_data_generator()]). Will return -#' either the next item or `NULL` if there are no more items. -#' -#' @param generator Generator -#' @param completed Sentinel value to return from `generator_next()` if the iteration -#' completes (defaults to `NULL` but can be any R value you specify). -#' -#' @export -generator_next <- function(generator, completed = NULL) { - reticulate::iter_next(generator, completed = completed) -} - - -#' Fit image data generator internal statistics to some sample data. -#' -#' Required for `featurewise_center`, `featurewise_std_normalization` -#' and `zca_whitening`. -#' -#' @param object [image_data_generator()] -#' @param x array, the data to fit on (should have rank 4). In case of grayscale data, -#' the channels axis should have value 1, and in case of RGB data, it should have value 3. 
-#' @param augment Whether to fit on randomly augmented samples -#' @param rounds If `augment`, how many augmentation passes to do over the data -#' @param seed random seed. -#' -#' @family image preprocessing -#' -#' @export -fit_image_data_generator <- function(object, x, augment = FALSE, rounds = 1, seed = NULL) { - generator <- object - history <- generator$fit( - x = keras_array(x), - augment = augment, - rounds = as.integer(rounds), - seed = seed - ) - invisible(history) -} - -#' Generates batches of augmented/normalized data from image data and labels -#' -#' @details Yields batches indefinitely, in an infinite loop. -#' -#' @param generator Image data generator to use for augmenting/normalizing image -#' data. -#' @param x data. Should have rank 4. In case of grayscale data, the channels -#' axis should have value 1, and in case of RGB data, it should have value 3. -#' @param y labels (can be `NULL` if no labels are required) -#' @param batch_size int (default: `32`). -#' @param shuffle boolean (defaut: `TRUE`). -#' @param seed int (default: `NULL`). -#' @param save_to_dir `NULL` or str (default: `NULL`). This allows you to -#' optionally specify a directory to which to save the augmented pictures being -#' generated (useful for visualizing what you are doing). -#' @param save_prefix str (default: ''). Prefix to use for filenames of saved -#' pictures (only relevant if `save_to_dir` is set). -#' @param save_format one of "png", "jpeg" (only relevant if save_to_dir is -#' set). Default: "png". -#' @param subset Subset of data (`"training"` or `"validation"`) if -#' `validation_split` is set in [image_data_generator()]. -#' @param sample_weight Sample weights. -#' -#' @section Yields: `(x, y)` where `x` is an array of image data and `y` is a -#' array of corresponding labels. The generator loops indefinitely. -#' -#' @family image preprocessing -#' -#' @export -flow_images_from_data <- function( - x, y = NULL, generator = image_data_generator(), batch_size = 32, - shuffle = TRUE, sample_weight = NULL, seed = NULL, - save_to_dir = NULL, save_prefix = "", save_format = 'png', subset = NULL) { - - args <- list( - x = keras_array(x), - y = keras_array(y), - batch_size = as.integer(batch_size), - shuffle = shuffle, - seed = as_nullable_integer(seed), - save_to_dir = normalize_path(save_to_dir), - save_prefix = save_prefix, - save_format = save_format - ) - stopifnot(args$batch_size > 0) - - if (keras_version() >= "2.1.5") - args$subset <- subset - - if (keras_version() >= "2.2.0") - args$sample_weight <- sample_weight - - do.call(generator$flow, args) -} - -#' Generates batches of data from images in a directory (with optional -#' augmented/normalized data) -#' -#' @details Yields batches indefinitely, in an infinite loop. -#' -#' @inheritParams image_load -#' @inheritParams flow_images_from_data -#' -#' @param generator Image data generator (default generator does no data -#' augmentation/normalization transformations) -#' @param directory path to the target directory. It should contain one -#' subdirectory per class. Any PNG, JPG, BMP, PPM, or TIF images inside each -#' of the subdirectories directory tree will be included in the generator. -#' See [this script](https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d) -#' for more details. -#' @param target_size integer vector, default: `c(256, 256)`. The dimensions to -#' which all images found will be resized. -#' @param color_mode one of "grayscale", "rbg". Default: "rgb". 
Whether the -#' images will be converted to have 1 or 3 color channels. -#' @param classes optional list of class subdirectories (e.g. `c('dogs', -#' 'cats')`). Default: `NULL`, If not provided, the list of classes will be -#' automatically inferred (and the order of the classes, which will map to -#' the label indices, will be alphanumeric). -#' @param class_mode one of "categorical", "binary", "sparse" or `NULL`. -#' Default: "categorical". Determines the type of label arrays that are -#' returned: "categorical" will be 2D one-hot encoded labels, "binary" will -#' be 1D binary labels, "sparse" will be 1D integer labels. If `NULL`, no -#' labels are returned (the generator will only yield batches of image data, -#' which is useful to use [predict_generator()], [evaluate_generator()], -#' etc.). -#' @param follow_links whether to follow symlinks inside class subdirectories -#' (default: `FALSE`) -#' -#' @section Yields: `(x, y)` where `x` is an array of image data and `y` is a -#' array of corresponding labels. The generator loops indefinitely. -#' -#' @family image preprocessing -#' -#' @export -flow_images_from_directory <- function( - directory, generator = image_data_generator(), target_size = c(256, 256), color_mode = "rgb", - classes = NULL, class_mode = "categorical", - batch_size = 32, shuffle = TRUE, seed = NULL, - save_to_dir = NULL, save_prefix = "", save_format = "png", - follow_links = FALSE, subset = NULL, interpolation = "nearest") { - - args <- list( - directory = normalize_path(directory), - target_size = as.integer(target_size), - color_mode = color_mode, - classes = classes, - class_mode = class_mode, - batch_size = as.integer(batch_size), - shuffle = shuffle, - seed = as_nullable_integer(seed), - save_to_dir = normalize_path(save_to_dir), - save_prefix = save_prefix, - save_format = save_format, - follow_links = follow_links - ) - stopifnot(args$batch_size > 0) - - if (keras_version() >= "2.1.2") - args$interpolation <- interpolation - - if (keras_version() >= "2.1.5") - args$subset <- subset - - do.call(generator$flow_from_directory, args) -} - -#' Takes the dataframe and the path to a directory and generates batches of -#' augmented/normalized data. -#' -#' @details Yields batches indefinitely, in an infinite loop. -#' -#' @inheritParams image_load -#' @inheritParams flow_images_from_data -#' -#' @param dataframe `data.frame` containing the filepaths relative to -#' directory (or absolute paths if directory is `NULL`) of the images in a -#' character column. It should include other column/s depending on the -#' `class_mode`: -#' - if `class_mode` is "categorical" (default value) it must -#' include the `y_col` column with the class/es of each image. Values in -#' column can be character/list if a single class or list if multiple classes. -#' - if `class_mode` is "binary" or "sparse" it must include the given -#' `y_col` column with class values as strings. -#' - if `class_mode` is "other" it -#' should contain the columns specified in `y_col`. -#' - if `class_mode` is "input" or NULL no extra column is needed. -#' @param directory character, path to the directory to read images from. -#' If `NULL`, data in `x_col` column should be absolute paths. -#' @param x_col character, column in dataframe that contains the filenames -#' (or absolute paths if directory is `NULL`). -#' @param y_col string or list, column/s in dataframe that has the target data. -#' @param color_mode one of "grayscale", "rgb". Default: "rgb". 
Whether the -#' images will be converted to have 1 or 3 color channels. -#' @param drop_duplicates (deprecated in TF >= 2.3) Boolean, whether to drop -#' duplicate rows based on filename. The default value is `TRUE`. -#' @param classes optional list of classes (e.g. `c('dogs', 'cats')`. Default: -#' `NULL` If not provided, the list of classes will be automatically inferred -#' from the `y_col`, which will map to the label indices, will be alphanumeric). -#' The dictionary containing the mapping from class names to class indices -#' can be obtained via the attribute `class_indices`. -#' @param class_mode one of "categorical", "binary", "sparse", "input", "other" or None. -#' Default: "categorical". Mode for yielding the targets: -#' * "binary": 1D array of binary labels, -#' * "categorical": 2D array of one-hot encoded labels. Supports multi-label output. -#' * "sparse": 1D array of integer labels, -#' * "input": images identical to input images (mainly used to work with autoencoders), -#' * "other": array of y_col data, -#' * "multi_output": allow to train a multi-output model. Y is a list or a vector. -#' `NULL`, no targets are returned (the generator will only yield batches of -#' image data, which is useful to use in `predict_generator()`). -#' -#' @note -#' This functions requires that `pandas` (Python module) is installed in the -#' same environment as `tensorflow` and `keras`. -#' -#' If you are using `r-tensorflow` (the default environment) you can install -#' `pandas` by running `reticulate::virtualenv_install("pandas", envname = "r-tensorflow")` -#' or `reticulate::conda_install("pandas", envname = "r-tensorflow")` depending on -#' the kind of environment you are using. -#' -#' @section Yields: `(x, y)` where `x` is an array of image data and `y` is a -#' array of corresponding labels. The generator loops indefinitely. -#' -#' @family image preprocessing -#' @export -flow_images_from_dataframe <- function( - dataframe, directory = NULL, x_col = "filename", y_col = "class", - generator = image_data_generator(), target_size = c(256,256), - color_mode = "rgb", classes = NULL, class_mode = "categorical", - batch_size = 32, shuffle = TRUE, seed = NULL, save_to_dir = NULL, - save_prefix = "", save_format = "png", subset = NULL, - interpolation = "nearest", drop_duplicates = NULL) { - - if (!reticulate::py_module_available("pandas")) - stop("Pandas (Python module) must be installed in the same environment as Keras.", - 'Install it using reticulate::virtualenv_install("pandas", envname = "r-tensorflow") ', - 'or reticulate::conda_install("pandas", envname = "r-tensorflow") depending on ', - 'the kind of environment you are using.') - - args <- list( - dataframe = as.data.frame(dataframe), - directory = normalize_path(directory), - x_col = x_col, y_col = y_col, - target_size = as.integer(target_size), - color_mode = color_mode, - classes = classes, - class_mode = class_mode, - batch_size = as.integer(batch_size), - shuffle = shuffle, - seed = as_nullable_integer(seed), - save_to_dir = normalize_path(save_to_dir), - save_prefix = save_prefix, - save_format = save_format, - drop_duplicates = drop_duplicates - ) - stopifnot(args$batch_size > 0) - - if (keras_version() >= "2.1.2") - args$interpolation <- interpolation - - if (keras_version() >= "2.1.5") - args$subset <- subset - - if(!is.null(drop_duplicates) && tensorflow::tf_version() >= "2.3") { - warning("\'drop_duplicates\' is deprecated as of tensorflow 2.3 and will be ignored. 
Make sure the supplied dataframe does not contain duplicates.") - args$drop_duplicates <- NULL - } - - if (is.null(drop_duplicates) && tensorflow::tf_version() < "2.3") - args$drop_duplicates <- TRUE - - do.call(generator$flow_from_dataframe, args) -} - -#' Create a dataset from a directory -#' -#' Generates a `tf.data.Dataset` from image files in a directory. -#' -#' If your directory structure is: -#' -#' ```` -#' main_directory/ -#' ...class_a/ -#' ......a_image_1.jpg -#' ......a_image_2.jpg -#' ...class_b/ -#' ......b_image_1.jpg -#' ......b_image_2.jpg -#' ```` -#' -#' Then calling `image_dataset_from_directory(main_directory, labels='inferred')` -#' will return a `tf.data.Dataset` that yields batches of images from the -#' subdirectories class_a and class_b, together with labels 0 and 1 (0 -#' corresponding to class_a and 1 corresponding to class_b). -#' -#' Supported image formats: jpeg, png, bmp, gif. Animated gifs are truncated to -#' the first frame. -#' -#' @param directory Directory where the data is located. If labels is -#' "inferred", it should contain subdirectories, each containing images for a -#' class. Otherwise, the directory structure is ignored. -#' @param labels Either "inferred" (labels are generated from the directory -#' structure), or a list/tuple of integer labels of the same size as the -#' number of image files found in the directory. Labels should be sorted -#' according to the alphanumeric order of the image file paths (obtained via -#' os.walk(directory) in Python). -#' @param label_mode Valid values: -#' -#' - 'int': labels are encoded as integers (e.g. -#' for sparse_categorical_crossentropy loss). -#' -#' - 'categorical': labels are encoded as a categorical vector (e.g. for -#' categorical_crossentropy loss). -#' -#' - 'binary': labels (there can be only 2) are encoded as float32 scalars -#' with values 0 or 1 (e.g. for binary_crossentropy). -#' -#' - `NULL`: (no labels). -#' @param class_names Only valid if "labels" is "inferred". This is the explict -#' list of class names (must match names of subdirectories). Used to control -#' the order of the classes (otherwise alphanumerical order is used). -#' @param color_mode One of "grayscale", "rgb", "rgba". Default: "rgb". Whether -#' the images will be converted to have 1, 3, or 4 channels. -#' @param batch_size Size of the batches of data. Default: 32. -#' @param image_size Size to resize images to after they are read from disk. -#' Defaults to (256, 256). Since the pipeline processes batches of images that -#' must all have the same size, this must be provided. -#' @param shuffle Whether to shuffle the data. Default: TRUE. If set to FALSE, -#' sorts the data in alphanumeric order. -#' @param seed Optional random seed for shuffling and transformations. -#' @param validation_split Optional float between 0 and 1, fraction of data to -#' reserve for validation. -#' @param subset One of "training" or "validation". Only used if -#' validation_split is set. -#' @param interpolation String, the interpolation method used when resizing -#' images. Defaults to bilinear. Supports bilinear, nearest, bicubic, area, -#' lanczos3, lanczos5, gaussian, mitchellcubic. -#' @param follow_links Whether to visits subdirectories pointed to by symlinks. -#' Defaults to FALSE. -#' @param crop_to_aspect_ratio If `TRUE`, resize the images without aspect ratio -#' distortion. 
When the original aspect ratio differs from the target aspect -#' ratio, the output image will be cropped so as to return the largest -#' possible window in the image (of size image_size) that matches the target -#' aspect ratio. By default (crop_to_aspect_ratio=False), aspect ratio may not -#' be preserved. -#' @param ... Legacy arguments -#' -#' -#' @return A tf.data.Dataset object. If label_mode is `NULL`, it yields float32 -#' tensors of shape `(batch_size, image_size[1], image_size[2], num_channels)`, -#' encoding images (see below for rules regarding `num_channels`). -#' -#' Otherwise, it yields pairs of `(images, labels)`, where images has shape -#' `(batch_size, image_size[1], image_size[2], num_channels)`, and labels -#' follows the format described below. -#' -#' Rules regarding labels format: -#' -#' + if label_mode is int, the labels are an int32 tensor of shape -#' `(batch_size)`. -#' -#' + if label_mode is binary, the labels are a float32 tensor of 1s and 0s of -#' shape `(batch_size, 1)`. -#' -#' + if label_mode is categorial, the labels are a float32 tensor of shape -#' `(batch_size, num_classes)`, representing a one-hot encoding of the class -#' index. -#' -#' Rules regarding number of channels in the yielded images: -#' -#' + if color_mode is grayscale, there's 1 channel in the image tensors. -#' -#' + if color_mode is rgb, there are 3 channel in the image tensors. -#' -#' + if color_mode is rgba, there are 4 channel in the image tensors. -#' -#' @seealso -#' @export -image_dataset_from_directory <- function( - directory, - labels="inferred", - label_mode="int", - class_names=NULL, - color_mode="rgb", - batch_size=32, - image_size=c(256, 256), - shuffle=TRUE, - seed=NULL, - validation_split=NULL, - subset=NULL, - interpolation="bilinear", - follow_links=FALSE, - crop_to_aspect_ratio = FALSE, - ... -) { - - args <- capture_args(match.call(), list( - directory = function(d) normalizePath(d, mustWork = FALSE), - batch_size = as.integer, - image_size = as_integer_tuple, - seed = as_nullable_integer, - labels = function(l) if(is.character(l)) l else as.integer(l) - )) - - out <- do.call(keras$preprocessing$image_dataset_from_directory, args) - class(out) <- unique(c("tf_dataset", class(out))) - out -} - - -#' Generate a `tf.data.Dataset` from text files in a directory -#' -#' @details -#' If your directory structure is: -#' -#' ``` -#' main_directory/ -#' ...class_a/ -#' ......a_text_1.txt -#' ......a_text_2.txt -#' ...class_b/ -#' ......b_text_1.txt -#' ......b_text_2.txt -#' ``` -#' -#' Then calling `text_dataset_from_directory(main_directory, labels = 'inferred')` -#' will return a `tf.data.Dataset` that yields batches of texts from -#' the subdirectories `class_a` and `class_b`, together with labels -#' 0 and 1 (0 corresponding to `class_a` and 1 corresponding to `class_b`). -#' -#' Only `.txt` files are supported at this time. -#' -#' @param directory Directory where the data is located. -#' If `labels` is "inferred", it should contain -#' subdirectories, each containing text files for a class. -#' Otherwise, the directory structure is ignored. -#' -#' @param labels Either "inferred" -#' (labels are generated from the directory structure), -#' NULL (no labels), -#' or a list of integer labels of the same size as the number of -#' text files found in the directory. Labels should be sorted according -#' to the alphanumeric order of the text file paths -#' (obtained via `os.walk(directory)` in Python). 
-#' -#' @param label_mode - `'int'`: means that the labels are encoded as integers -#' (e.g. for `sparse_categorical_crossentropy` loss). -#' - `'categorical'` means that the labels are -#' encoded as a categorical vector -#' (e.g. for `categorical_crossentropy` loss). -#' - `'binary'` means that the labels (there can be only 2) -#' are encoded as `float32` scalars with values 0 or 1 -#' (e.g. for `binary_crossentropy`). -#' - `NULL` (no labels). -#' -#' @param class_names Only valid if `labels` is `"inferred"`. This is the explicit -#' list of class names (must match names of subdirectories). Used -#' to control the order of the classes -#' (otherwise alphanumerical order is used). -#' -#' @param batch_size Size of the batches of data. Default: `32`. -#' -#' @param max_length Maximum size of a text string. Texts longer than this will -#' be truncated to `max_length`. -#' -#' @param shuffle Whether to shuffle the data. Default: `TRUE`. -#' If set to `FALSE`, sorts the data in alphanumeric order. -#' -#' @param seed Optional random seed for shuffling and transformations. -#' -#' @param validation_split Optional float between 0 and 1, -#' fraction of data to reserve for validation. -#' -#' @param subset One of "training" or "validation". -#' Only used if `validation_split` is set. -#' -#' @param follow_links Whether to visits subdirectories pointed to by symlinks. -#' Defaults to `FALSE`. -#' -#' @param ... For future compatibility (unused presently). -#' -#' @seealso -#' + -#' -#' @export -text_dataset_from_directory <- -function(directory, - labels = "inferred", - label_mode = "int", - class_names = NULL, - batch_size = 32L, - max_length = NULL, - shuffle = TRUE, - seed = NULL, - validation_split = NULL, - subset = NULL, - follow_links = FALSE, - ... -) -{ - args <- capture_args(match.call(), - list(batch_size = as.integer, - max_length = as_nullable_integer, - seed = as_nullable_integer)) - do.call(keras$preprocessing$text_dataset_from_directory, args) -} - -#' Creates a dataset of sliding windows over a timeseries provided as array -#' -#' @details -#' This function takes in a sequence of data-points gathered at -#' equal intervals, along with time series parameters such as -#' length of the sequences/windows, spacing between two sequence/windows, etc., -#' to produce batches of timeseries inputs and targets. -#' -#' @section Example 1: -#' -#' Consider indices `0:99`. With `sequence_length=10`, `sampling_rate=2`, -#' `sequence_stride=3`, `shuffle=FALSE`, the dataset will yield batches of -#' sequences composed of the following indices: -#' -#' ``` -#' First sequence: 0 2 4 6 8 10 12 14 16 18 -#' Second sequence: 3 5 7 9 11 13 15 17 19 21 -#' Third sequence: 6 8 10 12 14 16 18 20 22 24 -#' ... -#' Last sequence: 78 80 82 84 86 88 90 92 94 96 -#' ``` -#' -#' In this case the last 3 data points are discarded since no full sequence -#' can be generated to include them (the next sequence would have started -#' at index 81, and thus its last step would have gone over 99). -#' -#' @section Example 2: Temporal regression. -#' -#' Consider an array `data` of scalar values, of shape `(steps)`. 
-#' To generate a dataset that uses the past 10 -#' timesteps to predict the next timestep, you would use: -#' -#' ``` R -#' steps <- 100 -#' # data is integer seq with some noise -#' data <- array(1:steps + abs(rnorm(steps, sd = .25))) -#' inputs_data <- head(data, -10) # drop last 10 -#' targets <- tail(data, -10) # drop first 10 -#' dataset <- timeseries_dataset_from_array( -#' inputs_data, targets, sequence_length=10) -#' library(tfdatasets) -#' dataset_iterator <- as_iterator(dataset) -#' repeat { -#' batch <- iter_next(dataset_iterator) -#' if(is.null(batch)) break -#' c(input, target) %<-% batch -#' stopifnot(exprs = { -#' # First sequence: steps [1-10] -#' # Corresponding target: step 11 -#' all.equal(as.array(input[1, ]), data[1:10]) -#' all.equal(as.array(target[1]), data[11]) -#' -#' all.equal(as.array(input[2, ]), data[2:11]) -#' all.equal(as.array(target[2]), data[12]) -#' -#' all.equal(as.array(input[3, ]), data[3:12]) -#' all.equal(as.array(target[3]), data[13]) -#' }) -#' } -#' ``` -#' -#' @section Example 3: Temporal regression for many-to-many architectures. -#' -#' Consider two arrays of scalar values `X` and `Y`, -#' both of shape `(100)`. The resulting dataset should consist of samples with -#' 20 timestamps each. The samples should not overlap. -#' To generate a dataset that uses the current timestamp -#' to predict the corresponding target timestep, you would use: -#' -#' ``` R -#' X <- seq(100) -#' Y <- X*2 -#' -#' sample_length <- 20 -#' input_dataset <- timeseries_dataset_from_array( -#' X, NULL, sequence_length=sample_length, sequence_stride=sample_length) -#' target_dataset <- timeseries_dataset_from_array( -#' Y, NULL, sequence_length=sample_length, sequence_stride=sample_length) -#' -#' library(tfdatasets) -#' dataset_iterator <- -#' zip_datasets(input_dataset, target_dataset) %>% -#' as_array_iterator() -#' while(!is.null(batch <- iter_next(dataset_iterator))) { -#' c(inputs, targets) %<-% batch -#' stopifnot( -#' all.equal(inputs[1,], X[1:sample_length]), -#' all.equal(targets[1,], Y[1:sample_length]), -#' # second sample equals output timestamps 20-40 -#' all.equal(inputs[2,], X[(1:sample_length) + sample_length]), -#' all.equal(targets[2,], Y[(1:sample_length) + sample_length]) -#' ) -#' } -#' ``` -#' -#' @param data array or eager tensor -#' containing consecutive data points (timesteps). -#' The first axis is expected to be the time dimension. -#' -#' @param targets Targets corresponding to timesteps in `data`. -#' `targets[i]` should be the target -#' corresponding to the window that starts at index `i` -#' (see example 2 below). -#' Pass NULL if you don't have target data (in this case the dataset will -#' only yield the input data). -#' -#' @param sequence_length Length of the output sequences (in number of timesteps). -#' -#' @param sequence_stride Period between successive output sequences. -#' For stride `s`, output samples would -#' start at index `data[i]`, `data[i + s]`, `data[i + (2 * s)]`, etc. -#' -#' @param sampling_rate Period between successive individual timesteps -#' within sequences. For rate `r`, timesteps -#' `data[i], data[i + r], ... data[i + sequence_length]` -#' are used for create a sample sequence. -#' -#' @param batch_size Number of timeseries samples in each batch -#' (except maybe the last one). -#' -#' @param shuffle Whether to shuffle output samples, -#' or instead draw them in chronological order. -#' -#' @param seed Optional int; random seed for shuffling. 
-#'
-#' @param start_index,end_index Optional int (1 based); data points earlier
-#'   than `start_index` or later then `end_index` will not be used
-#'   in the output sequences. This is useful to reserve part of the
-#'   data for test or validation.
-#'
-#'
-#'
-#' @param ... For backwards and forwards compatibility, ignored presently.
-#'
-#' @seealso
-#' +
-#'
-#' @returns A `tf.data.Dataset` instance. If `targets` was passed, the
-#'   dataset yields batches of two items: `(batch_of_sequences,
-#'   batch_of_targets)`. If not, the dataset yields only
-#'   `batch_of_sequences`.
-#'
-#'
-#' @section Example:
-#'
-#' ````
-#' int_sequence <- seq(20)
-#'
-#' dummy_dataset <- timeseries_dataset_from_array(
-#'   data = head(int_sequence, -3), # drop last 3
-#'   targets = tail(int_sequence, -3), # drop first 3
-#'   sequence_length = 3,
-#'   start_index = 3,
-#'   end_index = 9,
-#'   batch_size = 2
-#' )
-#'
-#' library(tfdatasets)
-#' dummy_dataset_iterator <- as_array_iterator(dummy_dataset)
-#'
-#' repeat {
-#'   batch <- iter_next(dummy_dataset_iterator)
-#'   if (is.null(batch)) # iterator exhausted
-#'     break
-#'   c(inputs, targets) %<-% batch
-#'   for (r in 1:nrow(inputs))
-#'     cat(sprintf("input: [ %s ] target: %s\n",
-#'         paste(inputs[r,], collapse = " "), targets[r]))
-#'   cat("---------------------------\n") # demark batchs
-#' }
-#' ````
-#' Will give output like:
-#' ````
-#' input: [ 3 4 5 ] target: 6
-#' input: [ 4 5 6 ] target: 7
-#' ---------------------------
-#' input: [ 5 6 7 ] target: 8
-#' input: [ 6 7 8 ] target: 9
-#' ---------------------------
-#' input: [ 7 8 9 ] target: 10
-#' ````
-#'
-#'
-#' @export
-timeseries_dataset_from_array <-
-function(data, targets, sequence_length, sequence_stride = 1L,
-         sampling_rate = 1L, batch_size = 128L, shuffle = FALSE, ...,
-         seed = NULL, start_index = NULL, end_index = NULL)
-{
-  # start_index and end_index are 0-based
-  require_tf_version("2.6", "timeseries_dataset_from_array")
-
-  args <- capture_args(match.call(), list(
-    data = keras_array,
-    targets = keras_array,
-    sequence_length = as.integer,
-    sequence_stride = as.integer,
-    sampling_rate = as.integer,
-    batch_size = as.integer,
-    seed = as_nullable_integer,
-    start_index = as_slice_start,
-    end_index = as_slice_end
-  ))
-  do.call(keras$preprocessing$timeseries_dataset_from_array, args)
-}
-
-
-as_slice_start <- function(x) {
-  if (is.null(x))
-    return(x)
-  x <- as.integer(x)
-  if (x >= 1)
-    x <- x - 1L
-  x
-}
-
-
-as_slice_end <- function(x) {
-  if(is.null(x))
-    return(x)
-  x <- as.integer(x)
-  if(x == -1L)
-    return(NULL)
-  if(x < 0)
-    x <- x + 1L
-  x
-}
diff --git a/R/py-classes.R b/R/py-classes.R
index 0810449d6f..a32d5d1cb7 100644
--- a/R/py-classes.R
+++ b/R/py-classes.R
@@ -1,130 +1,246 @@
-#' @importFrom reticulate r_to_py import_builtins py_eval py_dict py_call
-#' @export
-r_to_py.R6ClassGenerator <- function(x, convert = FALSE) {
-  inherit <- resolve_py_type_inherits(x$get_inherit(), convert)
-  mask_env <- new.env(parent = x$parent_env)
-  # common-mask-env: `super`, `__class__`, classname
+# TODO: use this in register_keras_serializable()?
+generate_module_name <- function(env) {
+  while((name <- environmentName(env)) == "")
+    env <- parent.env(env)
+  if(isNamespace(env))
+    name <- paste0("namespace:", name)
+  else if (name == "R_GlobalEnv")
+    name <- "globalenv"
+  sprintf("<%s>", name)
+}
 
-  # R6 by default includes this in public methods list, not applicable here.
-  methods <- x$public_methods
-  methods$clone <- NULL
+new_py_class <- function(classname,
+                         members = list(),
+                         inherit = NULL,
+                         parent_env = parent.frame(),
+                         inherit_expr,
+                         convert = TRUE) {
+  if (!missing(inherit_expr))
+    inherit <- eval(inherit_expr, parent_env)
+  new_py_type(
+    classname,
+    members = members,
+    inherit = inherit,
+    parent_env = parent_env
+  )
+}
 
-  methods <- as_py_methods(methods, mask_env, convert)
-  active <- as_py_methods(x$active, mask_env, convert)
 
-  # having convert=FALSE here means py callables are not wrapped in R functions
-  # https://github.com/rstudio/reticulate/issues/1024
-  builtins <- import_builtins(convert)
+new_wrapped_py_class <-
+function(classname,
+         members = list(),
+         inherit = NULL,
+         parent_env = parent.frame(),
+         private = list(),
+         # modifiers = quote(expr =),
+         default_formals = function(...) {})
+{
+  # force all new_py_type() args
+  classname; members; inherit; parent_env; private;
+
+  delayedAssign(classname,
+    new_py_type(
+      classname = classname,
+      members = members,
+      inherit = resolve_py_obj(inherit, env = parent_env),
+      parent_env = parent_env,
+      private = private
+    )
+  )
+  delayedAssign("__class__", get(classname))
 
-  py_property <- builtins$property
-  active <- lapply(active, function(fn) py_call(py_property, fn, fn))
+  if (is_keras_loaded()) {
+    # force promise, get actual frmls
+    frmls <- formals(`__class__`)
+  } else {
+    # try to infer frmls
+    frmls <- formals(members$`__init__` %||%
+                     members$initialize %||%
+                     default_formals)
+  }
+  frmls$self <- NULL
 
-  namespace <- c(x$public_fields, methods, active)
+  bdy <- bquote({
+    args <- capture_args(enforce_all_dots_named = FALSE) # .(modifiers),
+    do.call(.(as.name(classname)), args)
+  })
+  rm(default_formals) # free memory ; rm(modifiers)
+
+  as.function.default(c(frmls, bdy))
+}
+
+new_py_type <-
+function(classname,
+         members = list(),
+         inherit = NULL,
+         parent_env = parent.frame(),
+         private = list())
+{
+
+  if (is.language(inherit))
+    inherit <- eval(inherit, parent_env)
+
+  convert <- TRUE
+  inherit <- resolve_py_type_inherits(inherit, convert)
+  mask_env <- new.env(parent = parent_env)
+  # common-mask-env: `super`, `__class__`, classname
+
+  members <- normalize_py_type_members(members, mask_env, convert, classname)
 
   # we need a __module__ because python-keras introspects to see if a layer is
   # subclassed by consulting layer.__module__
   # (not sure why builtins.issubclass() doesn't work over there)
   # `__module__` is used to construct the S3 class() of py_class instances,
   # it needs to be stable (e.g, can't use format(x$parent_env))
-  if(!"__module__" %in% names(namespace))
-    namespace$`__module__` <- "R6type"
+  if (!"__module__" %in% names(members))
+    members$`__module__` <- generate_module_name(parent_env)
 
-  new_exec_body <- py_eval("lambda ns_entries: (lambda ns: ns.update(ns_entries))",
-                           convert=convert)
-  exec_body <- py_call(new_exec_body,
-                       py_dict(names(namespace), unname(namespace), convert))
+  exec_body <- py_eval(
+    "lambda ns_entries: (lambda ns: ns.update(ns_entries))")(members)
 
-  py_class <- py_call(import("types", convert=convert)$new_class,
-    name = x$classname,
+  py_class <- import("types")$new_class(
+    name = classname,
     bases = inherit$bases,
     kwds = inherit$keywords,
     exec_body = exec_body
   )
 
-  # https://github.com/rstudio/reticulate/issues/1024
-  py_class <- py_to_r(py_class)
-  assign("convert", convert, as.environment(py_class))
 
   mask_env$`__class__` <- py_class
-  mask_env[[x$classname]] <- py_class
-  attr(mask_env, "get_private") <-
new_get_private(r6_class = x, shared_mask_env = mask_env) - - eval(quote({ - super <- base::structure( - function(type = get("__class__"), - object = base::get("self", parent.frame())) { - convert <- get("convert", envir = as.environment(object)) - bt <- reticulate::import_builtins(convert) - reticulate::py_call(bt$super, type, object) - }, - class = "python_class_super") - }), mask_env) + mask_env[[classname]] <- py_class + if (!is.null(private)) { + attr(mask_env, "get_private") <- + new_get_private(private, shared_mask_env = mask_env) + } + eval(envir = mask_env, quote({ + super <- function( + type = `__class__`, + object_or_type = base::get("self", envir = base::parent.frame())) + { + convert <- base::get("convert", envir = base::as.environment(object_or_type)) + py_builtins <- reticulate::import_builtins(convert) + reticulate::py_call(py_builtins$super, type, object_or_type) + } + class(super) <- "python_builtin_super_getter" + })) - attr(py_class, "r6_class") <- x - class(py_class) <- c("py_R6ClassGenerator", class(py_class)) py_class } -#' @importFrom reticulate py_id -new_get_private <- function(r6_class, shared_mask_env) { - force(r6_class); force(shared_mask_env) +# S3 methods for nice access from class methods like +# - super$initialize() +# - super()$initialize() +# - super(Classname, self)$initialize() +#' @export +`$.python_builtin_super_getter` <- function(x, name) { + super <- do.call(x, list(), envir = parent.frame()) # call super() + name <- switch(name, initialize = "__init__", finalize = "__del__", name) + out <- py_get_attr(super, name) + convert <- get0("convert", as.environment(out), inherits = FALSE, + ifnotfound = TRUE) + if (convert) py_to_r(out) else out +} - privates <- list() +#' @export +`[[.python_builtin_super_getter` <- `$.python_builtin_super_getter` - new_instance_private <- function(self, key) { +# No .DollarNames.python_builtin_super_getter because the python.builtin.super +# object doesn't have populated attributes itself, only a dynamic `__getattr__` +# method that resolves dynamically. 
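A sketch of what the `super` binding and the S3 getter above enable from user code. The classes below are hypothetical; the example relies on the `r_to_py()` method for R6 generators defined later in this hunk, which converts an R6 generator (and, recursively, its R6 parent) into a Python type:

```r
library(R6)
library(reticulate)

Base <- R6Class("Base", public = list(
  initialize = function(x) {
    self$x <- x
  }
))

Derived <- R6Class("Derived", inherit = Base, public = list(
  initialize = function(x) {
    # `$.python_builtin_super_getter` remaps `initialize` to `__init__`
    # and dispatches through Python's builtins.super()
    super$initialize(x)
    self$y <- 2 * x
  }
))

PyDerived <- r_to_py(Derived)  # Derived and Base become Python types
obj <- PyDerived(3L)
obj$x  # 3
obj$y  # 6
```

Because resolution goes through Python's `super()`, method lookup follows the Python MRO of the generated type rather than R6's single-parent chain.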
- private <- new.env(parent = emptyenv()) - privates[[key]] <<- private - reticulate::import("weakref")$finalize( - self, finalize_instance_private, key) +#' @importFrom reticulate r_to_py import_builtins py_eval py_dict py_call +#' @export +r_to_py.R6ClassGenerator <- function(x, convert = TRUE) { + members <- c(x$public_fields, + x$public_methods, + lapply(x$active, active_property)) + members$clone <- NULL + new_py_type( + classname = x$classname, + inherit = x$get_inherit(), + members = members, + private = c(x$private_fields, + x$private_methods), + parent_env = x$parent_env + ) +} - if (length(r6_class$private_fields)) - list2env(r6_class$private_fields, envir = private) - if (length(r6_class$private_methods)) { - instance_mask_env <- new.env(parent = shared_mask_env) - instance_mask_env$self <- self - instance_mask_env$private <- private +normalize_py_type_members <- function(members, env, convert, classname) { + + if (all(c("initialize", "__init__") %in% names(members))) + stop("You should not specify both `__init__` and `initialize` methods.") + + if (all(c("finalize", "__del__") %in% names(members))) + stop("You should not specify both `__del__` and `finalize` methods.") + + names(members) <- names(members) %>% + replace_val("initialize", "__init__") %>% + replace_val("finalize", "__del__") + + members <- imap(members, function(x, name) { + if (!is.function(x)) + return(x) + as_py_method(x, name, env, convert, + label = sprintf("%s$%s", classname, name)) + }) + + members +} - for (nm in names(r6_class$private_methods)) { - method <- r6_class$private_methods[[nm]] - environment(method) <- instance_mask_env - private[[nm]] <- method - } - } + +#' @importFrom reticulate py_get_item py_del_item import +new_get_private <- function(members, shared_mask_env) { + force(members); force(shared_mask_env) + + # python should never see privates. + # also, avoid invoking __hash__ on the py obj, which + # might error or return non-unique values. + delayedAssign("class_privates", fastmap::fastmap()) + + new_instance_private <- function(self) { + private <- new.env(parent = emptyenv()) + class_privates$set(py_id(self), private) + + import("weakref")$finalize( + self, del_instance_private, self) + + instance_mask_env <- new.env(parent = shared_mask_env) + # TODO: is this `self` assignment a circular reference that prevents the + # object from being collected? should it be a weakref? + # add tests to make sure that the object is collected when it should be. 
+ instance_mask_env$self <- self + instance_mask_env$private <- private + members <- lapply(members, function(member) { + if (is.function(member) && !is_py_object(member)) + environment(member) <- instance_mask_env + member + }) + active <- map_lgl(members, is_marked_active) + list2env(members[!active], envir = private) + imap(members[active], function(fn, name) { + makeActiveBinding(name, fn, private) + }) private } - finalize_instance_private <- function(key) { - privates[[key]] <<- NULL + del_instance_private <- function(self) { + class_privates$remove(py_id(self)) } function(self) { - key <- py_id2(self) - .subset2(privates, key) %||% new_instance_private(self, key) + class_privates$get(py_id(self)) %||% + new_instance_private(self) } } -py_id2 <- local({ - # temporary workaround py_id() overflowing and returning -1L in R 4.2 on windows - .id <- function(x) { - .id <- py_eval("lambda x: str(id(x))") - assign(".id", .id, envir = environment(sys.function())) - .id(x) - } - function(x) .id(x) -}) - - - +#' @importFrom reticulate tuple dict resolve_py_type_inherits <- function(inherit, convert=FALSE) { # inherits can be @@ -137,7 +253,7 @@ resolve_py_type_inherits <- function(inherit, convert=FALSE) { # (both potentially of length 0) if(is.null(inherit) || identical(inherit, list())) - return(list(bases = tuple(), keywords = list())) + return(list(bases = tuple(), keywords = dict())) bases <- if (inherits(inherit, "python.builtin.tuple")) as.list(inherit) @@ -155,68 +271,53 @@ resolve_py_type_inherits <- function(inherit, convert=FALSE) { names(bases) <- NULL bases <- lapply(bases, function(cls) { - if (inherits(cls, "R6ClassGenerator")) - return(r_to_py.R6ClassGenerator(cls, convert)) - - if (!inherits(cls, "python.builtin.object")) + if (!is_py_object(cls)) tryCatch( cls <- r_to_py(cls), error = function(e) stop(e, "Supplied superclasses must be python objects, not: ", paste(class(cls), collapse = ", ")) ) - - if(inherits(cls, "python.builtin.type") && is.function(cls)) - force(environment(cls)$callable) - cls }) - bases <- do.call(tuple, bases) list(bases = bases, keywords = keywords) } -as_py_methods <- function(x, env, convert) { - out <- list() - - if ("initialize" %in% names(x) && "__init__" %in% names(x)) - stop("You should not specify both `__init__` and `initialize` methods.") - - if ("finalize" %in% names(x) && "__del__" %in% names(x)) - stop("You should not specify both `__del__` and `finalize` methods.") - - for (name in names(x)) { - fn <- x[[name]] - name <- switch(name, - initialize = "__init__", - finalize = "__del__", - name) - out[[name]] <- as_py_method(fn, name, env, convert) - } - out -} #' @importFrom reticulate py_func py_clear_last_error -as_py_method <- function(fn, name, env, convert) { +as_py_method <- function(fn, name, env, convert, label) { # if user did conversion, they're responsible for ensuring it is right. 
- if (inherits(fn, "python.builtin.object")) { + if (is_py_object(fn)) { #assign("convert", convert, as.environment(fn)) return(fn) } + srcref <- attr(fn, "srcref") + if (!is.function(fn)) stop("Cannot coerce non-function to a python class method") environment(fn) <- env - if (!identical(formals(fn)[1], alist(self =))) - formals(fn) <- c(alist(self =), formals(fn)) + decorators <- attr(fn, "py_decorators", TRUE) + # if(is_marked_active(fn)) + + if ("staticmethod" %in% decorators) { + # do nothing + } else if ("classmethod" %in% decorators) { + fn <- ensure_first_arg_is(fn, cls = ) + } else { + # standard pathway, ensure the method receives 'self' as first arg + fn <- ensure_first_arg_is(fn, self = ) + } doc <- NULL - if (body(fn)[[1]] == quote(`{`) && + if (is.call(body(fn)) && + body(fn)[[1]] == quote(`{`) && length(body(fn)) > 1 && typeof(body(fn)[[2]]) == "character") { doc <- glue::trim(body(fn)[[2]]) @@ -233,11 +334,10 @@ as_py_method <- function(fn, name, env, convert) { if (!"private" %in% names(formals(fn)) && "private" %in% all.names(body(fn))) { - # any benefit to using delayedAssign here? body(fn) <- substitute({ - private <- attr(env, "get_private", TRUE)(self) + delayedAssign("private", attr(parent.env(environment()), "get_private", TRUE)(self)) body - }, list(body = body(fn), env = env)) + }, list(body = body(fn))) } # python tensorflow does quite a bit of introspection on user-supplied @@ -251,24 +351,39 @@ as_py_method <- function(fn, name, env, convert) { # Can't use py_func here because it doesn't accept a `convert` argument - py_sig <- tryCatch(r_formals_to_py__signature__(fn), - error = function(e) NULL) + # Can't use __signature__ to communicate w/ the python side anymore + # because binding of 'self' for instance methods doesn't update __signature__, + # resulting in errors for checks in keras_core for 'build()' method arg names. - attr(fn, "py_function_name") <- name + # attr(fn, "py_function_name") <- name + attr(fn, "pillar") <- list(label = label) # for print method of rlang::trace_back() + fn <- py_func2(fn, convert, name = name) # https://github.com/rstudio/reticulate/issues/1024 - fn <- py_to_r(r_to_py(fn, convert)) - assign("convert", convert, as.environment(fn)) - - if(!is.null(py_sig)) - fn$`__signature__` <- py_sig + # fn <- py_to_r(r_to_py(fn, convert)) + # assign("convert", convert, as.environment(fn)) if(!is.null(doc)) fn$`__doc__` <- doc + attr(fn, "srcref") <- srcref + # TODO, maybe also copy over "wholeSrcref". See `removeSource()` as a starting point. + # This is used to generate clickable links in rlang traceback printouts. + bt <- import_builtins() + for (dec in decorators) { + if (identical(dec, "property") && length(formals(fn)) > 1) { + fn <- bt$property(fn, fn) # getter and setter + next + } + if (is_string(dec)) { + dec <- bt[[dec]] + } + fn <- dec(fn) + } fn } +#' @importFrom rlang is_string r_formals_to_py__signature__ <- function(fn) { inspect <- import("inspect", convert = FALSE) py_repr <- import_builtins(FALSE)$repr @@ -280,8 +395,7 @@ r_formals_to_py__signature__ <- function(fn) { for (nm in names(frmls)) { if(nm == "...") { params$extend(list( - Param("_R_dots_positional_args", Param$VAR_POSITIONAL), - Param("_R_dots_keyword_args", Param$VAR_KEYWORD) + Param("_R_dots_positional_args", Param$VAR_POSITIONAL) )) kind <- Param$KEYWORD_ONLY next @@ -299,10 +413,50 @@ r_formals_to_py__signature__ <- function(fn) { inspect$Parameter(nm, kind, default=default) ) } + if("..." 
%in% names(frmls)) + # need to make sure that `**kwarg` is last in signature, + # in case there are args after R `...`, we need to reorder + # so the py sig looks like `(foo, *args, bar, **kwargs)` + params$extend(list( + Param("_R_dots_keyword_args", Param$VAR_KEYWORD) + )) + inspect$Signature(params) } +py_func2 <- function(fn, convert, name = deparse(substitute(fn))) { + # TODO: wrap this all in a tryCatch() that gives a nice error message + # about unsupported signatures + sig <- py_to_r(r_formals_to_py__signature__(fn)) + inspect <- import("inspect") + pass_sig <- iterate(sig$parameters$values(), function(p) { + if(p$kind == inspect$Parameter$POSITIONAL_ONLY) + p$name + else if (p$kind == inspect$Parameter$POSITIONAL_OR_KEYWORD) + # pass as positional, since there might be a positional args collector up ahead, and + # having kwargs before a positional collector is illegal + p$name + else if (p$kind == inspect$Parameter$VAR_POSITIONAL) + paste0("*", p$name) + else if (p$kind == inspect$Parameter$VAR_KEYWORD) + paste0("**", p$name) + else if(p$kind == inspect$Parameter$KEYWORD_ONLY) + paste0(p$name, "=", p$name) + else + stop("Unrecognized function argument type: ", p$name) + }) + pass_sig <- paste0(pass_sig, collapse = ", ") + code <- glue::glue(" +def wrap_fn(r_fn): + def {name}{py_str(sig)}: + return r_fn({pass_sig}) + return {name} + ") + util <- reticulate::py_run_string(code, local = TRUE, convert = convert) + util$wrap_fn(fn) +} + # TODO: (maybe?) factor out a py_class() function, # funnel r_to_py.R6ClassGenerator() and %py_class%() to go through py_class() @@ -315,7 +469,9 @@ r_formals_to_py__signature__ <- function(fn) { # *) `super` can be accessed in both R6 style using `$`, and python-style as a callable # *) `super()` can resolve `self` properly when called from a nested scope # *) method calls respect user-supplied `convert` values for all args -# + + +# @seealso #' Make a python class constructor @@ -324,12 +480,12 @@ r_formals_to_py__signature__ <- function(fn) { #' @param body an expression that can be evaluated to construct the class #' methods. #' -#' @return The python class constructor, invisibly. Note, the same constructor is +#' @returns The python class constructor, invisibly. Note, the same constructor is #' also assigned in the parent frame. #' @export #' @aliases py_class -#' -#' @seealso +#' @keywords internal +#' @seealso [`%<-active%()`] #' #' @examples #' \dontrun{ @@ -385,6 +541,17 @@ r_formals_to_py__signature__ <- function(fn) { #' #' call_private_method <- function() #' private$a_private_method() +#' +#' # equivalent of @property decorator in python +#' an_active_property %<-active% function(x = NULL) { +#' if(!is.null(x)) { +#' cat("`an_active_property` was assigned", x, "\n") +#' return(x) +#' } else { +#' cat("`an_active_property` was accessed\n") +#' return(42) +#' } +#' } #' } #' #' inst1 <- MyClass(1) @@ -393,6 +560,8 @@ r_formals_to_py__signature__ <- function(fn) { #' inst2$get_private_field() #' inst1$call_private_method() #' inst2$call_private_method() +#' inst1$an_active_property +#' inst1$an_active_property <- 11 #' } `%py_class%` <- function(spec, body) { spec <- substitute(spec) @@ -455,36 +624,29 @@ r_formals_to_py__signature__ <- function(fn) { public[[nm]] <- env[[nm]] } + # TODO: re-enable delayed pyclasses. 
+ # if (delay_load) + # py_class <- delayed_r_to_py_R6ClassGenerator(r6_class, convert) + # else + # py_class <- r_to_py.R6ClassGenerator(r6_class, convert) - # R6Class() calls substitute() on inherit; - r6_class <- eval(as.call(list( - quote(R6::R6Class), + inherit <- eval(inherit, parent_env) + active <- lapply(active, active_property) + + py_class <- new_py_type( classname = classname, - public = public, - private = private, - active = active, inherit = inherit, - cloneable = FALSE, + members = c(public, active), + private = private, parent_env = parent_env - ))) - - - if (delay_load) - py_class <- delayed_r_to_py_R6ClassGenerator(r6_class, convert) - else - py_class <- r_to_py.R6ClassGenerator(r6_class, convert) + ) - attr(py_class, "r6_class") <- r6_class - class(py_class) <- c("py_converted_R6_class_generator", class(py_class)) + # attr(py_class, "r6_class") <- r6_class assign(classname, py_class, envir = parent_env) invisible(py_class) } -if (getRversion() < "4.0") - activeBindingFunction <- function(nm, env) { - as.list.environment(env, all.names = TRUE)[[nm]] - } #' @importFrom reticulate py_call py_to_r py_callable_as_function2 <- function(callable, convert) { @@ -545,8 +707,9 @@ delayed_r_to_py_R6ClassGenerator <- function(r6_class, convert) { fn } -#' @export -print.py_R6ClassGenerator <- function(x, ...) { +# @export +# print.py_R6ClassGenerator <- +function(x, ...) { r6_class <- attr(x, "r6_class") if (isTRUE(get0("delayed", attr(x, "py_object")))) cat(sprintf(" (delayed)\n", r6_class$classname)) @@ -556,18 +719,38 @@ print.py_R6ClassGenerator <- function(x, ...) { print(r6_class) } +# @export +# `$.py_R6ClassGenerator` <- +function(x, name) { + if (identical(name, "new")) + return(x) + NextMethod() +} + +# @exportS3Method pillar::type_sum +# @rawNamespace S3method(pillar::type_sum,py_R6ClassGenerator) +# type_sum.py_R6ClassGenerator <- +function(x) { + cl <- class(x)[[1L]] + if(startsWith(cl, "R6type.")) + cl <- substr(cl, 8L, 2147483647L) + cl +} + + #' Make an Active Binding #' #' @param sym symbol to bind #' @param value A function to call when the value of `sym` is accessed. #' -#' @return `value`, invisibly +#' @returns `value`, invisibly #' @export #' #' @details Active bindings defined in a [`%py_class%`] are converted to #' `@property` decorated methods. #' #' @seealso [`makeActiveBinding()`] +#' @keywords internal #' #' @examples #' set.seed(1234) @@ -600,3 +783,55 @@ maybe_delayed_r_to_py_R6ClassGenerator <- else delayed_r_to_py_R6ClassGenerator(x, convert) } + +ensure_first_arg_is <- function(fn, ...) { + frmls <- formals(fn) + arg <- eval(substitute(alist(...))) + if (!identical(frmls[1], arg)) + formals(fn) <- c(arg, frmls) + fn +} + + + +#' Create an active property class method +#' +#' @param fn An R function +#' +#' @description +#' +#' # Example +#' ```r +#' layer_foo <- Model("Foo", ..., +#' metrics = active_property(function() { +#' list(self$d_loss_metric, +#' self$g_loss_metric) +#' })) +#' ``` +#' @returns `fn`, with an additional R attribute that will cause `fn` to be +#' converted to an active property when being converted to a method of a +#' custom subclass. 
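+#'
+#' # Usage (sketch)
+#' Assuming `layer_foo` from the example above, an instance's `metrics` is
+#' then read like a field, not called as a method:
+#' ```r
+#' # model <- layer_foo(...)
+#' # model$metrics  # invokes the active property getter
+#' ```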
+#' @export +active_property <- function(fn) { + if(!is.function(fn)) + stop("Only functions can be active properties") + append1(attr(fn, "py_decorators")) <- "property" + fn +} + +decorate_method <- function(fn, decorator) { + append1(attr(fn, "py_decorators")) <- decorator + fn +} + +drop_null_defaults <- function(args, fn = sys.function(-1L)) { + null_default_args <- names(which(vapply(formals(fn), is.null, TRUE))) + drop_nulls(args, null_default_args) +} + +is_marked_active <- function(x) { + for (dec in attr(x, "py_decorators", TRUE)) + if (identical(dec, "property")) + return (TRUE) + FALSE +} diff --git a/R/r-utils.R b/R/r-utils.R new file mode 100644 index 0000000000..ca4d07a831 --- /dev/null +++ b/R/r-utils.R @@ -0,0 +1,861 @@ +# ---- general utils ---- + +is_backend <- function(name) { + identical(keras$config$backend(), name) +} + +is_windows <- function() { + identical(.Platform$OS.type, "windows") +} + +is_osx <- function() { + Sys.info()["sysname"] == "Darwin" +} + +is_mac_arm64 <- function() { + sys_info <- Sys.info() + sys_info[["sysname"]] == "Darwin" && + sys_info[["machine"]] == "arm64" +} + +is_scalar <- function(x) identical(length(x), 1L) + +# is_py_object <- function(x) is_py_object(x) + +split_dots_named_unnamed <- function(dots) { + nms <- names(dots) + if (is.null(nms)) + return(list(unnamed = dots, named = list())) + named <- nzchar(nms) + list(unnamed = dots[!named], named = dots[named]) +} + +drop_nulls <- function(x, i = NULL) { + if(is.null(i)) + return(x[!vapply(x, is.null, FALSE, USE.NAMES = FALSE)]) + + drop <- logical(length(x)) + names(drop) <- names(x) + drop[i] <- vapply(x[i], is.null, FALSE, USE.NAMES = FALSE) + x[!drop] +} + +#' @importFrom rlang dots_list +# identical to rlang::list2(), except .named = TRUE +named_list <- function(...) + dots_list(..., + .named = TRUE, + # not the default + .ignore_empty = "trailing", + .preserve_empty = FALSE, + .homonyms = "error", + .check_assign = FALSE) + +`append1<-` <- function(x, value) { + x[[length(x) + 1L]] <- value + x +} + +`append<-` <- function(x, value) c(x, value) + +`prepend<-` <- function(x, value) c(value, x) # c(x[integer()], value, x) + +replace_val <- function(x, old, new) { + if (!is_scalar(new)) + stop("Unexpected length of replacement value in replace_val().\n", + "`new` must be length 1, not ", length(new)) + x[x %in% old] <- new + x +} + +imap <- function(.x, .f, ...) { + out <- .mapply(.f, list(.x, names(.x) %||% seq_along(.x)), list(...)) + names(out) <- names(.x) + out +} + +map2 <- function(.x, .y, .f, ...) { + out <- .mapply(.f, list(.x, .y), list(...)) + if(length(.x) == length(out)) + names(out) <- names(.x) + out +} + +map_chr <- function(.x, .f, ...) { + out <- vapply(X = .x, FUN = .f, FUN.VALUE = "", ..., USE.NAMES = FALSE) + names(out) <- names(.x) + out +} + +map_lgl <- function(.x, .f, ...) { + out <- vapply(X = .x, FUN = .f, FUN.VALUE = TRUE, ..., USE.NAMES = FALSE) + names(out) <- names(.x) + out +} + +map_int <- function(.x, .f, ...) { + out <- vapply(X = .x, FUN = .f, FUN.VALUE = 0L, ..., USE.NAMES = FALSE) + names(out) <- names(.x) + out +} + +last <- function(x) x[[length(x)]] + +second_to_last <- function(x) + if((lx <- length(x)) > 1) x[[lx-1L]] + +rename <- function(x, ..., .skip_existing = TRUE) { + dots <- list(...) 
+ nms <- names(x) + for(i in seq_along(dots)) { + newname <- names(dots)[[i]] + oldname <- dots[[i]] + if(.skip_existing && newname %in% nms) + next + nms[match(oldname, nms)] <- newname + } + names(x) <- nms + x +} + +`%""%` <- function (x, y) { + if(!is.character(x)) + stop("x must be character") + not_empty <- nzchar(x) + if(all(not_empty)) + return(x) + if(!is.character(y)) + stop("y must be character") + # don't force `y` unless needed + if (!identical(length(y), length(x))) { + stopifnot(identical(length(y), 1L)) + y <- rep(y, length(x)) + } + empty <- !not_empty + x[empty] <- y[empty] + x +} + + +# ---- arg checkers ---- + +check_bool <- function(x) { + if (identical(x, TRUE) || identical(x, FALSE)) + x + else + stop(sprintf("`%s` arg must be `TRUE` or `FALSE`", + deparse1(substitute(x)))) +} + + + +# ---- arg transformers ---- + +as_array <- function(x) + if(is.null(x) || is_py_object(x) || is.array(x)) + x else as.array(x) + +as_py_array <- function(x) + if(is.null(x) || is_py_object(x)) + x else np_array(x) + +as_r_value <- function (x) + if (is_py_object(x)) + py_to_r(x) else x + +as_axis <- function(axis) { + if (is.null(axis)) + return(NULL) + + if (length(axis) > 1) + return(lapply(axis, as_axis)) + + axis <- as.integer(axis) + + if (axis == 0L) + stop("`axis` argument is 1 based, received 0") + + if (axis > 0L) axis - 1L + else axis +} + + +# Helper function to coerce shape arguments to tuple +# tf$reshape()/k_reshape() doesn't accept a tf.TensorShape object +normalize_shape <- function(shape) { + + # reflect NULL back + if (is.null(shape)) + return(shape) + + # already fixed up + if (inherits(shape, "keras_shape")) + return(shape) + + # if it's a list or a numeric vector then convert to integer + # NA's in are accepted as NULL + # also accept c(NA), as if it was a numeric + if (is.list(shape) || is.numeric(shape) || + (is.logical(shape) && all(is.na(shape)))) { + + shape <- lapply(shape, function(value) { + # Pass through python objects unmodified, only coerce R objects + # supplied shapes, e.g., to tf$random$normal, can be a list that's a mix + # of scalar integer tensors and regular integers + if (is_py_object(value)) + return(value) + + # accept NA,NA_integer_,NA_real_ as NULL + if ((is_scalar(value) && is.na(value))) + return(NULL) + + if (!is.null(value)) + as.integer(value) + else + NULL + }) + } + + if (inherits(shape, "tensorflow.python.framework.tensor_shape.TensorShape")) + shape <- as.list(shape$as_list()) # unpack for tuple() + + # coerce to tuple so it's iterable + tuple(shape) +} + +as_integer <- function(x) { + if (is.numeric(x)) + as.integer(x) + else + x +} + +as_integer_array <- function(x) { + if(is.atomic(x)) + x <- as.array(x) + if(is.array(x) && storage.mode(x) != "integer") + storage.mode(x) <- "integer" + x +} + +as_integer_tuple <- function(x, force_tuple = FALSE) { + if (is.null(x)) + x + else if (is.list(x) || force_tuple) + tuple(as.list(as.integer(x))) + else + as.integer(x) +} + +as_nullable_integer <- function(x) { + if (is.null(x)) + x + else + as.integer(x) +} + +as_layer_index <- function(x) { + if (is.null(x)) + return(x) + + x <- as.integer(x) + + if (x == 0L) + stop("`index` for get_layer() is 1-based (0 was passed as the index)") + + if (x > 0L) + x - 1L + else + x +} + + + +as_node_index <- function(node_index) { + as.integer(node_index-1) +} + + +# Helper function to normalize paths +normalize_path <- function(path) { + if (is.null(path)) + NULL + else + normalizePath(path.expand(path), mustWork = FALSE) +} + + +# unused +as_index <- 
function(x) { + if(storage.mode(x) == "double") + storage.mode(x) <- "integer" + # k_array() pass through here... + # TODO: implement an efficient way to check for negative slices + x - 1L +} + + +# Sketch for an alternative approach to offsetting indexes, +# so that they are 1 based in the R runtime, but convert into python +# as 0 based. Alternative implementaiton for Callback() epochs, +# LearningRateSchedule(), and similar. +# +# as_r_index <- function(x) { +# if(is.double(x)) +# x <- as.integer(x) +# class(x) <- c("r_index", class(x)) +# x +# } +# +# r_to_py.r_index <- function(x) { +# if (x > 0L) x - 1L else x +# } +# +# zero_to_one_index <- function(x) x + 1L + + +# ---- resolve_py_obj ---- + + + +resolve_wrapper_py_obj_expr <- function(x, prefer_class = TRUE) { + if (!identical(class(x), "function")) + return() + + ns <- environment(sys.function()) # keras3 namespace + xe <- environment(x) + + if (identical(xe, emptyenv())) + return() + + # only inspect pkg functions, or pkg wrapped functions + + ## is a wrapper returned by new_wrapped_py_class(), like Layer() + if (identical(parent.env(xe), ns)) + return(quote(`__class__`)) + + ## is a pkg exported function + if (!(identical(xe, ns))) + return() + + # standard builtin wrapper, like layer_dense, loss_* + # (or Layer(), though that's handled above) + last_cl <- last(body(x)) + if (is.call(last_cl) && + (identical(last_cl[[1L]], quote(do.call)) || + identical(last_cl[[1L]], quote(create_layer)))) { + expr <- last_cl[[2L]] + if (identical(expr, quote(callable))) { + # loss_ or metric_ wrapper + if (prefer_class) + expr <- second_to_last(body(x))[[c(3, 3)]] + else + expr <- second_to_last(body(x))[[c(3, 4)]] + } + return(expr) + } + + # application wrapper + if (is.call(last_cl) && + identical(last_cl[[1L]], quote(set_preprocessing_attributes)) && + is.call(last_cl2 <- as.list(body(x))[[length(body(x)) - 1L]]) && + (identical(last_cl2[[c(3L, 1L)]], quote(do.call)))) + return(last_cl2[[c(3L, 2L)]]) + + # bare builtin op_wrapper, like + # op_add <- function(x1, x2) keras$ops$add(x1, x2) + if (is.call(cl <- body(x)) && + (is.call(cl0 <- cl1 <- cl[[1L]]) || + ( + identical(cl0, quote(`{`)) && + length(cl1 <- as.list(cl[-1])) == 1 && + is.call(cl <- cl1[[1L]]) && + is.call(cl0 <- cl1 <- cl[[1L]]) + ))) + { + while (is.call(cl0) && identical(cl0[[1L]], quote(`$`))) + cl0 <- cl0[[2L]] + + if (identical(cl0, quote(keras))) + return(cl1) + } + + NULL +} + +resolve_py_obj <- function(x, default_name = "anonymous_R_function", + env = asNamespace("keras3"), + prefer_class = TRUE, + convert = TRUE) { + # this function is used: + # - to resolve `inherit` args in the keras subclassing API + # (e.g., if `inherit` arg is a wrapper like `layer_dense`, or + # `layer_custom` returned by a Layer("Custom", ...)) + # - to resolve args that can come in as callables to `compile()` + # (e.g., loss, metrics) + # - to resolve args that can come in as callables passed to layer_* constructors. + # (e.g., activations, initializers) + # - to resolve custom_objects supplied to the saving & serialization API, + # (e.g., with_custom_object_scope(), load_model(), ...) + + # - `x` can come in as a language object, enabling lazy evaluation / + # delayed initialization python + # - If `x` is a package exported wrapper, like `layer_dense` or similar, + # this will return the py callable object, like `keras$layers$Dense` + # This should work with *all* exported wrappers + # (loss_, activation_, layer_, op_*, etc.) 
+ # - Otherwise, If `x` is a bare R function, it will be coerced to + # a python function with `py_func2()`, which is similar to the default + # r_to_py() except: + # - the constructed python wrapper has an accurate signature that + # matches the R func (needed in some places where keras inspects the + # callable signature) + # - We work harder/better to resolve an appropriate __name__ (accepting + # R attributes "name", "__name__" and "py_function_name", and give an + # opportunity for us to provide a better default like "custom_metric" + # from methods like `compile()`) + # - Otherwise, we return `x` unmodified (assuming it will be coerced via + # r_to_py() downstream). If `convert = FALSE`, we eagerly call `r_to_py(x)`. + + if (is.language(x)) + x <- eval(x, env) + + if (is.null(x) || is_py_object(x)) + return(x) + + if (is_bare_r_function(x)) { + + py_obj_expr <- resolve_wrapper_py_obj_expr(x, prefer_class = prefer_class) + if (!is.null(py_obj_expr)) { + # eval in environment(x): wrapper env, where we might find `__class__`. + py_obj <- tryCatch(eval(py_obj_expr, environment(x)), + error = function(e) NULL) + + if (is_py_object(py_obj)) + return(py_obj) + } + + return(as_py_function(x, default_name = default_name)) + } + + if (convert) x else r_to_py(x) +} + +is_bare_r_function <- function(x) { + identical(class(x), "function") +} + +as_py_name <- function(x) { + # sanitize a deparsed R expression into valid python symbol string + if(is.language(x)) + x <- deparse(x, width.cutoff = 500L)[1] + x <- make.names(as.character(x)) + x <- gsub(".", "_", x, fixed = TRUE) + x +} + +as_py_function <- function(fn, default_name = "r_func") { + if(is_py_object(fn)) + return(fn) + + name <- + attr(fn, "py_function_name", TRUE) %||% + attr(fn, "__name__", TRUE) %||% + attr(fn, "name", TRUE) %||% + default_name + + # TODO: try to generate a pretty name using deparse(substitute(x)) would need + # to update capture_args() to construct calls to transformers so that + # substitute will work here. + # if(is.null(name)) { name <- as_py_name(deparse1(substitute(x)))} + py_func2(fn, convert = TRUE, name = name) +} + +get_function_name <- function(fn) { + if (is_py_object(fn)) + return(py_to_r(py_get_attr(fn, "__name__"))) + + attr(fn, "py_function_name", TRUE) %||% + attr(fn, "__name__", TRUE) %||% + attr(fn, "name", TRUE) +} + + + +# if(FALSE) { +# # TODO: use this to generate a static list for populating +# # a reverse lookup hashtable +# x <- lapply(asNamespace("keras3"), resolve_wrapper_py_obj_expr) |> +# purrr::map_chr(\(expr) if(is.null(expr)) "" else deparse1(expr)) +# df <- tibble::enframe(x, value = "expr") +# df <- df[order(df$name),] +# success <- df$expr != "" +# +# +# df[success, ] |> print(n = Inf) +# df[!success, ] |> print(n = Inf) +# +# # prefer_class = FALSE +# x <- lapply(asNamespace("keras3"), resolve_wrapper_py_obj_expr, +# prefer_class = FALSE) |> +# purrr::map_chr(\(expr) if(is.null(expr)) "" else deparse1(expr)) +# df <- tibble::enframe(x, value = "expr") +# df <- df[order(df$name),] +# success <- df$expr != "" +# df[success, ] |> print(n = Inf) +# df[!success, ] |> print(n = Inf)a +# } + + +# as_activation <- NULL + +# on_load_make_as_activation <- function() { +# if (getRversion() < "4.2") { +# as_activation <<- .as_activation +# } else { +# as_activation <<- local({ +# # make a hashtab to do reverse look ups, converting exported closures like +# # `activation_elu` to a builtin activation name string "elu". 
The +# # motivation is to avoid needlessly popping out to an R closure if we're +# # using a bultin. We have to do this at runtime since the hastab +# # needs the closure object address. +# delayedAssign("h", local({ +# nms <- grep("^activation_", getNamespaceExports("keras3"), value = TRUE) +# h <- utils::hashtab("address", length(nms)) +# ns <- asNamespace("keras3") +# for (name in nms) +# utils::sethash(h, getExportedValue(ns, name), +# substr(name, 12L, 999L)) +# h +# })) +# +# function(x) utils::gethash(h, x) %||% .as_activation(x) +# }) +# } +# } +# +# .as_activation <- function(x) { +# if (is.null(x) || is_py_object(x)) +# return(x) +# +# name <- attr(x, "py_function_name", TRUE) +# if (is_string(name) && identical(x, get0( +# paste0("activation_", name), +# envir = environment(sys.function()), +# inherits = FALSE +# ))) +# # it's a builtin; the name string will be resolved upstream via +# # keras.activations.get(name) +# return(name) +# +# if (is.function(x)) +# return(as_py_function(x, default_name = "custom_activation")) +# x +# } +# + + + +# ---- capture_args ---- +# capture_args_v1 <- +function(cl, modifiers = NULL, ignore = NULL, + envir = parent.frame(), fn = sys.function(-1)) { + + ## bug: match.call() resolves incorrectly if dots are from not the default sys.parent() + ## e.g, this fails if dots originate from the callers caller: + # cl <- eval(quote(match.call()), parent.frame()) + ## workaround: caller must call match.call() from the correct frame + + ## note: capture_args_v1() must always be called at the top level of the intended function body. + ## sys.function(-1) resolves to the incorrect function if the capture_args() + ## call is itself a promise in another call. E.g.,: + ## do.call(foo, capture_args_v1(match.call())) fails because fn resolves to do.call() + + fn_arg_nms <- names(formals(fn)) + known_args <- intersect(names(cl), fn_arg_nms) + known_args <- setdiff(known_args, ignore) + names(known_args) <- known_args + cl2 <- c(quote(list), lapply(known_args, as.symbol)) + + if("..." %in% fn_arg_nms && !"..." %in% ignore) { + assert_all_dots_named(envir, cl) + # this might reorder args by assuming ... are last, but it doesn't matter + # since everything is supplied as a keyword arg to the Python side anyway + cl2 <- c(cl2, quote(...)) + } + + args <- eval(as.call(cl2), envir) + + # check `ignore` again, since arg might have been in `...` + for(nm in intersect(names(args), ignore)) + args[[nm]] <- NULL + + nms_to_modify <- intersect(names(args), names(modifiers)) + for (nm in nms_to_modify) + args[nm] <- list(modifiers[[nm]](args[[nm]])) + # list() so if modifier returns NULL, don't remove the arg + + args +} + + +#' @importFrom rlang list2 +capture_args <- function(modifiers = NULL, ignore = NULL, force = NULL, + enforce_all_dots_named = TRUE) { + call <- sys.call(-1L) + envir <- parent.frame(1L) + fn <- sys.function(-1L) + # if("capture_args" %in% all.names(call, unique = TRUE)) + # stop("incorrect usage of capture_args(), must be evaluated as ", + # "a standard expression, not as not a promise (i.e., not as part ", + # "of a call of another function") + + # match.call() automatically omits missing() args in the returned call. 
These + # user calls all standardize to the same thing: + # - layer_dense(, 10) + # - layer_dense(object = , 10) + # - layer_dense(object = , 10, ) + # - layer_dense(, 10, ) + # all standardize to: + # - layer_dense(units = 10) + call <- match.call(fn, call, expand.dots = TRUE, envir = parent.frame(2)) + + # message("call: ", deparse1(call)) + + fn_arg_nms <- names(formals(fn)) + known_args <- intersect(names(call), fn_arg_nms) + if (length(ignore) && !is.character(ignore)) { + # e.g., ignore = c("object", \(nms) startsWith(nms, ".")) + ignore <- as.character(unlist(lapply(ignore, function(ig) { + if (is.character(ig)) return(ig) + stopifnot(is.function(ig)) + ig <- ig(known_args) # ignore fn can return either lgl or int for [ + if (!is.character(ig)) + ig <- known_args[ig] + ig + }), use.names = FALSE)) + } + known_args <- setdiff(known_args, ignore) + known_args <- union(known_args, force) + names(known_args) <- known_args + + if ("..." %in% fn_arg_nms && !"..." %in% ignore) { + if (enforce_all_dots_named) + assert_all_dots_named(envir, call) + # match.call already drops missing args that match to known args, but it + # doesn't protect from missing args that matched into ... + # use list2() to allow dropping a trailing missing arg in ... also + dots <- quote(...) + list_sym <- quote(list2) + } else { + dots <- NULL + list_sym <- quote(list) + } + + # this might reorder args by assuming ... are last, but it doesn't matter + # since everything is supplied as a keyword arg to the Python side anyway + call <- as.call(c(list_sym, lapply(known_args, as.symbol), dots)) + args <- eval(call, envir) + + # filter out ignore again, in case any were in ... + # we could probably enhance the `call` constructed above to use, e.g., + # ..1, ..2, ..4, to skip ignores, and avoid forcing them. + if (length(ignores_in_dots <- intersect(names(call), ignore))) + args[ignores_in_dots] <- NULL + + # apply modifier functions. e.g., as_nullable_integer() + if (length(names_to_modify <- + intersect(names(args), names(modifiers)))) + args[names_to_modify] <- + map2(modifiers[names_to_modify], args[names_to_modify], + function(modifier, arg) modifier(arg)) + + args +} + + +capture_args3 <- + function(modifiers = NULL, ignore = NULL) { + # currently unused + # like capture_args(), but will also unpack `!!!args` + # e.g., + # constraints <- list(kernel_constraint = constraint_unitnorm(), + # bias_constraint = constraint_unitnorm()) + # layer_dense(units = 2, !!!constraints) + cl0 <- cl <- sys.call(-1L) + envir <- parent.frame(2L) + fn <- sys.function(-1L) + + # first defuse rlang !!! and := in calls + cl[[1L]] <- rlang::quos + cl_exprs <- eval(cl, envir) + + # build up a call to base::list() using the exprs + cl <- as.call(c(list, cl_exprs)) + + # match.call() + cl <- match.call(fn, cl, + expand.dots = !"..." %in% ignore, + envir = envir) + + # filter out args to ignore + for(ig in intersect(names(cl), ignore)) + cl[[ig]] <- NULL + + # eval and capture args + args <- rlang::eval_tidy(cl, env = envir) + + # apply modifier functions. 
e.g., as_nullable_integer + nms_to_modify <- intersect(names(args), names(modifiers)) + for (name in nms_to_modify) + # list() so if modifier returns NULL, don't remove the arg + args[name] <- list(modifiers[[name]](args[[name]])) + + args + } + + +modify_intersection <- function(x, modifiers) { + for (name in intersect(names(x), names(modifiers))) { + x[[name]] <- modifiers[[name]](x[[name]]) + } + x +} + + +assert_all_dots_named <- function(envir = parent.frame(), cl) { + + x <- evalq(eval(substitute(alist(...))), envir) + if (!length(x)) return() + + # ignore trailing missing arg + if (identical(x[length(x)], list(quote(expr =)))) + x[[length(x)]] <- NULL + + if (!length(x)) return() + + x <- names(x) + if (is.character(x) && !anyNA(x) && all(x != "")) + return() + + stop("All arguments provided to `...` must be named.\n", + "Call with unnamed arguments in dots:\n ", + paste(deparse(cl, 500L), collapse = "\n")) +} + + + + + + + + + + + + + + + + + +# ---- py helpers ---- + +py_is <- function(x, y) { + is_py_object(x) && + is_py_object(y) && + identical(py_id(x), py_id(y)) +} + +have_module <- function(module) { + tryCatch({ import(module); TRUE; }, error = function(e) FALSE) +} + +have_h5py <- function() { + have_module("h5py") +} + +have_pyyaml <- function() { + have_module("yaml") +} + +have_requests <- function() { + have_module("requests") +} + +have_pillow <- function() { + have_module("PIL") # aka Pillow +} + + + + + + + + + + + +# ---- unused / dead ---- + + +relative_to <- function(dir, file) { + + # normalize paths + dir <- normalizePath(dir, mustWork = FALSE, winslash = "/") + file <- normalizePath(file, mustWork = FALSE, winslash = "/") + + # ensure directory ends with a / + if (!identical(substr(dir, nchar(dir), nchar(dir)), "/")) { + dir <- paste(dir, "/", sep="") + } + + # if the file is prefixed with the directory, return a relative path + if (identical(substr(file, 1, nchar(dir)), dir)) + file <- substr(file, nchar(dir) + 1, nchar(file)) + + # simplify ./ + if (identical(substr(file, 1, 2), "./")) + file <- substr(file, 3, nchar(file)) + + file +} + + +# internal `[` method that ensures functions in this namespace use one-based +# indexing in case user has a global option set for zero-based indexing. + +if (FALSE) { + # roxygen2 now wants this exported. + `[.tensorflow.tensor` <- + getS3method("[", "tensorflow.tensor", envir = asNamespace("tensorflow")) + formals(`[.tensorflow.tensor`)$style <- "R" + formals(`[.tensorflow.tensor`)$options <- + tensorflow::tf_extract_opts( + one_based = TRUE, + inclusive_stop = TRUE, + disallow_out_of_bounds = TRUE, + warn_tensors_passed_asis = FALSE, + warn_negatives_pythonic = FALSE + ) +} + + + +standard_layer_arg_modifiers <- list( + input_shape = normalize_shape, + batch_input_shape = normalize_shape, + batch_size = as_nullable_integer, + seed = as_nullable_integer +) + + +if (getRversion() < "4.0") + activeBindingFunction <- function(nm, env) { + as.list.environment(env, all.names = TRUE)[[nm]] + } + + +# don't dispatch to as.list(), just wrap in list() +as_list <- function(x) if (is.null(x) || is.list(x)) x else list(x) diff --git a/R/random.R b/R/random.R new file mode 100644 index 0000000000..91abef6cdd --- /dev/null +++ b/R/random.R @@ -0,0 +1,498 @@ + + + +#' Draws samples from a categorical distribution. +#' +#' @description +#' This function takes as input `logits`, a 2-D input tensor with shape +#' (batch_size, num_classes). 
Each row of the input represents a categorical
+#' distribution, with each column index containing the log-probability for a
+#' given class.
+#'
+#' The function will output a 2-D tensor with shape (batch_size, num_samples),
+#' where each row contains samples from the corresponding row in `logits`.
+#' Each column index contains an independent sample drawn from the input
+#' distribution.
+#'
+#' @returns
+#' A 2-D tensor with shape (batch_size, num_samples).
+#'
+#' @param logits
+#' 2-D Tensor with shape (batch_size, num_classes). Each row
+#' should define a categorical distribution with the unnormalized
+#' log-probabilities for all classes.
+#'
+#' @param num_samples
+#' Int, the number of independent samples to draw for each
+#' row of the input. This will be the second dimension of the output
+#' tensor's shape.
+#'
+#' @param dtype
+#' Optional dtype of the output tensor.
+#'
+#' @param seed
+#' An R integer or instance of
+#' [`random_seed_generator()`].
+#' Used to make the behavior of the initializer
+#' deterministic. Note that an initializer seeded with an integer
+#' or `NULL` (unseeded) will produce the same random values
+#' across multiple calls. To get different random values
+#' across multiple calls, use as seed an instance
+#' of [`random_seed_generator()`].
+#'
+#' @export
+#' @family random
# @seealso
# +
+#' @tether keras.random.categorical
+random_categorical <-
+function (logits, num_samples, dtype = "int32", seed = NULL)
+{
+    args <- capture_args(list(num_samples = as_integer, seed = as_integer))
+    do.call(keras$random$categorical, args)
+}
+
+
+#' Randomly set some values in a tensor to 0.
+#'
+#' @description
+#' Randomly set some portion of values in the tensor to 0.
+#'
+#' @param seed
+#' Initial seed for the random number generator
+#'
+#' @param inputs
+#' A tensor
+#'
+#' @param rate
+#' numeric between 0 and 1: the fraction of values to set to 0
+#'
+#' @param noise_shape
+#' A `shape()` value
+#'
+#' @returns A tensor that is a copy of `inputs` with some values set to `0`.
+#' @export
+#' @family random
# @seealso
# +
+#' @tether keras.random.dropout
+random_dropout <-
+function (inputs, rate, noise_shape = NULL, seed = NULL)
+{
+    args <- capture_args(list(seed = as_integer, noise_shape = normalize_shape))
+    do.call(keras$random$dropout, args)
+}
+
+
+#' Draw random samples from the Gamma distribution.
+#'
+#' @param shape
+#' The shape of the random values to generate.
+#'
+#' @param alpha
+#' Float, the parameter of the distribution.
+#'
+#' @param dtype
+#' Optional dtype of the tensor. Only floating point types are
+#' supported. If not specified, [`config_floatx()`] is used,
+#' which defaults to `float32` unless you configured it otherwise (via
+#' `config_set_floatx(float_dtype)`).
+#'
+#' @param seed
+#' An R integer or instance of
+#' [`random_seed_generator()`].
+#' Used to make the behavior of the initializer
+#' deterministic. Note that an initializer seeded with an integer
+#' or `NULL` (unseeded) will produce the same random values
+#' across multiple calls. To get different random values
+#' across multiple calls, use as seed an instance
+#' of [`random_seed_generator()`].
+#'
+#' @returns A tensor of random values.
+#' @export
+#' @family random
# @seealso
# +
+#' @tether keras.random.gamma
+random_gamma <-
+function (shape, alpha, dtype = NULL, seed = NULL)
+{
+    args <- capture_args(list(shape = normalize_shape, seed = as_integer))
+    do.call(keras$random$gamma, args)
+}
+
+
+#' Draw random integers from a uniform distribution.
+#'
+#' @description
+#' The generated values follow a uniform distribution in the range
+#' `[minval, maxval)`. The lower bound `minval` is included in the range,
+#' while the upper bound `maxval` is excluded.
+#'
+#' `dtype` must be an integer type.
+#'
+#' @param shape
+#' The shape of the random values to generate.
+#'
+#' @param minval
+#' integer, lower bound of the range of
+#' random values to generate (inclusive).
+#'
+#' @param maxval
+#' integer, upper bound of the range of
+#' random values to generate (exclusive).
+#'
+#' @param dtype
+#' Optional dtype of the tensor. Only integer types are
+#' supported. If not specified, `"int32"` is used.
+#'
+#' @param seed
+#' An R integer or instance of
+#' [`random_seed_generator()`].
+#' Used to make the behavior of the initializer
+#' deterministic. Note that an initializer seeded with an integer
+#' or `NULL` (unseeded) will produce the same random values
+#' across multiple calls. To get different random values
+#' across multiple calls, use as seed an instance
+#' of [`random_seed_generator()`].
+#'
+#' @returns A tensor of random values.
+#' @export
+#' @family random
# @seealso
# +
+#' @tether keras.random.randint
+random_integer <-
+function (shape, minval, maxval, dtype = "int32", seed = NULL)
+{
+    args <- capture_args(list(shape = normalize_shape, seed = as_integer,
+        maxval = function (x)
+        as_integer(ceiling(x)), minval = as_integer))
+    do.call(keras$random$randint, args)
+}
+
+
+#' Draw random samples from a normal (Gaussian) distribution.
+#'
+#' @param shape
+#' The shape of the random values to generate.
+#'
+#' @param mean
+#' Float, defaults to 0. Mean of the random values to generate.
+#'
+#' @param stddev
+#' Float, defaults to 1. Standard deviation of the random values
+#' to generate.
+#'
+#' @param dtype
+#' Optional dtype of the tensor. Only floating point types are
+#' supported. If not specified, [`config_floatx()`] is used,
+#' which defaults to `float32` unless you configured it otherwise (via
+#' `config_set_floatx(float_dtype)`).
+#'
+#' @param seed
+#' An R integer or instance of
+#' [`random_seed_generator()`].
+#' Used to make the behavior of the initializer
+#' deterministic. Note that an initializer seeded with an integer
+#' or `NULL` (unseeded) will produce the same random values
+#' across multiple calls. To get different random values
+#' across multiple calls, use as seed an instance
+#' of [`random_seed_generator()`].
+#'
+#' @returns A tensor of random values.
+#' @export
+#' @family random
# @seealso
# +
+#' @tether keras.random.normal
+random_normal <-
+function (shape, mean = 0, stddev = 1, dtype = NULL, seed = NULL)
+{
+    args <- capture_args(list(shape = normalize_shape, seed = as_integer))
+    do.call(keras$random$normal, args)
+}
+
+
+#' Shuffle the elements of a tensor uniformly at random along an axis.
+#'
+#' @param x
+#' The tensor to be shuffled.
+#'
+#' @param axis
+#' An integer specifying the axis along which to shuffle. Defaults to
+#' `1` (axis numbers are 1-based in the R interface).
+#'
+#' @param seed
+#' An R integer or instance of
+#' [`random_seed_generator()`].
+#' Used to make the behavior of the initializer
+#' deterministic. Note that an initializer seeded with an integer
+#' or `NULL` (unseeded) will produce the same random values
+#' across multiple calls. To get different random values
+#' across multiple calls, use as seed an instance
+#' of [`random_seed_generator()`].
+#'
+#' @returns A tensor, a copy of `x` with the `axis` axis shuffled.
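+#'
+#' # Examples
+#' An illustrative sketch:
+#' ```r
+#' x <- op_arange(10)
+#' random_shuffle(x, seed = 42)
+#' ```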
+#' @export +#' @family random +# @seealso +# + +#' @tether keras.random.shuffle +random_shuffle <- +function (x, axis = 1L, seed = NULL) +{ + args <- capture_args(list(axis = as_axis, seed = as_integer)) + do.call(keras$random$shuffle, args) +} + + +#' Draw samples from a truncated normal distribution. +#' +#' @description +#' The values are drawn from a normal distribution with specified mean and +#' standard deviation, discarding and re-drawing any samples that are more +#' than two standard deviations from the mean. +#' +#' @param shape +#' The shape of the random values to generate. +#' +#' @param mean +#' Float, defaults to 0. Mean of the random values to generate. +#' +#' @param stddev +#' Float, defaults to 1. Standard deviation of the random values +#' to generate. +#' +#' @param dtype +#' Optional dtype of the tensor. Only floating point types are +#' supported. If not specified, [`config_floatx()`] is used, +#' which defaults to `float32` unless you configured it otherwise (via +#' `config_set_floatx(float_dtype)`) +#' +#' @param seed +#' An R integer or instance of +#' [`random_seed_generator()`]. +#' Used to make the behavior of the initializer +#' deterministic. Note that an initializer seeded with an integer +#' or `NULL` (unseeded) will produce the same random values +#' across multiple calls. To get different random values +#' across multiple calls, use as seed an instance +#' of [`random_seed_generator()`]. +#' +#' @returns A tensor of random values. +#' @export +#' @family random +# @seealso +# + +#' @tether keras.random.truncated_normal +random_truncated_normal <- +function (shape, mean = 0, stddev = 1, dtype = NULL, seed = NULL) +{ + args <- capture_args(list(shape = normalize_shape, seed = as_integer)) + do.call(keras$random$truncated_normal, args) +} + + +#' Draw samples from a uniform distribution. +#' +#' @description +#' The generated values follow a uniform distribution in the range +#' `[minval, maxval)`. The lower bound `minval` is included in the range, +#' while the upper bound `maxval` is excluded. +#' +#' `dtype` must be a floating point type, the default range is `[0, 1)`. +#' +#' @param shape +#' The shape of the random values to generate. +#' +#' @param minval +#' Float, defaults to 0. Lower bound of the range of +#' random values to generate (inclusive). +#' +#' @param maxval +#' Float, defaults to 1. Upper bound of the range of +#' random values to generate (exclusive). +#' +#' @param dtype +#' Optional dtype of the tensor. Only floating point types are +#' supported. If not specified, [`config_floatx()`] is used, +#' which defaults to `float32` unless you configured it otherwise (via +#' `config_set_floatx(float_dtype)`) +#' +#' @param seed +#' An R integer or instance of +#' [`random_seed_generator()`]. +#' Used to make the behavior of the initializer +#' deterministic. Note that an initializer seeded with an integer +#' or `NULL` (unseeded) will produce the same random values +#' across multiple calls. To get different random values +#' across multiple calls, use as seed an instance +#' of [`random_seed_generator()`]. +#' +#' @returns A tensor of random values. +#' @export +#' @family random +# @seealso +# + +#' @tether keras.random.uniform +random_uniform <- +function (shape, minval = 0, maxval = 1, dtype = NULL, seed = NULL) +{ + args <- capture_args(list(shape = normalize_shape, seed = as_integer)) + do.call(keras$random$uniform, args) +} + + +#' Generates variable seeds upon each call to a RNG-using function. 
+#'
+#' @description
+#' In Keras, all RNG-using methods (such as `random_normal()`)
+#' are stateless, meaning that if you pass an integer seed to them
+#' (such as `seed = 42`), they will return the same values at each call.
+#' In order to get different values at each call, you must use a
+#' `SeedGenerator` instead as the seed argument. The `SeedGenerator`
+#' object is stateful.
+#'
+#' # Examples
+#' ```{r}
+#' seed_gen <- random_seed_generator(seed = 42)
+#' values <- random_normal(shape = c(2, 3), seed = seed_gen)
+#' new_values <- random_normal(shape = c(2, 3), seed = seed_gen)
+#' ```
+#'
+#' Usage in a layer:
+#'
+#' ```{r}
+#' layer_dropout2 <- new_layer_class(
+#'   "dropout2",
+#'   initialize = function(...) {
+#'     super$initialize(...)
+#'     self$seed_generator <- random_seed_generator(seed = 1337)
+#'   },
+#'   call = function(x, training = FALSE) {
+#'     if (training) {
+#'       return(random_dropout(x, rate = 0.5, seed = self$seed_generator))
+#'     }
+#'     return(x)
+#'   }
+#' )
+#'
+#' out <- layer_dropout2()
+#' out(op_ones(10), training = TRUE)
+#' ```
+#'
+#' @param seed
+#' Initial seed for the random number generator
+#'
+#' @param name String, name for the object
+#'
+#' @param ...
+#' For forward/backward compatibility.
+#'
+#' @returns A `SeedGenerator` instance, which can be passed as the `seed = `
+#' argument to other random tensor generators.
+#' @export
+#' @family random
# @seealso
# +
+#'
+#' @tether keras.random.SeedGenerator
+random_seed_generator <-
+function (seed = NULL, name = NULL, ...)
+{
+    args <- capture_args(list(seed = as_integer))
+    do.call(keras$random$SeedGenerator, args)
+}
+
+#' Draw samples from a Beta distribution.
+#'
+#' @description
+#' The values are drawn from a Beta distribution parametrized
+#' by alpha and beta.
+#'
+#' @param shape
+#' The shape of the random values to generate.
+#'
+#' @param alpha
+#' Float or an array of floats representing the first
+#' parameter alpha. Must be broadcastable with `beta` and `shape`.
+#'
+#' @param beta
+#' Float or an array of floats representing the second
+#' parameter beta. Must be broadcastable with `alpha` and `shape`.
+#'
+#' @param dtype
+#' Optional dtype of the tensor. Only floating point types are
+#' supported. If not specified, `config_floatx()` is used,
+#' which defaults to `"float32"` unless you configured it otherwise (via
+#' `config_set_floatx(float_dtype)`).
+#'
+#' @param seed
+#' An integer or instance of
+#' `random_seed_generator()`.
+#' Used to make the behavior of the initializer
+#' deterministic. Note that an initializer seeded with an integer
+#' or `NULL` (unseeded) will produce the same random values
+#' across multiple calls. To get different random values
+#' across multiple calls, use as seed an instance
+#' of `random_seed_generator()`.
+#'
+#' @export
+#' @returns A tensor of random values.
+#' @family random
+#' @tether keras.random.beta
+random_beta <-
+function (shape, alpha, beta, dtype = NULL, seed = NULL)
+{
+    args <- capture_args(list(shape = normalize_shape, seed = as_integer))
+    do.call(keras$random$beta, args)
+}
+
+
+#' Draw samples from a Binomial distribution.
+#'
+#' @description
+#' The values are drawn from a Binomial distribution with
+#' specified trial count and probability of success.
+#'
+#' @param shape
+#' The shape of the random values to generate.
+#'
+#' @param counts
+#' A number or array of numbers representing the
+#' number of trials. It must be broadcastable with `probabilities`.
+#'
+#' @param probabilities
+#' A float or array of floats representing the
+#' probability of success of an individual event.
+#' It must be broadcastable with `counts`.
+#'
+#' @param dtype
+#' Optional dtype of the tensor. Only floating point types are
+#' supported. If not specified, `config_floatx()` is used,
+#' which defaults to `"float32"` unless you configured it otherwise (via
+#' `config_set_floatx(float_dtype)`).
+#'
+#' @param seed
+#' An R integer or instance of
+#' `random_seed_generator()`.
+#' Used to make the behavior of the initializer
+#' deterministic. Note that an initializer seeded with an integer
+#' or `NULL` (unseeded) will produce the same random values
+#' across multiple calls. To get different random values
+#' across multiple calls, use as seed an instance
+#' of `random_seed_generator()`.
+#'
+#' @export
+#' @returns A tensor of random values.
+#' @family random
+#' @tether keras.random.binomial
+#' @seealso
+#' +
+random_binomial <-
+function (shape, counts, probabilities, dtype = NULL, seed = NULL)
+{
+    args <- capture_args(list(shape = normalize_shape, seed = as_integer))
+    do.call(keras$random$binomial, args)
+}
diff --git a/R/reexports.R b/R/reexports.R
index 6ac5f8467e..181cc072c8 100644
--- a/R/reexports.R
+++ b/R/reexports.R
@@ -5,8 +5,11 @@
 #' @name %>%
 #' @rdname pipe
 #' @keywords internal
+#' @returns Most commonly, the result of calling the right hand side with the
+#' left hand side as an argument: `rhs(lhs)`. See the magrittr vignette for
+#' other, more advanced, usages.
 #' @export
-#' @import magrittr
+#' @importFrom magrittr %<>% %>%
 #' @usage lhs \%>\% rhs
 NULL
@@ -21,8 +24,10 @@ magrittr::`%<>%`
 #' @name %<-%
 #' @rdname multi-assign
 #' @keywords internal
+#' @returns The right-hand-side argument, `value`, invisibly. This is called
+#' primarily for its side effect of assigning symbols in the current frame.
 #' @export
-#' @import zeallot
+#' @importFrom zeallot %<-%
 #' @usage x \%<-\% value
 NULL
@@ -34,42 +39,43 @@ reticulate::use_python
 #' @export
 reticulate::use_virtualenv

-#' @importFrom reticulate use_condaenv
-#' @export
-reticulate::use_condaenv
-
 #' @importFrom reticulate array_reshape
 #' @export
 reticulate::array_reshape

+#' @importFrom reticulate np_array
+#' @export
+reticulate::np_array
+
 #' @importFrom reticulate tuple
 #' @export
 reticulate::tuple

-#' @importFrom tensorflow use_session_with_seed
 #' @export
-tensorflow::use_session_with_seed
+reticulate::iter_next

-#' @importFrom tensorflow tensorboard
 #' @export
-tensorflow::tensorboard
+reticulate::iterate

-#' @importFrom tensorflow evaluate
 #' @export
-tensorflow::evaluate
+reticulate::as_iterator

-#' @importFrom tensorflow export_savedmodel
+#' @importFrom tensorflow tensorboard
 #' @export
-tensorflow::export_savedmodel
+tensorflow::tensorboard

-#' @importFrom tensorflow shape
+#' @importFrom tensorflow export_savedmodel
 #' @export
-tensorflow::shape
+tensorflow::export_savedmodel

 #' @importFrom tensorflow as_tensor
 #' @export
 tensorflow::as_tensor

+#' @importFrom tensorflow all_dims
+#' @export
+tensorflow::all_dims
+
 #' @importFrom tfruns flags
 #' @export
 tfruns::flags
@@ -101,3 +107,17 @@ generics::fit
 #' @importFrom generics compile
 #' @export
 generics::compile
+
+# ' @importFrom generics evaluate
+# ' @export
+# generics::evaluate
+## generics::evaluate() has a different signature from tensorflow::evaluate()
+## evaluate(x, ...) vs evaluate(object, ...)
+## We obviously can't dispatch on `x` in the evaluate() method keras uses, since
+## that's a named argument for the dataset. This means we can't use
+## generics::evaluate(). To drop the tensorflow dependency, it seems we'll have
+## to eventually export a `keras3::evaluate()` generic.
+
+#' @importFrom tensorflow evaluate
+#' @export
+tensorflow::evaluate
diff --git a/R/regularizers.R b/R/regularizers.R
index 48b072e688..01f4423234 100644
--- a/R/regularizers.R
+++ b/R/regularizers.R
@@ -1,60 +1,151 @@
-#' L1 and L2 regularization
+
+
+#' A regularizer that applies an L1 regularization penalty.
+#'
+#' @description
+#' The L1 regularization penalty is computed as:
+#' `loss = l1 * reduce_sum(abs(x))`
+#'
+#' L1 may be passed to a layer as a string identifier:
+#'
+#' ```{r}
+#' dense <- layer_dense(units = 3, kernel_regularizer = 'l1')
+#' ```
 #'
-#' @param l Regularization factor.
-#' @param l1 L1 regularization factor.
-#' @param l2 L2 regularization factor.
+#' In this case, the default value used is `l1=0.01`.
 #'
+#' @param l1
+#' float, L1 regularization factor.
+#'
+#' @returns A `Regularizer` instance that can be passed to layer constructors or
+#' used as a standalone object.
 #' @export
-regularizer_l1 <- function(l = 0.01) {
-  keras$regularizers$l1(l = l)
+#' @family regularizers
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.regularizers.L1
+regularizer_l1 <-
+function (l1 = 0.01)
+{
+    args <- capture_args()
+    do.call(keras$regularizers$L1, args)
 }

-#' @rdname regularizer_l1
+
+#' A regularizer that applies both L1 and L2 regularization penalties.
+#'
+#' @description
+#' The L1 regularization penalty is computed as:
+#' `loss = l1 * reduce_sum(abs(x))`
+#'
+#' The L2 regularization penalty is computed as
+#' `loss = l2 * reduce_sum(square(x))`
+#'
+#' L1L2 may be passed to a layer as a string identifier:
+#'
+#' ```{r}
+#' dense <- layer_dense(units = 3, kernel_regularizer = 'L1L2')
+#' ```
+#'
+#' In this case, the default values used are `l1=0.01` and `l2=0.01`.
+#'
+#' @param l1
+#' float, L1 regularization factor.
+#'
+#' @param l2
+#' float, L2 regularization factor.
+#'
+#'
+#' @inherit regularizer_l1 return
 #' @export
-regularizer_l2 <- function(l = 0.01) {
-  keras$regularizers$l2(l = l)
+#' @family regularizers
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.regularizers.L1L2
+regularizer_l1_l2 <-
+function (l1 = 0, l2 = 0)
+{
+    args <- capture_args()
+    do.call(keras$regularizers$L1L2, args)
 }

-#' @rdname regularizer_l1
+
+#' A regularizer that applies an L2 regularization penalty.
+#'
+#' @description
+#' The L2 regularization penalty is computed as:
+#' `loss = l2 * reduce_sum(square(x))`
+#'
+#' L2 may be passed to a layer as a string identifier:
+#'
+#' ```{r}
+#' dense <- layer_dense(units = 3, kernel_regularizer='l2')
+#' ```
+#'
+#' In this case, the default value used is `l2=0.01`.
+#'
+#' @param l2
+#' float, L2 regularization factor.
+#'
+#' @inherit regularizer_l1 return
 #' @export
-regularizer_l1_l2 <- function(l1 = 0.01, l2 = 0.01) {
-  keras$regularizers$l1_l2(l1 = l1, l2 = l2)
+#' @family regularizers
+#' @seealso
+#' +
+# +
+#'
+#' @tether keras.regularizers.L2
+regularizer_l2 <-
+function (l2 = 0.01)
+{
+    args <- capture_args()
+    do.call(keras$regularizers$L2, args)
 }

-#' A regularizer that encourages input vectors to be orthogonal to each other
+#' Regularizer that encourages input vectors to be orthogonal to each other.
 #'
-#' @details
+#' @description
 #' It can be applied to either the rows of a matrix (`mode="rows"`) or its
 #' columns (`mode="columns"`).
When applied to a `Dense` kernel of shape #' `(input_dim, units)`, rows mode will seek to make the feature vectors #' (i.e. the basis of the output space) orthogonal to each other. #' -#' @param factor Float. The regularization factor. The regularization penalty will -#' be proportional to `factor` times the mean of the dot products between -#' the L2-normalized rows (if `mode="rows"`, or columns if `mode="columns"`) -#' of the inputs, excluding the product of each row/column with itself. -#' Defaults to 0.01. +#' # Examples +#' ```{r} +#' regularizer <- regularizer_orthogonal(factor=0.01) +#' layer <- layer_dense(units=4, kernel_regularizer=regularizer) +#' ``` #' -#' @param mode String, one of `{"rows", "columns"}`. Defaults to `"rows"`. In rows -#' mode, the regularization effect seeks to make the rows of the input -#' orthogonal to each other. In columns mode, it seeks to make the columns -#' of the input orthogonal to each other. -#' @param ... For backwards and forwards compatibility +#' @param factor +#' Float. The regularization factor. The regularization penalty +#' will be proportional to `factor` times the mean of the dot products +#' between the L2-normalized rows (if `mode="rows"`, or columns if +#' `mode="columns"`) of the inputs, excluding the product of each +#' row/column with itself. Defaults to `0.01`. #' -#' ````r -#' layer <- layer_dense( -#' units = 4, -#' kernel_regularizer = regularizer_orthogonal(factor = 0.01)) -#' ```` +#' @param mode +#' String, one of `{"rows", "columns"}`. Defaults to `"rows"`. In +#' rows mode, the regularization effect seeks to make the rows of the +#' input orthogonal to each other. In columns mode, it seeks to make +#' the columns of the input orthogonal to each other. #' -#' @seealso -#' + +#' @inherit regularizer_l1 return #' @export +#' @family regularizers +#' @seealso +#' + +# + +#' +#' @tether keras.regularizers.OrthogonalRegularizer regularizer_orthogonal <- -function(factor = 0.01, mode = "rows", ...) +function (factor = 0.01, mode = "rows") { - args <- capture_args(match.call(), NULL) - do.call(keras$regularizers$OrthogonalRegularizer, args) + args <- capture_args() + do.call(keras$regularizers$OrthogonalRegularizer, args) } diff --git a/R/s3-methods.R b/R/s3-methods.R new file mode 100644 index 0000000000..852ef6f58f --- /dev/null +++ b/R/s3-methods.R @@ -0,0 +1,75 @@ + +#' @export +`==.keras.src.backend.common.keras_tensor.KerasTensor` <- function(e1, e2) { + op_equal(e1, e2) +} + +#' @export +`+.keras.src.backend.common.keras_tensor.KerasTensor` <- function(e1, e2) { + if(missing(e2)) return(e1) + NextMethod() +} + + +#' @export +as.array.keras.src.backend.common.variables.KerasVariable <- function(x, ...) { + as_r_value(keras$ops$convert_to_numpy(x)) +} + +#' @export +as.numeric.keras.src.backend.common.variables.KerasVariable <- function(x, ...) { + as.numeric(as_r_value(keras$ops$convert_to_numpy(x))) +} + +#' @export +as.double.keras.src.backend.common.variables.KerasVariable <- function(x, ...) { + as.double(as_r_value(keras$ops$convert_to_numpy(x))) +} + +#' @export +as.integer.keras.src.backend.common.variables.KerasVariable <- function(x, ...) { + as.integer(as_r_value(keras$ops$convert_to_numpy(x))) +} + + +#' @exportS3Method base::all.equal +all.equal.keras.src.backend.common.variables.KerasVariable <- +function(target, current, ...) 
{ + if (inherits(target, "keras.src.backend.common.variables.KerasVariable")) + target <- as_r_value(target$numpy()) + if (inherits(current, "keras.src.backend.common.variables.KerasVariable")) + current <- as_r_value(current$numpy()) + all.equal(target, current, ...) +} + + +## This method isn't the best semantic match for all.equal(), but identical() +## isn't a generic, and doesn't work correctly for comparing python objects (it +## returns false if the pyref environment isn't the same exact environment, even +## if the pyrefs are wrapping the same py object), and there isn't a great +## (exported) way to compare if two # tensors are the same that doesn't leak +## python concepts... +#' @exportS3Method base::all.equal +all.equal.keras.src.backend.common.keras_tensor.KerasTensor <- +function(target, current, ...) { + inherits(target, "keras.src.backend.common.keras_tensor.KerasTensor") && + inherits(current, "keras.src.backend.common.keras_tensor.KerasTensor") && + py_id(target) == py_id(current) +} + +## Conditionally export these py_to_r methods, if tensorflow hasn't already exported them. +## We do this to keep keras3 and tensorflow decoupled, but to avoid +## "S3 method overwritten" warnings if both packages are loaded. +## +## Note, we still may need to revisit this; either to disable it, or export a custom $<- method +## for base classes like Layer, so that compound assignment expressions aren't a +## problem. +## +# these S3 methods are conditionally registered in .onLoad() instead of in NAMESPACE. +# __ instead of . to avoid a roxygen warning about unexported S3 methods when generating NAMESPACE +py_to_r__keras.src.utils.tracking.TrackedDict <- function(x) import("builtins")$dict(x) + +py_to_r__keras.src.utils.tracking.TrackedList <- function(x) import("builtins")$list(x) + +py_to_r__keras.src.utils.tracking.TrackedSet <- function(x) import("builtins")$list(x) + diff --git a/R/seed.R b/R/seed.R deleted file mode 100644 index c6efec7c34..0000000000 --- a/R/seed.R +++ /dev/null @@ -1,17 +0,0 @@ - - -tensorflow_on_before_use_session <- function(quiet) { - if (is_backend("tensorflow")) { - keras$backend$clear_session() - TRUE - } else { - FALSE - } -} - -tensorflow_on_use_session <- function(sess, quiet) { - if (is_backend("tensorflow")) { - if (tensorflow::tf_version() < "2.0") - keras$backend$set_session(sess) - } -} diff --git a/R/shape.R b/R/shape.R new file mode 100644 index 0000000000..7a314833c2 --- /dev/null +++ b/R/shape.R @@ -0,0 +1,215 @@ + +#' Tensor shape utility +#' +#' This function can be used to create or get the shape of an object. +#' +#' # Examples +#' ```{r} +#' shape(1, 2, 3) +#' ``` +#' +#' 3 ways to specify an unknown dimension +#' ```{r, results = "hold"} +#' shape(NA, 2, 3) +#' shape(NULL, 2, 3) +#' shape(-1, 2, 3) +#' ``` +#' +#' Most functions that take a 'shape' argument also coerce with `shape()` +#' ```{r, results = "hold"} +#' layer_input(c(1, 2, 3)) +#' layer_input(shape(1, 2, 3)) +#' ``` +#' +#' You can also use `shape()` to get the shape of a tensor +#' (excepting scalar integer tensors). +#' ```{r} +#' symbolic_tensor <- layer_input(shape(1, 2, 3)) +#' shape(symbolic_tensor) +#' +#' eager_tensor <- op_ones(c(1,2,3)) +#' shape(eager_tensor) +#' op_shape(eager_tensor) +#' ``` +#' +#' Combine or expand shapes +#' ```{r} +#' shape(symbolic_tensor, 4) +#' shape(5, symbolic_tensor, 4) +#' ``` +#' +#' Scalar integer tensors are treated as axis values. 
These are most commonly +#' encountered when tracing a function in graph mode, where an axis size might +#' be unknown. +#' ```{r} +#' tfn <- tensorflow::tf_function(function(x) { +#' print(op_shape(x)) +#' x +#' }, +#' input_signature = list(tensorflow::tf$TensorSpec(shape(1, NA, 3)))) +#' invisible(tfn(op_ones(shape(1, 2, 3)))) +#' ``` +#' +#' A useful pattern is to unpack the `shape()` with `%<-%`, like this: +#' ```r +#' c(batch_size, seq_len, channels) %<-% shape(x) +#' ``` +#' +#' ```{r} +#' echo_print <- function(x) { +#' message("> ", deparse(substitute(x))); +#' if(!is.null(x)) print(x) +#' } +#' tfn <- tensorflow::tf_function(function(x) { +#' c(axis1, axis2, axis3) %<-% shape(x) +#' echo_print(str(list(axis1 = axis1, axis2 = axis2, axis3 = axis3))) +#' +#' echo_print(shape(axis1)) # use axis1 tensor as axis value +#' echo_print(shape(axis1, axis2, axis3)) # use axis1 tensor as axis value +#' +#' # use shape() to compose a new shape, e.g., in multihead attention +#' n_heads <- 4 +#' echo_print(shape(axis1, axis2, n_heads, axis3/n_heads)) +#' +#' x +#' }, +#' input_signature = list(tensorflow::tf$TensorSpec(shape(NA, 4, 16)))) +#' invisible(tfn(op_ones(shape(2, 4, 16)))) +#' ``` +#' +#' If you want to resolve the shape of a tensor that can potentially be +#' a scalar integer, you can wrap the tensor in `I()`, or use [`op_shape()`]. +#' ```{r} +#' (x <- op_convert_to_tensor(2L)) +#' +#' # by default, shape() treats scalar integer tensors as axis values +#' shape(x) +#' +#' # to access the shape of a scalar integer, +#' # call `op_shape()`, or protect with `I()` +#' op_shape(x) +#' shape(I(x)) +#' ``` +#' +#' @param ... A shape specification. Numerics, `NULL` and tensors are valid. +#' `NULL`, `NA`, and `-1L` can be used to specify an unspecified dim size. +#' Tensors are dispatched to `op_shape()` to extract the tensor shape. Values +#' wrapped in `I()` are used asis (see examples). All other objects are coerced +#' via `as.integer()`. +#' +#' @returns A list with a `"keras_shape"` class attribute. Each element of the +#' list will be either a) `NULL`, b) an integer or c) a scalar integer tensor +#' (e.g., when supplied a TF tensor with a unspecified dimension in a function +#' being traced). +#' +#' @export +#' @seealso [op_shape()] +shape <- function(...) { + + fix <- function(x) { + + if (is_py_object(x)) { + if (inherits(x, "tensorflow.python.framework.tensor_shape.TensorShape")) + return(map_int(as.list(as_r_value(x$as_list())), + function(e) e %||% NA_integer_)) + + shp <- keras$ops$shape(x) + + # convert subclassed tuples, as encountered in Torch + # class(shp): torch.Size, python.builtin.tuple, python.builtin.object + if(inherits(shp, "python.builtin.tuple")) + shp <- import("builtins")$tuple(shp) + + # scalar integer tensors, unprotected with I(), are treated as an axis value + if (identical(shp, list()) && keras$backend$is_int_dtype(x$dtype)) { + if (!inherits(x, "AsIs")) + return(x) + } + + # otherwise, (most common path) shape() is a tensor shape accessor + return(lapply(shp, function(d) d %||% NA_integer_)) + } + + ## TODO: shape() + ## Users may pass R arrays to shape(), expecting it to behave like dim(). 
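+    ## (e.g., shape(array(0, c(2, 3))) read as dim() would give shape(2, 3),
+    ##  which is exactly what shape(c(2, 3)) already means as axis values)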
+ ## If we accept them, the edgecase of 1-d arrays gets tricky (esp because + ## numpy vectors arrays get converted to 1d R arrays) + ## If we accept simple R arrays and treat them the same as Tensors, + ## i.e., shape() is synonym for dim(), return dim(x) + # if(!is.object(x) && is.atomic(x) && + # !is.null(attr(x, "dim", TRUE))) + # return(dim(x)) + ## or we warn + # if (!is.null(dim(x)) && length(x) > 200) + # warning("Did you pass an R array to shape()? Did you mean to use dim()?") + + if (is.null(x) || + identical(x, NA_integer_) || + identical(x, NA_real_) || + identical(x, NA) || + (is.numeric(x) && isTRUE(suppressWarnings(x == -1L)))) + NA_integer_ # so we can safely unlist() + else if (!is.atomic(x) || length(x) > 1) + lapply(x, fix) + else + as.integer(x) + } + + shp <- unlist(lapply(list(...), fix), use.names = FALSE) + shp <- lapply(shp, function(x) if (identical(x, NA_integer_)) NULL else x) + class(shp) <- "keras_shape" + shp +} + +#' @export +#' @rdname shape +#' @param x A 'keras_shape' object +#' @param prefix Whether to format the shape object with a prefix. Defaults to +#' `"shape"`. +format.keras_shape <- function(x, ..., prefix = TRUE) { + x <- vapply(x, function(d) format(d %||% "NA"), "") + x <- paste0(x, collapse = ", ") + if(isTRUE(prefix)) + prefix <- "shape" + else if (!is_string(prefix)) + prefix <- "" + paste0(prefix, "(", x, ")") +} + +#' @export +#' @rdname shape +print.keras_shape <- function(x, ...) { + writeLines(format(x, ...)) + invisible(x) +} + +#' @rdname shape +#' @export +`[.keras_shape` <- function(x, ...) { + out <- unclass(x)[...] + class(out) <- class(x) + out +} + +#' @export +r_to_py.keras_shape <- function(x, convert = FALSE) { + tuple(x, convert = convert) +} + +#' @rdname shape +#' @export +as.integer.keras_shape <- function(x, ...) { + vapply(x, function(el) el %||% NA_integer_, 1L) +} + +#' @importFrom zeallot destructure +#' @export +destructure.keras_shape <- function(x) unclass(x) + +#' @rdname shape +#' @export +as.list.keras_shape <- function(x, ...) unclass(x) + +# ' @rdname shape +# ' @export +# c.keras_shape <- function(...) shape(...) diff --git a/R/tensorflow-hooks.R b/R/tensorflow-hooks.R new file mode 100644 index 0000000000..1a933c003f --- /dev/null +++ b/R/tensorflow-hooks.R @@ -0,0 +1,26 @@ + + +tensorflow_on_before_use_session <- function(quiet) { + if (identical(config_backend(), "tensorflow")) { + tryCatch( + keras$utils$clear_session(), + python.builtin.AttributeError = function(e) { + tryCatch( + keras$backend$clear_session(), + error = function(e2) + stop(e) + ) + } + ) + TRUE + } else { + FALSE + } +} + +tensorflow_on_use_session <- function(sess, quiet) { + if (is_backend("tensorflow")) { + if (tensorflow::tf_version() < "2.0") + keras$backend$set_session(sess) + } +} diff --git a/R/timeseries.R b/R/timeseries.R deleted file mode 100644 index 60eabd48ab..0000000000 --- a/R/timeseries.R +++ /dev/null @@ -1,44 +0,0 @@ - -#' Utility function for generating batches of temporal data. -#' -#' @param data Object containing consecutive data points (timesteps). The data -#' should be 2D, and axis 1 is expected to be the time dimension. -#' @param targets Targets corresponding to timesteps in `data`. -#' It should have same length as `data`. -#' @param length Length of the output sequences (in number of timesteps). -#' @param sampling_rate Period between successive individual timesteps -#' within sequences. For rate `r`, timesteps `data[i]`, `data[i-r]`, ... `data[i - length]` -#' are used for create a sample sequence. 
-#' @param stride Period between successive output sequences. -#' For stride `s`, consecutive output samples would -#' be centered around `data[i]`, `data[i+s]`, `data[i+2*s]`, etc. -#' @param start_index,end_index Data points earlier than `start_index` -#' or later than `end_index` will not be used in the output sequences. -#' This is useful to reserve part of the data for test or validation. -#' @param shuffle Whether to shuffle output samples, -#' or instead draw them in chronological order. -#' @param reverse Boolean: if `true`, timesteps in each output sample will be -#' in reverse chronological order. -#' @param batch_size Number of timeseries samples in each batch -#' (except maybe the last one). -#' -#' @return An object that can be passed to generator based training -#' functions (e.g. [fit_generator()]).ma -#' -#' @export -timeseries_generator <- function(data, targets, length, sampling_rate = 1, - stride = 1, start_index = 0, end_index = NULL, - shuffle = FALSE, reverse = FALSE, batch_size = 128) { - keras$preprocessing$sequence$TimeseriesGenerator( - data = keras_array(data), - targets = keras_array(targets), - length = as.integer(length), - sampling_rate = as.integer(sampling_rate), - stride = as.integer(stride), - start_index = as.integer(start_index), - end_index = as_nullable_integer(end_index), - shuffle = shuffle, - reverse = reverse, - batch_size = as.integer(batch_size) - ) -} diff --git a/R/utils.R b/R/utils.R index df1052126e..fb03081d8e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,229 +1,505 @@ -resolve_utils <- function() { - keras$utils -} -#' Converts a class vector (integers) to binary class matrix. + + +#' Resets all state generated by Keras. +#' +#' @description +#' Keras manages a global state, which it uses to implement the Functional +#' model-building API and to uniquify autogenerated layer names. +#' +#' If you are creating many models in a loop, this global state will consume +#' an increasing amount of memory over time, and you may want to clear it. +#' Calling `clear_session()` releases the global state: this helps avoid +#' clutter from old models and layers, especially when memory is limited. +#' +#' Example 1: calling `clear_session()` when creating models in a loop +#' +#' ```{r} +#' for (i in 1:100) { +#' # Without `clear_session()`, each iteration of this loop will +#' # slightly increase the size of the global state managed by Keras +#' model <- keras_model_sequential() +#' for (j in 1:10) { +#' model <- model |> layer_dense(units = 10) +#' } +#' } +#' +#' for (i in 1:100) { +#' # With `clear_session()` called at the beginning, +#' # Keras starts with a blank state at each iteration +#' # and memory consumption is constant over time. +#' clear_session() +#' model <- keras_model_sequential() +#' for (j in 1:10) { +#' model <- model |> layer_dense(units = 10) +#' } +#' } +#' ``` #' -#' @details -#' E.g. for use with [loss_categorical_crossentropy()]. +#' Example 2: resetting the layer name generation counter #' -#' @param y Class vector to be converted into a matrix (integers from 0 to num_classes). -#' @param num_classes Total number of classes. -#' @param dtype The data type expected by the input, as a string -# (`float32`, `float64`, `int32`...) +#' ```{r, include = FALSE} +#' clear_session() +#' ``` #' -#' @return A binary matrix representation of the input. 
#' +#' ```{r} +#' layers <- lapply(1:10, \(i) layer_dense(units = 10)) +#' +#' new_layer <- layer_dense(units = 10) +#' print(new_layer$name) +#' +#' clear_session() +#' new_layer <- layer_dense(units = 10) +#' print(new_layer$name) +#' ``` +#' +#' @param free_memory +#' Whether to call Python garbage collection. +#' It's usually a good practice to call it to make sure +#' memory used by deleted objects is immediately freed. +#' However, it may take a few seconds to execute, so +#' when using `clear_session()` in a short loop, +#' you may want to skip it. +#' +#' @returns `NULL`, invisibly, called for side effects. #' @export -to_categorical <- function(y, num_classes = NULL, dtype = "float32") { - - args <- list( - y = y, - num_classes = as_nullable_integer(num_classes) - ) +#' @family backend +#' @family utils +#' @seealso +#' + +# + +#' @tether keras.utils.clear_session +clear_session <- +function (free_memory = TRUE) +{ + args <- capture_args() + do.call(keras$utils$clear_session, args) +} - if (keras_version() >= "2.2.3") - args$dtype <- dtype - do.call(resolve_utils()$to_categorical, args) +#' Returns the list of input tensors necessary to compute `tensor`. +#' +#' @description +#' Output will always be a list of tensors +#' (potentially with 1 element). +#' +#' # Example +#' +#' ```{r} +#' input <- keras_input(c(3)) +#' output <- input |> layer_dense(4) |> op_multiply(5) +#' reticulate::py_id(get_source_inputs(output)[[1]]) == +#' reticulate::py_id(input) +#' ``` +#' +#' @returns +#' List of input tensors. +#' +#' @param tensor +#' The tensor to start from. +#' +#' @export +#' @family utils +# @seealso +# + +#' @tether keras.utils.get_source_inputs +get_source_inputs <- +function (tensor) +{ + keras$utils$get_source_inputs(tensor) } #' Downloads a file from a URL if it not already in the cache. #' -#' Passing the MD5 hash will verify the file after download as well as if it is -#' already present in the cache. -#' -#' @param fname Name of the file. If an absolute path `/path/to/file.txt` is -#' specified the file will be saved at that location. -#' @param origin Original URL of the file. -#' @param file_hash The expected hash string of the file after download. The -#' sha256 and md5 hash algorithms are both supported. -#' @param cache_subdir Subdirectory under the Keras cache dir where the file is -#' saved. If an absolute path `/path/to/folder` is specified the file will be -#' saved at that location. -#' @param hash_algorithm Select the hash algorithm to verify the file. options -#' are 'md5', 'sha256', and 'auto'. The default 'auto' detects the hash -#' algorithm in use. -#' @param extract True tries extracting the file as an Archive, like tar or zip. -#' @param archive_format Archive format to try for extracting the file. Options -#' are 'auto', 'tar', 'zip', and None. 'tar' includes tar, tar.gz, and tar.bz -#' files. The default 'auto' is ('tar', 'zip'). None or an empty list will -#' return no matches found. -#' @param cache_dir Location to store cached files, when `NULL` it defaults to -#' the Keras configuration directory. -#' @param untar Deprecated in favor of 'extract'. boolean, whether the file should -#' be decompressed -#' -#' @return Path to the downloaded file +#' @description +#' By default the file at the url `origin` is downloaded to the +#' cache_dir `~/.keras`, placed in the cache_subdir `datasets`, +#' and given the filename `fname`. The final location of a file +#' `example.txt` would therefore be `~/.keras/datasets/example.txt`. 
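+#'
+#' For instance (a sketch; the URL is a placeholder, not a real endpoint):
+#' ```r
+#' path <- get_file(
+#'   fname = "example.txt",
+#'   origin = "https://example.com/example.txt"
+#' )
+#' # path: "~/.keras/datasets/example.txt" (expanded)
+#' ```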
+#' Files in `.tar`, `.tar.gz`, `.tar.bz`, and `.zip` formats can
+#' also be extracted.
+#'
+#' Passing a hash will verify the file after download. The command line
+#' programs `shasum` and `sha256sum` can compute the hash.
+#'
+#' # Examples
+#' ```{r}
+#' path_to_downloaded_file <- get_file(
+#'   origin = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz",
+#'   extract = TRUE
+#' )
+#' ```
+#'
+#' @returns
+#' Path to the downloaded file.
+#'
+#' **Warning on malicious downloads:**
+#'
+#' Downloading something from the Internet carries a risk.
+#' NEVER download a file/archive if you do not trust the source.
+#' We recommend that you specify the `file_hash` argument
+#' (if the hash of the source file is known) to make sure that the file you
+#' are getting is the one you expect.
+#'
+#' @param fname
+#' Name of the file. If an absolute path, e.g. `"/path/to/file.txt"`
+#' is specified, the file will be saved at that location.
+#' If `NULL`, the name of the file at `origin` will be used.
+#'
+#' @param origin
+#' Original URL of the file.
+#'
+# @param untar
+# Deprecated in favor of `extract` argument.
+# boolean, whether the file should be decompressed
+#'
+# @param md5_hash
+# Deprecated in favor of `file_hash` argument.
+# md5 hash of the file for verification
+#'
+#' @param file_hash
+#' The expected hash string of the file after download.
+#' The sha256 and md5 hash algorithms are both supported.
+#'
+#' @param cache_subdir
+#' Subdirectory under the Keras cache dir where the file is
+#' saved. If an absolute path, e.g. `"/path/to/folder"` is
+#' specified, the file will be saved at that location.
+#'
+#' @param hash_algorithm
+#' Select the hash algorithm to verify the file.
+#' Options are `"md5"`, `"sha256"`, and `"auto"`.
+#' The default `"auto"` detects the hash algorithm in use.
+#'
+#' @param extract
+#' `TRUE` tries extracting the file as an Archive, like tar or zip.
+#'
+#' @param archive_format
+#' Archive format to try for extracting the file.
+#' Options are `"auto"`, `"tar"`, `"zip"`, and `NULL`.
+#' `"tar"` includes tar, tar.gz, and tar.bz files.
+#' The default `"auto"` corresponds to `c("tar", "zip")`.
+#' `NULL` or an empty list will return no matches found.
+#'
+#' @param cache_dir
+#' Location to store cached files, when `NULL` it
+#' defaults to `Sys.getenv("KERAS_HOME", "~/.keras/")`.
+#'
+#' @param force_download
+#' If `TRUE`, the file will always be re-downloaded
+#' regardless of the cache state.
+#'
+#' @param ... For forward/backward compatibility.
 #'
 #' @export
-get_file <- function(fname, origin, file_hash = NULL, cache_subdir = "datasets",
-                     hash_algorithm = "auto", extract = FALSE,
-                     archive_format = "auto", cache_dir = NULL,
-                     untar = FALSE) {
-  resolve_utils()$get_file(
-    fname = normalize_path(fname),
-    origin = origin,
-    file_hash = file_hash,
-    cache_subdir = cache_subdir,
-    hash_algorithm = hash_algorithm,
-    extract = extract,
-    archive_format = archive_format,
-    cache_dir = normalize_path(cache_dir),
-    untar = untar
-  )
+#' @family utils
+#' @seealso
+#' +
+# +
+#' @tether keras.utils.get_file
+get_file <-
+function (fname = NULL, origin = NULL, ...,
+    file_hash = NULL, cache_subdir = "datasets", hash_algorithm = "auto",
+    extract = FALSE, archive_format = "auto", cache_dir = NULL,
+    force_download = FALSE)
+{
+    args <- capture_args()
+    do.call(keras$utils$get_file, args)
 }

-#' Representation of HDF5 dataset to be used instead of an R array
+
+# Convert a Keras model to dot format.
+# +# @returns +# A `pydot.Dot` instance representing the Keras model or +# a `pydot.Cluster` instance representing nested model if +# `subgraph=TRUE`. +# +# @param model +# A Keras model instance. +# +# @param show_shapes +# whether to display shape information. +# +# @param show_dtype +# whether to display layer dtypes. +# +# @param show_layer_names +# whether to display layer names. +# +# @param rankdir +# `rankdir` argument passed to PyDot, +# a string specifying the format of the plot: `"TB"` +# creates a vertical plot; `"LR"` creates a horizontal plot. +# +# @param expand_nested +# whether to expand nested Functional models +# into clusters. +# +# @param dpi +# Image resolution in dots per inch. +# +# @param subgraph +# whether to return a `pydot.Cluster` instance. +# +# @param show_layer_activations +# Display layer activations (only for layers that +# have an `activation` property). +# +# @param show_trainable +# whether to display if a layer is trainable. +# +# @param ... +# For forward/backward compatability. +# +# @export +# @noRd +# @family utils +# @seealso +# + +# + +# @tether keras.utils.model_to_dot +# model_to_dot <- +function (model, show_shapes = FALSE, show_dtype = FALSE, show_layer_names = TRUE, + rankdir = "TB", expand_nested = FALSE, dpi = 200L, subgraph = FALSE, + show_layer_activations = FALSE, show_trainable = FALSE, ...) +{ + args <- capture_args(list(dpi = as_integer)) + do.call(keras$utils$model_to_dot, args) +} + + +#' Normalizes an array. +#' +#' @description +#' If the input is an R array, an R array will be returned. +#' If it's a backend tensor, a backend tensor will be returned. #' -#' @param datapath string, path to a HDF5 file -#' @param dataset string, name of the HDF5 dataset in the file specified in datapath -#' @param start int, start of desired slice of the specified dataset -#' @param end int, end of desired slice of the specified dataset -#' @param normalizer function to be called on data when retrieved +#' @returns +#' A normalized copy of the array. #' -#' @return An array-like HDF5 dataset. +#' @param x +#' Array to normalize. #' -#' @details -#' Providing `start` and `end` allows use of a slice of the dataset. +#' @param axis +#' axis along which to normalize. #' -#' Optionally, a normalizer function (or lambda) can be given. This will -#' be called on every slice of data retrieved. +#' @param order +#' Normalization order (e.g. `order=2` for L2 norm). #' #' @export -hdf5_matrix <- function(datapath, dataset, start = 0, end = NULL, normalizer = NULL) { - - if (tensorflow::tf_version() >= "2.4") - stop("This function have been removed in TensorFlow version 2.4 or later.") +#' @family numerical utils +#' @family utils +#' @seealso +#' + +# + +#' @tether keras.utils.normalize +normalize <- +function (x, axis = -1L, order = 2L) +{ + args <- capture_args(list(axis = as_axis, order = as_integer)) + do.call(keras$utils$normalize, args) +} - if (!have_h5py()) - stop("The h5py Python package is required to read h5 files") - resolve_utils()$HDF5Matrix( - datapath = normalize_path(datapath), - dataset = dataset, - start = as.integer(start), - end = as_nullable_integer(end), - normalizer = normalizer - ) +#' Converts a class vector (integers) to binary class matrix. +#' +#' @description +#' E.g. for use with [`loss_categorical_crossentropy()`]. 
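+#'
+#' R factors are accepted as well; they are coerced to 0-based integer class
+#' values via `as.integer(x) - 1L` (see `x` below). For example:
+#' ```{r}
+#' to_categorical(factor(c("a", "b", "c", "a")))
+#' ```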
+#' +#' # Examples +#' ```{r} +#' a <- to_categorical(c(0, 1, 2, 3), num_classes=4) +#' print(a) +#' ``` +#' +#' ```{r} +#' b <- array(c(.9, .04, .03, .03, +#' .3, .45, .15, .13, +#' .04, .01, .94, .05, +#' .12, .21, .5, .17), +#' dim = c(4, 4)) +#' loss <- op_categorical_crossentropy(a, b) +#' loss +#' ``` +#' +#' ```{r} +#' loss <- op_categorical_crossentropy(a, a) +#' loss +#' ``` +#' +#' @returns +#' A binary matrix representation of the input as an R array. The class +#' axis is placed last. +#' +#' @param x +#' Array-like with class values to be converted into a matrix +#' (integers from 0 to `num_classes - 1`). +#' R factors are coerced to integer and offset to be 0-based, i.e., +#' `as.integer(x) - 1L`. +#' +#' @param num_classes +#' Total number of classes. If `NULL`, this would be inferred +#' as `max(x) + 1`. Defaults to `NULL`. +#' +#' @export +#' @family numerical utils +#' @family utils +#' @seealso +#' + [`op_one_hot()`], which does the same operation as `to_categorical()`, but +#' operating on tensors. +#' + [`loss_sparse_categorical_crossentropy()`], which can +#' accept labels (`y_true`) as an integer vector, instead of as a dense one-hot +#' matrix. +#' + +# + +#' +#' @tether keras.utils.to_categorical +to_categorical <- +function (x, num_classes = NULL) +{ + args <- capture_args(list(x = function(x) { + if (inherits(x, "factor")) + array(as.integer(x) - 1L, dim = dim(x) %||% length(x)) + else + as_integer_array(x) + }, num_classes = as_integer)) + do.call(keras$utils$to_categorical, args) } -#' Normalize a matrix or nd-array + +#' Sets all random seeds (Python, NumPy, and backend framework, e.g. TF). +#' +#' @description +#' You can use this utility to make almost any Keras program fully +#' deterministic. Some limitations apply in cases where network communications +#' are involved (e.g. parameter server distribution), which creates additional +#' sources of randomness, or when certain non-deterministic cuDNN ops are +#' involved. #' -#' @param x Matrix or array to normalize -#' @param axis Axis along which to normalize. Axis indexes are 1-based -#' (pass -1 to select the last axis). -#' @param order Normalization order (e.g. 2 for L2 norm) +#' This sets: +#' - the R session seed: [`set.seed()`] +#' - the Python session seed: `import random; random.seed(seed)` +#' - the Python NumPy seed: `import numpy; numpy.random.seed(seed)` +#' - the TensorFlow seed: `tf$random$set_seed(seed)` (only if TF is installed) +#' - The Torch seed: `import("torch")$manual_seed(seed)` (only if the backend is torch) +#' - and disables Python hash randomization. #' -#' @return A normalized copy of the array. +#' Note that the TensorFlow seed is set even if you're not using TensorFlow +#' as your backend framework, since many workflows leverage `tf$data` +#' pipelines (which feature random shuffling). Likewise many workflows +#' might leverage NumPy APIs. #' +#' @param seed +#' Integer, the random seed to use. +#' +#' @returns No return value, called for side effects. #' @export -normalize <- function(x, axis = -1, order = 2) { - resolve_utils()$normalize( - x = x, - axis = as_axis(axis), - order = as.integer(order) - ) +#' @family utils +#' @seealso +#' + +# + +#' +#' @tether keras.utils.set_random_seed +set_random_seed <- +function (seed) +{ + seed <- as_integer(seed) + set.seed(seed) + reticulate::py_set_seed(seed) + keras$utils$set_random_seed(seed) } -#' Provide a scope with mappings of names to custom objects + +#' Pads sequences to the same length. 
#' -#' @param objects Named list of objects -#' @param expr Expression to evaluate +#' @description +#' This function transforms a list (of length `num_samples`) +#' of sequences (lists of integers) +#' into a 2D NumPy array of shape `(num_samples, num_timesteps)`. +#' `num_timesteps` is either the `maxlen` argument if provided, +#' or the length of the longest sequence in the list. #' -#' @details -#' There are many elements of Keras models that can be customized with -#' user objects (e.g. losses, metrics, regularizers, etc.). When -#' loading saved models that use these functions you typically -#' need to explicitily map names to user objects via the `custom_objects` -#' parmaeter. +#' Sequences that are shorter than `num_timesteps` +#' are padded with `value` until they are `num_timesteps` long. #' -#' The `with_custom_object_scope()` function provides an alternative that -#' lets you create a named alias for a user object that applies to an entire -#' block of code, and is automatically recognized when loading saved models. +#' Sequences longer than `num_timesteps` are truncated +#' so that they fit the desired length. #' -#' @examples \dontrun{ -#' # define custom metric -#' metric_top_3_categorical_accuracy <- -#' custom_metric("top_3_categorical_accuracy", function(y_true, y_pred) { -#' metric_top_k_categorical_accuracy(y_true, y_pred, k = 3) -#' }) +#' The position where padding or truncation happens is determined by +#' the arguments `padding` and `truncating`, respectively. +#' Pre-padding or removing values from the beginning of the sequence is the +#' default. #' -#' with_custom_object_scope(c(top_k_acc = sparse_top_k_cat_acc), { +#' ```{r} +#' sequence <- list(c(1), c(2, 3), c(4, 5, 6)) +#' pad_sequences(sequence) +#' ``` #' -#' # ...define model... +#' ```{r} +#' pad_sequences(sequence, value=-1) +#' ``` #' -#' # compile model (refer to "top_k_acc" by name) -#' model %>% compile( -#' loss = "binary_crossentropy", -#' optimizer = optimizer_nadam(), -#' metrics = c("top_k_acc") -#' ) +#' ```{r} +#' pad_sequences(sequence, padding='post') +#' ``` #' -#' # save the model -#' save_model_hdf5("my_model.h5") +#' ```{r} +#' pad_sequences(sequence, maxlen=2) +#' ``` #' -#' # loading the model within the custom object scope doesn't -#' # require explicitly providing the custom_object -#' load_model_hdf5("my_model.h5") -#' }) -#' } +#' @returns +#' Array with shape `(len(sequences), maxlen)` +#' +#' @param sequences +#' List of sequences (each sequence is a list of integers). +#' +#' @param maxlen +#' Optional Int, maximum length of all sequences. If not provided, +#' sequences will be padded to the length of the longest individual +#' sequence. +#' +#' @param dtype +#' (Optional, defaults to `"int32"`). Type of the output sequences. +#' To pad sequences with variable length strings, you can use `object`. +#' +#' @param padding +#' String, "pre" or "post" (optional, defaults to `"pre"`): +#' pad either before or after each sequence. +#' +#' @param truncating +#' String, "pre" or "post" (optional, defaults to `"pre"`): +#' remove values from sequences larger than +#' `maxlen`, either at the beginning or at the end of the sequences. +#' +#' @param value +#' Float or String, padding value. (Optional, defaults to 0.) 
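+#'
+#' For example, truncating at the end rather than the beginning (a sketch
+#' reusing the `sequence` from the examples above):
+#' ```{r}
+#' sequence <- list(c(1), c(2, 3), c(4, 5, 6))
+#' pad_sequences(sequence, maxlen = 2, truncating = 'post')
+#' ```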
#' #' @export -with_custom_object_scope <- function(objects, expr) { - objects <- objects_with_py_function_names(objects) - with(resolve_utils()$custom_object_scope(objects), expr) +#' @family utils +#' @seealso +#' + +# + +#' +#' @tether keras.utils.pad_sequences +pad_sequences <- +function (sequences, maxlen = NULL, dtype = "int32", padding = "pre", + truncating = "pre", value = 0) +{ + args <- capture_args(list(maxlen = as_integer, sequences = function (x) + lapply(x, as.list))) + do.call(keras$utils$pad_sequences, args) } -objects_with_py_function_names <- function(objects) { - if(is.null(objects)) - return(NULL) +# -------------------------------------------------------------------------------- - if(!is.list(objects)) - objects <- list(objects) - object_names <- rlang::names2(objects) - # try to infer missing names or raise an error - for (i in seq_along(objects)) { - name <- object_names[[i]] - o <- objects[[i]] - # browser() - if (name == "") { - if (inherits(o, "keras_layer_wrapper")) - o <- attr(o, "Layer") - - if (inherits(o, "python.builtin.object")) - name <- o$`__name__` - else if (inherits(o, "R6ClassGenerator")) - name <- o$classname - else if (is.character(attr(o, "py_function_name", TRUE))) - name <- attr(o, "py_function_name", TRUE) - else - stop("object name could not be infered; please supply a named list", - call. = FALSE) - - object_names[[i]] <- name - } - } - - # add a `py_function_name` attr for bare R functions, if it's missing - objects <- lapply(1:length(objects), function(i) { - object <- objects[[i]] - if (is.function(object) && - !inherits(object, "python.builtin.object") && - is.null(attr(object, "py_function_name", TRUE))) - attr(object, "py_function_name") <- object_names[[i]] - object - }) - - names(objects) <- object_names - objects -} #' Keras array object #' @@ -243,10 +519,11 @@ objects_with_py_function_names <- function(objects) { #' then R doubles will be converted to the default floating point type for the #' current Keras backend. #' -#' @return NumPy array with the specified `dtype` (or list of NumPy arrays if a +#' @returns NumPy array with the specified `dtype` (or list of NumPy arrays if a #' list was passed for `x`). #' -#' @export +#' @keywords internal +#' @noRd keras_array <- function(x, dtype = NULL) { # reflect NULL @@ -254,29 +531,22 @@ keras_array <- function(x, dtype = NULL) { return(x) # reflect HDF5 + # TODO: is this still relevent? Is it still the correct S3 class? + # keras 3 (and tf.keras 2) seems to not export a HDF5 wrapper ... 
if (inherits(x, "keras.utils.io_utils.HDF5Matrix")) return(x) - # reflect tensor for keras v2.2 or TF implementation >= 1.12 - if (is_tensorflow_implementation()) { - if ( - tf_version() >= "1.12" && - ( - is_keras_tensor(x) || is.list(x) && all(vapply(x, is_keras_tensor, logical(1))) - ) - ) { - return(x) - } - } else { - if ((keras_version() >= "2.2.0") && is_keras_tensor(x)) { - return(x) - } - } + # reflect tensors + if (keras$ops$is_tensor(x)) + return(x) + + # allow passing things like pandas.Series(), for workarounds like + # https://github.com/rstudio/keras/issues/1341 + if(is_py_object(x)) + return(x) - # error for data frames - if (is.data.frame(x)) { + if (is.data.frame(x)) x <- as.list(x) - } # recurse for lists if (is.list(x)) @@ -288,7 +558,7 @@ keras_array <- function(x, dtype = NULL) { # establish the target datatype - if we are converting a double from R # into numpy then use the default floatx for the current backend if (is.null(dtype) && is.double(x)) - dtype <- backend()$floatx() + dtype <- config_floatx() # convert non-array to array if (!is.array(x)) @@ -298,6 +568,9 @@ keras_array <- function(x, dtype = NULL) { x <- r_to_py(x) } + if(!inherits(x, "numpy.ndarray")) + stop("Could not convert object to keras array.") + # if we don't yet have a dtype then use the converted type if (is.null(dtype)) dtype <- x$dtype @@ -308,183 +581,8 @@ keras_array <- function(x, dtype = NULL) { } -#' Check if Keras is Available -#' -#' Probe to see whether the Keras Python package is available in the current -#' system environment. -#' -#' @param version Minimum required version of Keras (defaults to `NULL`, no -#' required version). -#' -#' @return Logical indicating whether Keras (or the specified minimum version of -#' Keras) is available. -#' -#' @examples -#' \dontrun{ -#' # testthat utilty for skipping tests when Keras isn't available -#' skip_if_no_keras <- function(version = NULL) { -#' if (!is_keras_available(version)) -#' skip("Required keras version not available for testing") -#' } -#' -#' # use the function within a test -#' test_that("keras function works correctly", { -#' skip_if_no_keras() -#' # test code here -#' }) -#' } -#' -#' @export -is_keras_available <- function(version = NULL) { - implementation_module <- resolve_implementation_module() - if (reticulate::py_module_available(implementation_module)) { - if (!is.null(version)) - keras_version() >= version - else - TRUE - } else { - FALSE - } -} - - -#' Keras implementation -#' -#' Obtain a reference to the Python module used for the implementation of Keras. -#' -#' There are currently two Python modules which implement Keras: -#' -#' - keras ("keras") -#' - tensorflow.keras ("tensorflow") -#' -#' This function returns a reference to the implementation being currently -#' used by the keras package. The default implementation is "keras". -#' You can override this by setting the `KERAS_IMPLEMENTATION` environment -#' variable to "tensorflow". -#' -#' @return Reference to the Python module used for the implementation of Keras. 
-#' -#' @export -implementation <- function() { - keras -} - - -is_backend <- function(name) { - identical(backend()$backend(), name) -} - -is_windows <- function() { - identical(.Platform$OS.type, "windows") -} - -is_osx <- function() { - Sys.info()["sysname"] == "Darwin" -} - -is_layer <- function(object) { - inherits(object, "keras.engine.topology.Layer") -} - -relative_to <- function(dir, file) { - # normalize paths - dir <- normalizePath(dir, mustWork = FALSE, winslash = "/") - file <- normalizePath(file, mustWork = FALSE, winslash = "/") - # ensure directory ends with a / - if (!identical(substr(dir, nchar(dir), nchar(dir)), "/")) { - dir <- paste(dir, "/", sep="") - } - - # if the file is prefixed with the directory, return a relative path - if (identical(substr(file, 1, nchar(dir)), dir)) - file <- substr(file, nchar(dir) + 1, nchar(file)) - - # simplify ./ - if (identical(substr(file, 1, 2), "./")) - file <- substr(file, 3, nchar(file)) - - file -} - - -is_keras_tensor <- function(x) { - if (is_tensorflow_implementation()) { - if (tensorflow::tf_version() >= "2.0") tensorflow::tf$is_tensor(x) else tensorflow::tf$contrib$framework$is_tensor(x) - } else { - k_is_tensor(x) - } -} - - - -assert_all_dots_named <- function(envir = parent.frame(), cl) { - - x <- eval(quote(list(...)), envir) - if(!length(x)) - return() - - x <- names(x) - if(is.character(x) && !anyNA(x) && all(x != "")) - return() - - stop("All arguments provided to `...` must be named.\n", - "Call with unnamed arguments in dots:\n ", - paste(deparse(cl, 500L), collapse = "\n")) -} - -# TODO: should there be some default modifiers in capture_args() for standard layer args -# like, input_shape, batch_input_shape, etc. - -capture_args <- function(cl, modifiers = NULL, ignore = NULL, - envir = parent.frame(), fn = sys.function(-1)) { - - ## bug: match.call() resolves incorrectly if dots are from not the default sys.parent() - ## e.g, this fails if dots originate from the callers caller: - # cl <- eval(quote(match.call()), parent.frame()) - ## workaround: caller must call match.call() from the correct frame - - ## note: capture_args() must always be called at the top level of the intended function body. - ## sys.function(-1) resolves to the incorrect function if the capture_args() - ## call is itself a promise in another call. E.g.,: - ## do.call(foo, capture_args(match.call())) fails because fn resolves to do.call() - - fn_arg_nms <- names(formals(fn)) - known_args <- intersect(names(cl), fn_arg_nms) - known_args <- setdiff(known_args, ignore) - names(known_args) <- known_args - cl2 <- c(quote(list), lapply(known_args, as.symbol)) - - if("..." %in% fn_arg_nms && !"..." %in% ignore) { - assert_all_dots_named(envir, cl) - # this might reorder args by assuming ... 
are last, but it doesn't matter
-    # since everything is supplied as a keyword arg to the Python side anyway
-    cl2 <- c(cl2, quote(...))
-  }
-
-  args <- eval(as.call(cl2), envir)
-
-  # check `ignore` again, since arg might have been in `...`
-  for(nm in intersect(names(args), ignore))
-    args[[nm]] <- NULL
-
-  nms_to_modify <- intersect(names(args), names(modifiers))
-  for (nm in nms_to_modify)
-    args[nm] <- list(modifiers[[nm]](args[[nm]]))
-    # list() so if modifier returns NULL, don't remove the arg
-
-  args
-}
-
-
-is_scalar <- function(x) identical(length(x), 1L)
-
-is_mac_arm64 <- function() {
-  sys_info <- Sys.info()
-  sys_info[["sysname"]] == "Darwin" &&
-    sys_info[["machine"]] == "arm64"
-}

 #' Plot a Keras model
@@ -495,7 +593,7 @@ is_mac_arm64 <- function() {
 #' @param show_shapes whether to display shape information.
 #' @param show_dtype whether to display layer dtypes.
 #' @param show_layer_names whether to display layer names.
-#' @param ... passed on to `keras$utils$plot_model()`. Used for forward and
+#' @param ... passed on to Python `keras.utils.model_to_dot()`. Used for forward and
 #'   backward compatibility.
 #' @param rankdir a string specifying the format of the plot: `'TB'` creates a
 #'   vertical plot; `'LR'` creates a horizontal plot. (argument passed to PyDot)
@@ -512,71 +610,116 @@
 #'   resultant subgraph must be complete.
 #' @param show_layer_activations Display layer activations (only for layers that
 #'   have an `activation` property).
+#' @param show_trainable
+#'   whether to display if a layer is trainable.
 #'
-#' @return Nothing, called for it's side effects.
+#' @returns Nothing, called for its side effects.
 #'
-#' @section Raises: ValueError: if `plot_model` is called before the model is
+#' @section Raises: ValueError: if `plot(model)` is called before the model is
 #'   built, unless a `input_shape = ` argument was supplied to
 #'   `keras_model_sequential()`.
 #'
 #' @section Requirements:
 #' This function requires pydot and graphviz.
 #' `pydot` is by default installed by `install_keras()`, but if you installed
-#' tensorflow by other means, you can install pydot directly with :
-#' ````
+#' keras by other means, you can install `pydot` directly with:
+#' ````r
 #' reticulate::py_install("pydot", pip = TRUE)
 #' ````
-#' In a conda environment, you can install graphviz with:
+#' You can install graphviz directly from here:
+#'
+#'
+#' On most Linux platforms, you can install graphviz via the package manager.
+#' For example, on Ubuntu/Debian you can install with
+#' ```sh
+#' sudo apt install graphviz
 #' ```
+#' In a conda environment, you can install graphviz with:
+#' ```r
 #' reticulate::conda_install(packages = "graphviz")
 #' # Restart the R session after install.
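+#' # (optional, a sketch) confirm the graphviz `dot` binary is on the PATH:
+#' Sys.which("dot")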
#' ``` -#' Otherwise you can install graphviz from here: -#' -#' +#' @tether keras.utils.model_to_dot #' @export -plot.keras.engine.training.Model <- +plot.keras.src.models.model.Model <- function(x, show_shapes = FALSE, show_dtype = FALSE, - show_layer_names = TRUE, + show_layer_names = FALSE, ..., rankdir = "TB", expand_nested = FALSE, - dpi = 96, + dpi = 200, layer_range = NULL, show_layer_activations = FALSE, + show_trainable = NA, to_file = NULL) { - args <- capture_args(match.call(), ignore = "x") + + args <- capture_args(ignore = c("x", "to_file", "show_trainable"), + force = c("show_layer_names")) args$model <- x + + if (is.na(show_trainable)) { + built <- as_r_value(py_get_attr(x, "built", silent = TRUE)) %||% FALSE + show_trainable <- built && as.logical(length(x$non_trainable_weights)) + } + args$show_trainable <- show_trainable + if (is.null(to_file)) { - args$to_file <- - tempfile(paste0("keras_", x$name), fileext = ".png") - on.exit(unlink(args$to_file)) + + if (isTRUE(getOption('knitr.in.progress'))) { + + options <- knitr::opts_current$get() + plot_counter <- asNamespace("knitr")$plot_counter + number <- plot_counter() + + file <- knitr::fig_path( + suffix = options$dev %||% ".png", + options = options, + number = number + ) + + dir.create(dirname(file), recursive = TRUE, showWarnings = FALSE) + # args$dpi <- args$dpi %||% options$dpi + + } else { + + file <- tempfile(paste0("keras_", x$name), fileext = ".png") + on.exit(unlink(file), add = TRUE) + + } + } else { + file <- to_file } - tryCatch( - do.call(keras$utils$plot_model, args), + tryCatch({ + dot <- do.call(keras$utils$model_to_dot, args) + dot$write(file, format = tools::file_ext(file)) + }, error = function(e) { - message("See ?keras::plot.keras.engine.training.Model for ", - " instructions on how to install graphviz and pydot") + message("See ?keras3::plot.keras.src.models.model.Model for", + " instructions on how to install graphviz and pydot.") e$call <- sys.call(1) stop(e) } ) - if(!is.null(to_file)) + + if (!is.null(to_file)) return(invisible()) - img <- png::readPNG(args$to_file, native = TRUE) + if (isTRUE(getOption('knitr.in.progress'))) + return(knitr::include_graphics(file, dpi = dpi)) + + img <- png::readPNG(file, native = TRUE) graphics::plot.new() graphics::plot.window(xlim = c(0, ncol(img)), ylim = c(0, nrow(img)), asp = 1, yaxs = "i", xaxs = "i") - graphics::rasterImage(img, 0, 0, ncol(img), nrow(img), interpolate = FALSE) + graphics::rasterImage(img, 0, 0, ncol(img), nrow(img), interpolate = TRUE) invisible() } -#' zip lists +#' Zip lists #' #' This is conceptually similar to `zip()` in Python, or R functions #' `purrr::transpose()` and `data.table::transpose()` (albeit, accepting @@ -591,9 +734,12 @@ function(x, #' #' @param ... R lists or atomic vectors, optionally named. #' -#' @return A inverted list +#' @returns A inverted list #' @export #' +#' @family data utils +#' @family utils +#' #' @examples #' gradients <- list("grad_for_wt_1", "grad_for_wt_2", "grad_for_wt_3") #' weights <- list("weight_1", "weight_2", "weight_3") @@ -606,7 +752,7 @@ function(x, #' names(gradients) <- paste0("gradient_", 1:3) #' try(zip_lists(gradients, weights)) # error, names don't match #' # call unname directly for positional matching -#' zip_lists(unname(gradients), unname(weights)) +#' str(zip_lists(unname(gradients), unname(weights))) zip_lists <- function(...) { dots <- list(...) if(length(dots) == 1) @@ -636,36 +782,31 @@ zip_lists <- function(...) 
{ } -drop_nulls <- function(x, i = NULL) { - if(is.null(i)) - return(x[!vapply(x, is.null, FALSE, USE.NAMES = FALSE)]) - drop <- logical(length(x)) - names(drop) <- names(x) - drop[i] <- vapply(x[i], is.null, FALSE, USE.NAMES = FALSE) - x[!drop] -} - - - - -as_r_value <- function (x) { - if (inherits(x, "python.builtin.object")) - py_to_r(x) - else x +#' Generate a Random Array +#' +#' This function generates an R array with random numbers. +#' The dimensions of the array are specified by the user. +#' The generation function for the random numbers can also be customized. +#' +#' @param ... Dimensions for the array as separate integers or as a single vector. +#' @param gen A function for generating random numbers, defaulting to `runif`. +#' +#' @returns Returns an array with the specified dimensions filled with random numbers. +#' @noRd +#' +#' @examples +#' # Create a 3x3 matrix with random numbers from uniform distribution +#' random_array(3, 3) +#' +#' # Create a 2x2x2 array with random numbers from normal distribution +#' random_array(2, 2, 2, gen = rnorm) +#' +#' # Create a 2x2 array with a sequence of integers. +#' random_array(2, 2, gen = seq) +#' +#' @keywords internal +random_array <- function(..., gen = stats::runif) { + dim <- unlist(c(...), use.names = FALSE) + array(gen(prod(dim)), dim = dim) } - - -# internal `[` method that ensures functions in this namespace use one-based -# indexing in case user has a global option set for zero-based indexing. -`[.tensorflow.tensor` <- - getS3method("[", "tensorflow.tensor", envir = asNamespace("tensorflow")) -formals(`[.tensorflow.tensor`)$style <- "R" -formals(`[.tensorflow.tensor`)$options <- - tensorflow::tf_extract_opts( - one_based = TRUE, - inclusive_stop = TRUE, - disallow_out_of_bounds = TRUE, - warn_tensors_passed_asis = FALSE, - warn_negatives_pythonic = FALSE - ) diff --git a/R/wrapper_custom.R b/R/wrapper_custom.R deleted file mode 100644 index 14adf3d9ae..0000000000 --- a/R/wrapper_custom.R +++ /dev/null @@ -1,147 +0,0 @@ - -#' (Deprecated) Base R6 class for Keras wrappers -#' -#' Instead of inheriting from the proxy class `KerasWrapper` and using -#' `create_wrapper` to create instances, new R6 custom classes are encouraged to -#' inherit directly from `keras$layers$Wrapper` and use `create_layer` to create -#' instances. -#' -#' @docType class -#' -#' @format An [R6Class] generator object -#' -#' @section Methods: \describe{ -#' \item{\code{build(input_shape)}}{Builds the wrapped layer. -#' Subclasses can extend this to perform custom operations on that layer.} -#' \item{\code{call(inputs,mask)}}{Calls the wrapped layer on an input tensor.} -#' \item{\code{compute_output_shape(input_shape)}}{Computes the output shape -#' for the wrapped layer.} -#' \item{\code{add_loss(losses, inputs)}}{Subclasses can use this to add losses to the wrapped layer.} -#' \item{\code{add_weight(name,shape,dtype,initializer,regularizer,trainable,constraint)}}{Subclasses can use this to add weights to the wrapped layer.} } -#' -#' @return [KerasWrapper]. 
-#' -#' @keywords internal -#' @export -KerasWrapper <- R6::R6Class( - "KerasWrapper", - - public = list( - build = function(input_shape) { - if (!private$py_wrapper$layer$built) private$py_wrapper$layer$build(input_shape) - }, - - call = function(inputs, mask = NULL) { - private$py_wrapper$layer$call(inputs) - }, - - compute_output_shape = function(input_shape) { - private$py_wrapper$layer$compute_output_shape(input_shape) - }, - - add_loss = function(losses, inputs = NULL) { - args <- list() - args$losses <- losses - args$inputs <- inputs - do.call(private$py_wrapper$layer$add_loss, args) - }, - - add_weight = function(name, - shape, - dtype = NULL, - initializer = NULL, - regularizer = NULL, - trainable = TRUE, - constraint = NULL) { - args <- list() - args$name <- name - args$shape <- shape - args$dtype <- dtype - args$initializer <- initializer - args$regularizer <- regularizer - args$trainable <- trainable - args$constraint <- constraint - - do.call(private$py_wrapper$layer$add_weight, args) - }, - - .set_py_wrapper = function(py_wrapper) { - private$py_wrapper <- py_wrapper - }, - - python_wrapper = function() { - private$py_wrapper - } - ), - - active = list( - input = function(value) { - if (missing(value)) - return(private$py_wrapper$input) - else - private$py_wrapper$input <- value - }, - output = function(value) { - if (missing(value)) - return(private$py_wrapper$output) - else - private$py_wrapper$output <- value - } - ), - - private = list(py_wrapper = NULL) -) - -#' (Deprecated) Create a Keras Wrapper -#' -#' R6 classes that inherit from `keras$layers$Wrapper` can now be instantiated -#' directly by `create_layer` -#' -#' @param wrapper_class R6 class of type KerasWrapper -#' @param object Object to compose layer with. This is either a -#' [keras_model_sequential()] to add the layer to, or another Layer which -#' this layer will call. -#' @param args List of arguments to layer constructor function -#' -#' @return A Keras wrapper -#' -#' @note The `object` parameter can be missing, in which case the -#' layer is created without a connection to an existing graph. 
-#' -#' @keywords internal -#' @export -create_wrapper <- function(wrapper_class, object, args = list()) { - - args$layer <- args$layer - args$input_shape <- args$input_shape - args$batch_input_shape <- args$batch_input_shape - args$batch_size <- args$batch_size - args$dtype <- args$dtype - args$name <- args$name - args$trainable <- args$trainable - args$weights <- args$weights - - common_arg_names <- c("layer", "input_shape", "batch_input_shape", "batch_size", - "dtype", "name", "trainable", "weights") - py_wrapper_args <- args[common_arg_names] - py_wrapper_args[sapply(py_wrapper_args, is.null)] <- NULL - for (arg in names(py_wrapper_args)) - args[[arg]] <- NULL - - r6_wrapper <- do.call(wrapper_class$new, args) - - python_path <- system.file("python", package = "keras") - tools <- reticulate::import_from_path("kerastools", path = python_path) - py_wrapper_args$r_build <- r6_wrapper$build - py_wrapper_args$r_call <- reticulate::py_func(r6_wrapper$call) - py_wrapper_args$r_compute_output_shape <- r6_wrapper$compute_output_shape - py_wrapper <- do.call(tools$wrapper$RWrapper, py_wrapper_args) - - r6_wrapper$.set_py_wrapper(py_wrapper) - - - if (missing(object) || is.null(object)) - r6_wrapper - else - invisible(compose_layer(object, py_wrapper)) -} diff --git a/R/zzz-aliases.R b/R/zzz-aliases.R new file mode 100644 index 0000000000..a1ad41b72a --- /dev/null +++ b/R/zzz-aliases.R @@ -0,0 +1,47 @@ + + +.alias_roxygen <- function(backcompat_name, name) { + strsplit(glue::glue(" + {name} + + `{backcompat_name}()` is an alias for [`{name}()`]. + See `?`[`{name}()`] for the full documentation. + + @inheritParams {name} + @inherit {name} return + @keywords internal + @export + + "), "\n", fixed = TRUE)[[1L]] +} + + +#' @eval .alias_roxygen("new_callback_class", "Callback") +new_callback_class <- Callback + +#' @eval .alias_roxygen("new_layer_class", "Layer") +new_layer_class <- Layer + +#' @eval .alias_roxygen("new_loss_class", "Loss") +new_loss_class <- Loss + +#' @eval .alias_roxygen("new_metric_class", "Metric") +new_metric_class <- Metric + +#' @eval .alias_roxygen("new_model_class", "Model") +new_model_class <- Model + +#' @eval .alias_roxygen("new_learning_rate_schedule_class", "LearningRateSchedule") +new_learning_rate_schedule_class <- LearningRateSchedule + +#' @eval .alias_roxygen("mark_active", "active_property") +mark_active <- active_property + +#' @eval .alias_roxygen("layer_input", "keras_input") +layer_input <- keras_input + +#' @eval .alias_roxygen("bidirectional", "layer_bidirectional") +bidirectional <- layer_bidirectional + +#' @eval .alias_roxygen("time_distributed", "layer_time_distributed") +time_distributed <- layer_time_distributed diff --git a/R/zzz-metrics-callback.R b/R/zzz-metrics-callback.R new file mode 100644 index 0000000000..33069d705a --- /dev/null +++ b/R/zzz-metrics-callback.R @@ -0,0 +1,131 @@ + + +callback_view_metrics <- Callback( + "ViewMetricsCallback", + + public = list( + + initialize = function(view_metrics = FALSE, initial_epoch = 1) { + private$view_metrics <- view_metrics + private$initial_epoch <- initial_epoch + }, + + on_train_begin = function(logs = NULL) { + if (tfruns::is_run_active()) { + private$write_params(self$params) + private$write_model_info(self$model) + } + }, + + on_epoch_end = function(epoch, logs = NULL) { + + if ((epoch - private$initial_epoch) == 0) { + # first epoch + + # logs is a dict/named list + private$metrics <- lapply(logs, function(x) logical()) + + sleep <- 0.25 # 0.5 + + } else { + + sleep <- 0.1 + + } + + # handle 
metrics + private$on_metrics(logs, sleep) + + } + ), + + private = list( + on_metrics = function(logs, sleep) { + + # record metrics + metrics <- private$metrics + for (metric in names(metrics)) { + # guard against metrics not yet available by using NA + # when a named metrics isn't passed in 'logs' + append(metrics[[metric]]) <- mean(logs[[metric]] %||% NA) + } + private$metrics <- metrics + + # create history object and convert to metrics data frame + + history <- keras_training_history(self$params, private$metrics) + metrics <- private$as_metrics_df(history) + + # view metrics if requested + if (private$view_metrics) { + + # create the metrics_viewer or update if we already have one + metrics_viewer <- private$metrics_viewer + if (is.null(metrics_viewer)) { + private$metrics_viewer <- tfruns::view_run_metrics(metrics) + } else { + tfruns::update_run_metrics(metrics_viewer, metrics) + } + + # pump events + utils::process.events() + Sys.sleep(sleep) + utils::process.events() + } + # record metrics + tfruns::write_run_metadata("metrics", metrics) + }, + + # convert keras history to metrics data frame suitable for plotting + as_metrics_df = function(history) { + + # create metrics data frame + metrics <- lapply(history$metrics, function(m) sapply(m, as.numeric)) + df <- as.data.frame(metrics) + + # pad to epochs if necessary + pad <- history$params$epochs - nrow(df) + pad_data <- list() + + metric_names <- names(history$metrics) + + for (metric in metric_names) + pad_data[[metric]] <- rep_len(NA, pad) + + df <- rbind(df, pad_data) + + # return df + df + }, + + write_params = function(params) { + properties <- list( + samples = params$samples, + validation_samples = params$validation_samples, + epochs = params$epochs, + batch_size = params$batch_size + ) + tfruns::write_run_metadata("properties", properties) + }, + + write_model_info = function(model) { + tryCatch({ + model_info <- list() + model_info$model <- py_str(model, line_length = 80L) + if (is.character(model$loss)) + model_info$loss_function <- model$loss + else if (inherits(model$loss, "python.builtin.function")) + model_info$loss_function <- model$loss$`__name__` + optimizer <- model$optimizer + if (!is.null(optimizer)) { + model_info$optimizer <- py_str(optimizer) + model_info$learning_rate <- as.double(optimizer$lr) + } + tfruns::write_run_metadata("properties", model_info) + }, error = function(e) { + warning("Unable to log model info: ", e$message, call. = FALSE) + }) + + } + ) +) diff --git a/README.md b/README.md index dd6e615392..8f8f2caa34 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ ![](https://s3.amazonaws.com/keras.io/img/keras-logo-2018-large-1200.png) -[![R build status](https://github.com/rstudio/keras/workflows/R-CMD-check/badge.svg)](https://github.com/rstudio/keras/actions?workflow=R-CMD-check) +[![R-CMD-check](https://github.com/rstudio/keras/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/rstudio/keras/actions/workflows/R-CMD-check.yaml) [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/keras)](https://cran.r-project.org/package=keras) [![license](https://img.shields.io/github/license/mashape/apistatus.svg?maxAge=2592000)](https://github.com/keras-team/keras/blob/master/LICENSE) @@ -16,5 +16,5 @@ - Supports arbitrary network architectures: multi-input or multi-output models, layer sharing, model sharing, etc. This means that Keras is appropriate for building essentially any deep learning model, from a memory network to a neural Turing machine. 
-See the package website at for complete documentation. +See the package website at https://keras.posit.co for complete documentation. diff --git a/cran-comments.md b/cran-comments.md index 9231afcfec..1f305497bc 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,5 +1,9 @@ -New release, bugfixes and updates. -Details in NEWS.md - ## R CMD check results -There were no ERRORs or WARNINGs. + +0 errors | 0 warnings | 1 note + +* This is a new release. +* installed size is 9.8Mb + sub-directories of 1Mb or more: + doc 3.6Mb + help 5.3Mb diff --git a/docs/.gitignore b/docs/.gitignore deleted file mode 100644 index c05e7776da..0000000000 --- a/docs/.gitignore +++ /dev/null @@ -1 +0,0 @@ -repos diff --git a/docs/404.html b/docs/404.html new file mode 100644 index 0000000000..e10bb80289 --- /dev/null +++ b/docs/404.html @@ -0,0 +1,120 @@ +Page not found (404) • keras3
+Content not found. Please use links in the navbar.
diff --git a/docs/CNAME b/docs/CNAME new file mode 100644 index 0000000000..0fd9ec66a3 --- /dev/null +++ b/docs/CNAME @@ -0,0 +1 @@ +keras.posit.co \ No newline at end of file diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html new file mode 100644 index 0000000000..5f81cf838d --- /dev/null +++ b/docs/LICENSE-text.html @@ -0,0 +1,94 @@ +License • keras3
YEAR: 2024
+COPYRIGHT HOLDER: Posit Software, PBC; Google, Inc; François Chollet; Yuan Tang
diff --git a/website/apple-touch-icon-120x120.png b/docs/apple-touch-icon-120x120.png similarity index 100% rename from website/apple-touch-icon-120x120.png rename to docs/apple-touch-icon-120x120.png diff --git a/website/apple-touch-icon-152x152.png b/docs/apple-touch-icon-152x152.png similarity index 100% rename from website/apple-touch-icon-152x152.png rename to docs/apple-touch-icon-152x152.png diff --git a/website/apple-touch-icon-180x180.png b/docs/apple-touch-icon-180x180.png similarity index 100% rename from website/apple-touch-icon-180x180.png rename to docs/apple-touch-icon-180x180.png diff --git a/website/apple-touch-icon-60x60.png b/docs/apple-touch-icon-60x60.png similarity index 100% rename from website/apple-touch-icon-60x60.png rename to docs/apple-touch-icon-60x60.png diff --git a/website/apple-touch-icon-76x76.png b/docs/apple-touch-icon-76x76.png similarity index 100% rename from website/apple-touch-icon-76x76.png rename to docs/apple-touch-icon-76x76.png diff --git a/website/apple-touch-icon.png b/docs/apple-touch-icon.png similarity index 100% rename from website/apple-touch-icon.png rename to docs/apple-touch-icon.png diff --git a/docs/articles/.gitignore b/docs/articles/.gitignore deleted file mode 100644 index 5c41d26e17..0000000000 --- a/docs/articles/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -logs -checkpoints.h5 diff --git a/docs/articles/about_keras_layers.html b/docs/articles/about_keras_layers.html deleted file mode 100644 index 0b4ecee283..0000000000 --- a/docs/articles/about_keras_layers.html +++ /dev/null diff --git a/docs/articles/about_keras_models.html b/docs/articles/about_keras_models.html deleted file mode 100644 index bef6637097..0000000000 --- a/docs/articles/about_keras_models.html +++ /dev/null diff --git a/docs/articles/applications.html b/docs/articles/applications.html deleted file mode 100644 index d633045a9e..0000000000 --- a/docs/articles/applications.html +++ /dev/null diff --git a/docs/articles/backend.html b/docs/articles/backend.html deleted file mode 100644 index cddff94b97..0000000000 --- a/docs/articles/backend.html +++ /dev/null diff --git a/docs/articles/custom_layers.html b/docs/articles/custom_layers.html deleted file mode 100644 index 850f842746..0000000000 --- a/docs/articles/custom_layers.html +++ /dev/null diff --git a/docs/articles/custom_train_step_in_jax.html b/docs/articles/custom_train_step_in_jax.html new file mode 100644 index 0000000000..d6229fe07b --- /dev/null +++ b/docs/articles/custom_train_step_in_jax.html @@ -0,0 +1,468 @@ +Customizing what happens in `fit()` with JAX • keras3

Introduction +

+

When you’re doing supervised learning, you can use fit() +and everything works smoothly.

+

When you need to take control of every little detail, you can write +your own training loop entirely from scratch.

+

But what if you need a custom training algorithm, but you still want +to benefit from the convenient features of fit(), such as +callbacks, built-in distribution support, or step fusing?

+

A core principle of Keras is progressive disclosure of +complexity. You should always be able to get into lower-level +workflows in a gradual way. You shouldn’t fall off a cliff if the +high-level functionality doesn’t exactly match your use case. You should +be able to gain more control over the small details while retaining a +commensurate amount of high-level convenience.

+

When you need to customize what fit() does, you should +override the training step function of the Model +class. This is the function that is called by +fit() for every batch of data. You will then be able to +call fit() as usual – and it will be running your own +learning algorithm.

+

Note that this pattern does not prevent you from building models with +the Functional API. You can do this whether you’re building +Sequential models, Functional API models, or subclassed +models.

+

Let’s see how that works.

+
+
+

Setup +

+
import os
+
+# This guide can only be run with the JAX backend.
+os.environ["KERAS_BACKEND"] = "jax"
+
+import jax
+import keras
+import numpy as np
+
+
+

A first simple example +

+

Let’s start from a simple example:

+
    +
  • We create a new class that subclasses keras.Model.
  • +
  • We implement a fully-stateless +compute_loss_and_updates() method to compute the loss as +well as the updated values for the non-trainable variables of the model. +Internally, it calls stateless_call() and the built-in +compute_loss().
  • +
  • We implement a fully-stateless train_step() method to +compute current metric values (including the loss) as well as updated +values for the trainable variables, the optimizer variables, and the +metric variables.
  • +
+

Note that you can also take into account the +sample_weight argument by:

+
    +
  • Unpacking the data as x, y, sample_weight = data +
  • +
  • Passing sample_weight to +compute_loss() +
  • +
  • Passing sample_weight alongside y and +y_pred to metrics in +stateless_update_state() +
  • +
+
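Putting those three bullets together, here is a minimal sketch of a weighted compute_loss_and_updates() (an illustration, not part of the original guide; it assumes the same CustomModel structure shown next, and simply forwards the weights to compute_loss()):
def compute_loss_and_updates(
+    self,
+    trainable_variables,
+    non_trainable_variables,
+    x,
+    y,
+    sample_weight=None,
+    training=False,
+):
+    y_pred, non_trainable_variables = self.stateless_call(
+        trainable_variables,
+        non_trainable_variables,
+        x,
+        training=training,
+    )
+    # compute_loss() applies the per-sample weights when given sample_weight.
+    loss = self.compute_loss(x, y, y_pred, sample_weight=sample_weight)
+    return loss, (y_pred, non_trainable_variables)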
class CustomModel(keras.Model):
+    def compute_loss_and_updates(
+        self,
+        trainable_variables,
+        non_trainable_variables,
+        x,
+        y,
+        training=False,
+    ):
+        y_pred, non_trainable_variables = self.stateless_call(
+            trainable_variables,
+            non_trainable_variables,
+            x,
+            training=training,
+        )
+        loss = self.compute_loss(x, y, y_pred)
+        return loss, (y_pred, non_trainable_variables)
+
+    def train_step(self, state, data):
+        (
+            trainable_variables,
+            non_trainable_variables,
+            optimizer_variables,
+            metrics_variables,
+        ) = state
+        x, y = data
+
+        # Get the gradient function.
+        grad_fn = jax.value_and_grad(
+            self.compute_loss_and_updates, has_aux=True
+        )
+
+        # Compute the gradients.
+        (loss, (y_pred, non_trainable_variables)), grads = grad_fn(
+            trainable_variables,
+            non_trainable_variables,
+            x,
+            y,
+            training=True,
+        )
+
+        # Update trainable variables and optimizer variables.
+        (
+            trainable_variables,
+            optimizer_variables,
+        ) = self.optimizer.stateless_apply(
+            optimizer_variables, grads, trainable_variables
+        )
+
+        # Update metrics.
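+        # `metrics_variables` is a flat list holding every metric's
+        # variables in `self.metrics` order; the loop below slices out
+        # each metric's chunk, updates it statelessly, and re-assembles
+        # the flat list.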
+        new_metrics_vars = []
+        for metric in self.metrics:
+            this_metric_vars = metrics_variables[
+                len(new_metrics_vars) : len(new_metrics_vars)
+                + len(metric.variables)
+            ]
+            if metric.name == "loss":
+                this_metric_vars = metric.stateless_update_state(
+                    this_metric_vars, loss
+                )
+            else:
+                this_metric_vars = metric.stateless_update_state(
+                    this_metric_vars, y, y_pred
+                )
+            logs = metric.stateless_result(this_metric_vars)
+            new_metrics_vars += this_metric_vars
+
+        # Return metric logs and updated state variables.
+        state = (
+            trainable_variables,
+            non_trainable_variables,
+            optimizer_variables,
+            new_metrics_vars,
+        )
+        return logs, state
+

Let’s try this out:

+
# Construct and compile an instance of CustomModel
+inputs = keras.Input(shape=(32,))
+outputs = keras.layers.Dense(1)(inputs)
+model = CustomModel(inputs, outputs)
+model.compile(optimizer="adam", loss="mse", metrics=["mae"])
+
+# Just use `fit` as usual
+x = np.random.random((1000, 32))
+y = np.random.random((1000, 1))
+model.fit(x, y, epochs=3)
+
+
+

Going lower-level +

+

Naturally, you could just skip passing a loss function in +compile(), and instead do everything manually in +train_step. Likewise for metrics.

+

Here’s a lower-level example, that only uses compile() +to configure the optimizer:

+
class CustomModel(keras.Model):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.loss_tracker = keras.metrics.Mean(name="loss")
+        self.mae_metric = keras.metrics.MeanAbsoluteError(name="mae")
+        self.loss_fn = keras.losses.MeanSquaredError()
+
+    def compute_loss_and_updates(
+        self,
+        trainable_variables,
+        non_trainable_variables,
+        x,
+        y,
+        training=False,
+    ):
+        y_pred, non_trainable_variables = self.stateless_call(
+            trainable_variables,
+            non_trainable_variables,
+            x,
+            training=training,
+        )
+        loss = self.loss_fn(y, y_pred)
+        return loss, (y_pred, non_trainable_variables)
+
+    def train_step(self, state, data):
+        (
+            trainable_variables,
+            non_trainable_variables,
+            optimizer_variables,
+            metrics_variables,
+        ) = state
+        x, y = data
+
+        # Get the gradient function.
+        grad_fn = jax.value_and_grad(
+            self.compute_loss_and_updates, has_aux=True
+        )
+
+        # Compute the gradients.
+        (loss, (y_pred, non_trainable_variables)), grads = grad_fn(
+            trainable_variables,
+            non_trainable_variables,
+            x,
+            y,
+            training=True,
+        )
+
+        # Update trainable variables and optimizer variables.
+        (
+            trainable_variables,
+            optimizer_variables,
+        ) = self.optimizer.stateless_apply(
+            optimizer_variables, grads, trainable_variables
+        )
+
+        # Update metrics.
+        loss_tracker_vars = metrics_variables[
+            : len(self.loss_tracker.variables)
+        ]
+        mae_metric_vars = metrics_variables[len(self.loss_tracker.variables) :]
+
+        loss_tracker_vars = self.loss_tracker.stateless_update_state(
+            loss_tracker_vars, loss
+        )
+        mae_metric_vars = self.mae_metric.stateless_update_state(
+            mae_metric_vars, y, y_pred
+        )
+
+        logs = {}
+        logs[self.loss_tracker.name] = self.loss_tracker.stateless_result(
+            loss_tracker_vars
+        )
+        logs[self.mae_metric.name] = self.mae_metric.stateless_result(
+            mae_metric_vars
+        )
+
+        new_metrics_vars = loss_tracker_vars + mae_metric_vars
+
+        # Return metric logs and updated state variables.
+        state = (
+            trainable_variables,
+            non_trainable_variables,
+            optimizer_variables,
+            new_metrics_vars,
+        )
+        return logs, state
+
+    @property
+    def metrics(self):
+        # We list our `Metric` objects here so that `reset_states()` can be
+        # called automatically at the start of each epoch
+        # or at the start of `evaluate()`.
+        return [self.loss_tracker, self.mae_metric]
+
+
+# Construct an instance of CustomModel
+inputs = keras.Input(shape=(32,))
+outputs = keras.layers.Dense(1)(inputs)
+model = CustomModel(inputs, outputs)
+
+# We don't pass a loss or metrics here.
+model.compile(optimizer="adam")
+
+# Just use `fit` as usual -- you can use callbacks, etc.
+x = np.random.random((1000, 32))
+y = np.random.random((1000, 1))
+model.fit(x, y, epochs=5)
+
+
+

Providing your own evaluation step +

+

What if you want to do the same for calls to +model.evaluate()? Then you would override +test_step in exactly the same way. Here’s what it looks +like:

+
class CustomModel(keras.Model):
+    def test_step(self, state, data):
+        # Unpack the data.
+        x, y = data
+        (
+            trainable_variables,
+            non_trainable_variables,
+            metrics_variables,
+        ) = state
+
+        # Compute predictions and loss.
+        y_pred, non_trainable_variables = self.stateless_call(
+            trainable_variables,
+            non_trainable_variables,
+            x,
+            training=False,
+        )
+        loss = self.compute_loss(x, y, y_pred)
+
+        # Update metrics.
+        new_metrics_vars = []
+        for metric in self.metrics:
+            this_metric_vars = metrics_variables[
+                len(new_metrics_vars) : len(new_metrics_vars)
+                + len(metric.variables)
+            ]
+            if metric.name == "loss":
+                this_metric_vars = metric.stateless_update_state(
+                    this_metric_vars, loss
+                )
+            else:
+                this_metric_vars = metric.stateless_update_state(
+                    this_metric_vars, y, y_pred
+                )
+            logs = metric.stateless_result(this_metric_vars)
+            new_metrics_vars += this_metric_vars
+
+        # Return metric logs and updated state variables.
+        state = (
+            trainable_variables,
+            non_trainable_variables,
+            new_metrics_vars,
+        )
+        return logs, state
+
+
+# Construct an instance of CustomModel
+inputs = keras.Input(shape=(32,))
+outputs = keras.layers.Dense(1)(inputs)
+model = CustomModel(inputs, outputs)
+model.compile(loss="mse", metrics=["mae"])
+
+# Evaluate with our custom test_step
+x = np.random.random((1000, 32))
+y = np.random.random((1000, 1))
+model.evaluate(x, y)
+

That’s it!

diff --git a/docs/articles/custom_train_step_in_tensorflow.html b/docs/articles/custom_train_step_in_tensorflow.html new file mode 100644 index 0000000000..0346bbf5c1 --- /dev/null +++ b/docs/articles/custom_train_step_in_tensorflow.html @@ -0,0 +1,613 @@ +Customizing what happens in `fit()` with TensorFlow • keras3

Introduction +

+

When you’re doing supervised learning, you can use fit() +and everything works smoothly.

+

When you need to take control of every little detail, you can write +your own training loop entirely from scratch.

+

But what if you need a custom training algorithm, but you still want +to benefit from the convenient features of fit(), such as +callbacks, built-in distribution support, or step fusing?

+

A core principle of Keras is progressive disclosure of +complexity. You should always be able to get into lower-level +workflows in a gradual way. You shouldn’t fall off a cliff if the +high-level functionality doesn’t exactly match your use case. You should +be able to gain more control over the small details while retaining a +commensurate amount of high-level convenience.

+

When you need to customize what fit() does, you should +override the training step function of the Model +class. This is the function that is called by +fit() for every batch of data. You will then be able to +call fit() as usual – and it will be running your own +learning algorithm.

+

Note that this pattern does not prevent you from building models with +the Functional API. You can do this whether you’re building +Sequential models, Functional API models, or subclassed +models.

+

Let’s see how that works.

+
+
+

Setup +

+
+library(reticulate)
+library(tensorflow, exclude = c("set_random_seed", "shape"))
+library(keras3)
+
+
+

A first simple example +

+

Let’s start from a simple example:

+
    +
  • We create a new class that subclasses Model.
  • +
  • We just override the method +train_step(self, data).
  • +
  • We return a dictionary mapping metric names (including the loss) to +their current value.
  • +
+

The input argument data is what gets passed to fit as +training data:

+
    +
  • If you pass arrays, by calling fit(x, y, ...), then +data will be the list (x, y) +
  • +
  • If you pass a tf_dataset, by calling +fit(dataset, ...), then data will be what gets +yielded by dataset at each batch.
  • +
+

In the body of the train_step() method, we implement a +regular training update, similar to what you are already familiar with. +Importantly, we compute the loss via +self.compute_loss(), which wraps the loss(es) +function(s) that were passed to compile().

+

Similarly, we call metric$update_state(y, y_pred) on +metrics from self$metrics, to update the state of the +metrics that were passed in compile(), and we query results +from self$metrics at the end to retrieve their current +value.

+
+CustomModel <- new_model_class(
+  "CustomModel",
+  train_step = function(data) {
+    c(x, y, sample_weight) %<-% unpack_x_y_sample_weight(data)
+
+    with(tf$GradientTape() %as% tape, {
+      y_pred <- self(x, training = TRUE)
+      loss <- self$compute_loss(y = y, y_pred = y_pred,
+                                sample_weight = sample_weight)
+    })
+
+    # Compute gradients
+    trainable_vars <- self$trainable_variables
+    gradients <- tape$gradient(loss, trainable_vars)
+
+    # Update weights
+    self$optimizer$apply(gradients, trainable_vars)
+
+    # Update metrics (includes the metric that tracks the loss)
+    for (metric in self$metrics) {
+      if (metric$name == "loss")
+        metric$update_state(loss)
+      else
+        metric$update_state(y, y_pred)
+    }
+
+    # Return a dict mapping metric names to current value
+    metrics <- lapply(self$metrics, function(m) m$result())
+    metrics <- setNames(metrics, sapply(self$metrics, function(m) m$name))
+    metrics
+  }
+)
+

Let’s try this out:

+
+# Construct and compile an instance of CustomModel
+inputs <- keras_input(shape = 32)
+outputs <- layer_dense(inputs, 1)
+model <- CustomModel(inputs, outputs)
+model |> compile(optimizer = "adam", loss = "mse", metrics = "mae")
+
+# Just use `fit` as usual
+x <- random_normal(c(1000, 32))
+y <- random_normal(c(1000, 1))
+model |> fit(x, y, epochs = 3)
+
## Epoch 1/3
+## 32/32 - 1s - 23ms/step - loss: 2.9118 - mae: 1.3597
+## Epoch 2/3
+## 32/32 - 0s - 1ms/step - loss: 2.6026 - mae: 1.2856
+## Epoch 3/3
+## 32/32 - 0s - 1ms/step - loss: 2.3378 - mae: 1.2193
+
+
+

Going lower-level +

+

Naturally, you could just skip passing a loss function in +compile(), and instead do everything manually in +train_step. Likewise for metrics.

+

Here’s a lower-level example, that only uses compile() +to configure the optimizer:

+
    +
  • We start by creating Metric instances to track our loss +and a MAE score (in initialize()).
  • +
  • We implement a custom train_step() that updates the +state of these metrics (by calling update_state() on them), +then queries them (via result()) to return their current +average value, to be displayed by the progress bar and to be passed to any +callback.
  • +
  • Note that we would need to call reset_states() on our +metrics between each epoch! Otherwise calling result() +would return an average since the start of training, whereas we usually +work with per-epoch averages. Thankfully, the framework can do that for +us: just list any metric you want to reset in the metrics +property of the model. The model will call reset_states() +on any object listed here at the beginning of each fit() +epoch or at the beginning of a call to evaluate().
  • +
+
+CustomModel <- new_model_class(
+  "CustomModel",
+  initialize = function(...) {
+    super$initialize(...)
+    self$loss_tracker <- metric_mean(name = "loss")
+    self$mae_metric <- metric_mean_absolute_error(name = "mae")
+    self$loss_fn <- loss_mean_squared_error()
+  },
+  train_step = function(data) {
+    c(x, y, sample_weight) %<-% unpack_x_y_sample_weight(data)
+
+    with(tf$GradientTape() %as% tape, {
+      y_pred <- self(x, training = TRUE)
+      loss <- self$loss_fn(y, y_pred, sample_weight = sample_weight)
+    })
+
+    # Compute gradients
+    trainable_vars <- self$trainable_variables
+    gradients <- tape$gradient(loss, trainable_vars)
+
+    # Update weights
+    self$optimizer$apply(gradients, trainable_vars)
+
+    # Compute our own metrics
+    self$loss_tracker$update_state(loss)
+    self$mae_metric$update_state(y, y_pred)
+
+    # Return a dict mapping metric names to current value
+    list(
+      loss = self$loss_tracker$result(),
+      mae = self$mae_metric$result()
+    )
+  },
+  metrics = mark_active(function() {
+    # We list our `Metric` objects here so that `reset_states()` can be
+    # called automatically at the start of each epoch
+    # or at the start of `evaluate()`.
+    list(self$loss_tracker, self$mae_metric)
+  })
+)
+
+
+# Construct and compile an instance of CustomModel
+inputs <- keras_input(shape = 32)
+outputs <- layer_dense(inputs, 1)
+model <- CustomModel(inputs, outputs)
+
+# We don't pass a loss or metrics here.
+model |> compile(optimizer = "adam")
+
+# Just use `fit` as usual
+x <- random_normal(c(1000, 32))
+y <- random_normal(c(1000, 1))
+model |> fit(x, y, epochs = 3)
+
## Epoch 1/3
+## 32/32 - 1s - 19ms/step - loss: 2.6540 - mae: 1.2901
+## Epoch 2/3
+## 32/32 - 0s - 2ms/step - loss: 2.4139 - mae: 1.2303
+## Epoch 3/3
+## 32/32 - 0s - 2ms/step - loss: 2.2080 - mae: 1.1761
+
+
+

Supporting sample_weight & +class_weight +

+

You may have noticed that our first basic example didn’t make any +mention of sample weighting. If you want to support the +fit() arguments sample_weight and +class_weight, you’d simply do the following:

+
    +
  • Unpack sample_weight from the data +argument
  • +
  • Pass it to compute_loss & update_state +(of course, you could also just apply it manually if you don’t rely on +compile() for losses & metrics)
  • +
  • That’s it.
  • +
+
+CustomModel <- new_model_class(
+  "CustomModel",
+  train_step = function(data) {
+    c(x, y, sample_weight) %<-% unpack_x_y_sample_weight(data)
+
+    with(tf$GradientTape() %as% tape, {
+      y_pred <- self(x, training = TRUE)
+      loss <- self$compute_loss(y = y, y_pred = y_pred,
+                                sample_weight = sample_weight)
+    })
+
+    # Compute gradients
+    trainable_vars <- self$trainable_variables
+    gradients <- tape$gradient(loss, trainable_vars)
+
+    # Update weights
+    self$optimizer$apply_gradients(zip_lists(gradients, trainable_vars))
+
+    # Update metrics (includes the metric that tracks the loss)
+    for (metric in self$metrics) {
+      if (metric$name == "loss") {
+        metric$update_state(loss)
+      } else {
+        metric$update_state(y, y_pred, sample_weight = sample_weight)
+      }
+    }
+
+    # Return a dict mapping metric names to current value
+    metrics <- lapply(self$metrics, function(m) m$result())
+    metrics <- setNames(metrics, sapply(self$metrics, function(m) m$name))
+    metrics
+  }
+)
+
+
+# Construct and compile an instance of CustomModel
+inputs <- keras_input(shape = 32)
+outputs <- layer_dense(inputs, units = 1)
+model <- CustomModel(inputs, outputs)
+model |> compile(optimizer = "adam", loss = "mse", metrics = "mae")
+
+# You can now use sample_weight argument
+x <- random_normal(c(1000, 32))
+y <- random_normal(c(1000, 1))
+sw <- random_normal(c(1000, 1))
+model |> fit(x, y, sample_weight = sw, epochs = 3)
+
## Epoch 1/3
+## 32/32 - 1s - 21ms/step - loss: 0.1607 - mae: 1.3018
+## Epoch 2/3
+## 32/32 - 0s - 1ms/step - loss: 0.1452 - mae: 1.2999
+## Epoch 3/3
+## 32/32 - 0s - 2ms/step - loss: 0.1335 - mae: 1.2986
+
+
+

Providing your own evaluation step +

+

What if you want to do the same for calls to +model.evaluate()? Then you would override +test_step in exactly the same way. Here’s what it looks +like:

+
+CustomModel <- new_model_class(
+  "CustomModel",
+  test_step = function(data) {
+    # Unpack the data
+    c(x, y, sw) %<-% unpack_x_y_sample_weight(data)
+    # Compute predictions
+    y_pred = self(x, training = FALSE)
+    # Updates the metrics tracking the loss
+    self$compute_loss(y = y, y_pred = y_pred, sample_weight = sw)
+    # Update the metrics.
+    for (metric in self$metrics) {
+      if (metric$name != "loss") {
+        metric$update_state(y, y_pred, sample_weight = sw)
+      }
+    }
+    # Return a dict mapping metric names to current value.
+    # Note that it will include the loss (tracked in self.metrics).
+    metrics <- lapply(self$metrics, function(m) m$result())
+    metrics <- setNames(metrics, sapply(self$metrics, function(m) m$name))
+    metrics
+  }
+)
+
+# Construct an instance of CustomModel
+inputs <- keras_input(shape = 32)
+outputs <- layer_dense(inputs, 1)
+model <- CustomModel(inputs, outputs)
+model |> compile(loss = "mse", metrics = "mae")
+
+# Evaluate with our custom test_step
+x <- random_normal(c(1000, 32))
+y <- random_normal(c(1000, 1))
+model |> evaluate(x, y)
+
## 32/32 - 0s - 9ms/step - loss: 0.0000e+00 - mae: 1.3947
+
## $loss
+## [1] 0
+##
+## $mae
+## [1] 1.394695
+
+
+

Wrapping up: an end-to-end GAN example +

+

Let’s walk through an end-to-end example that leverages everything +you just learned.

+

Let’s consider:

+
    +
  • A generator network meant to generate 28x28x1 images.
  • +
  • A discriminator network meant to classify 28x28x1 images into two +classes (“fake” and “real”).
  • +
  • One optimizer for each.
  • +
  • A loss function to train the discriminator.
  • +
+
+# Create the discriminator
+discriminator <-
+  keras_model_sequential(name = "discriminator", input_shape = c(28, 28, 1)) |>
+
+  layer_conv_2d(filters = 64, kernel_size = c(3, 3),
+                strides = c(2, 2),  padding = "same") |>
+  layer_activation_leaky_relu(negative_slope = 0.2) |>
+
+  layer_conv_2d(filters = 128, kernel_size = c(3, 3),
+                strides = c(2, 2), padding = "same") |>
+  layer_activation_leaky_relu(negative_slope = 0.2) |>
+
+  layer_global_max_pooling_2d() |>
+  layer_dense(units = 1)
+
+
+# Create the generator
+latent_dim <- 128
+generator <-
+  keras_model_sequential(name = "generator", input_shape = latent_dim) |>
+
+  layer_dense(7 * 7 * 128) |>
+  layer_activation_leaky_relu(negative_slope = 0.2) |>
+
+  layer_reshape(target_shape = c(7, 7, 128)) |>
+
+  layer_conv_2d_transpose(filters = 128, kernel_size = c(4, 4),
+                          strides = c(2, 2), padding = "same") |>
+  layer_activation_leaky_relu(negative_slope = 0.2) |>
+
+  layer_conv_2d_transpose(filters = 128, kernel_size = c(4, 4),
+                          strides = c(2, 2), padding = "same") |>
+  layer_activation_leaky_relu(negative_slope = 0.2) |>
+
+  layer_conv_2d(filters = 1, kernel_size = c(7, 7), padding = "same",
+                activation = "sigmoid")
+

Here’s a feature-complete GAN class, overriding +compile() to use its own signature, and implementing the +entire GAN algorithm in 17 lines in train_step:

+
+GAN <- Model(
+  classname = "GAN",
+
+  initialize = function(discriminator, generator, latent_dim, ...) {
+    super$initialize(...)
+    self$discriminator <- discriminator
+    self$generator <- generator
+    self$latent_dim <- as.integer(latent_dim)
+    self$d_loss_tracker <- metric_mean(name = "d_loss")
+    self$g_loss_tracker <- metric_mean(name = "g_loss")
+  },
+
+  compile = function(d_optimizer, g_optimizer, loss_fn, ...) {
+    super$compile(...)
+    self$d_optimizer <- d_optimizer
+    self$g_optimizer <- g_optimizer
+    self$loss_fn <- loss_fn
+  },
+  metrics = active_property(function() {
+    list(self$d_loss_tracker, self$g_loss_tracker)
+  }),
+
+  train_step = function(real_images) {
+
+    # Sample random points in the latent space
+    batch_size <- shape(real_images)[[1]]
+    random_latent_vectors <-
+      tf$random$normal(shape(batch_size, self$latent_dim))
+
+    # Decode them to fake images
+    generated_images <- self$generator(random_latent_vectors)
+
+    # Combine them with real images
+    combined_images <- op_concatenate(list(generated_images,
+                                           real_images))
+
+    # Assemble labels discriminating real from fake images
+    labels <- op_concatenate(list(op_ones(c(batch_size, 1)),
+                                  op_zeros(c(batch_size, 1))))
+
+    # Add random noise to the labels - important trick!
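+    # (`%<>%` is the magrittr assignment pipe: the next line reads as
+    # labels <- labels + uniform noise drawn from [0, 0.05))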
+    labels %<>% `+`(tf$random$uniform(shape(.), maxval = 0.05))
+
+    # Train the discriminator
+    with(tf$GradientTape() %as% tape, {
+      predictions <- self$discriminator(combined_images)
+      d_loss <- self$loss_fn(labels, predictions)
+    })
+    grads <- tape$gradient(d_loss, self$discriminator$trainable_weights)
+    self$d_optimizer$apply_gradients(
+      zip_lists(grads, self$discriminator$trainable_weights))
+
+    # Sample random points in the latent space
+    random_latent_vectors <-
+      tf$random$normal(shape(batch_size, self$latent_dim))
+
+    # Assemble labels that say "all real images"
+    misleading_labels <- op_zeros(c(batch_size, 1))
+
+    # Train the generator (note that we should *not* update the weights
+    # of the discriminator)!
+    with(tf$GradientTape() %as% tape, {
+      predictions <- self$discriminator(self$generator(random_latent_vectors))
+      g_loss <- self$loss_fn(misleading_labels, predictions)
+    })
+    grads <- tape$gradient(g_loss, self$generator$trainable_weights)
+    self$g_optimizer$apply_gradients(
+      zip_lists(grads, self$generator$trainable_weights))
+
+    list(d_loss = d_loss, g_loss = g_loss)
+  }
+)
+

Let’s test-drive it:

+
+batch_size <- 64
+c(c(x_train, .), c(x_test, .)) %<-% dataset_mnist()
+all_digits <- op_concatenate(list(x_train, x_test))
+all_digits <- op_reshape(all_digits, c(-1, 28, 28, 1))
+dataset <- all_digits |>
+  tfdatasets::tensor_slices_dataset() |>
+  tfdatasets::dataset_map(\(x) op_cast(x, "float32") / 255) |>
+  tfdatasets::dataset_shuffle(buffer_size = 1024) |>
+  tfdatasets::dataset_batch(batch_size = batch_size)
+
+gan <- GAN(discriminator = discriminator,
+           generator = generator,
+           latent_dim = latent_dim)
+
+gan |> compile(
+  d_optimizer = optimizer_adam(learning_rate = 0.0003),
+  g_optimizer = optimizer_adam(learning_rate = 0.0003),
+  loss_fn = loss_binary_crossentropy(from_logits = TRUE)
+)
+
+# To limit the execution time, we only train on 100 batches. You can train on
+# the entire dataset. You will need about 20 epochs to get nice results.
+gan |> fit(
+  tfdatasets::dataset_take(dataset, 100),
+  epochs = 1
+)
+
## 100/100 - 5s - 53ms/step - d_loss: 0.0000e+00 - g_loss: 0.0000e+00
+

The ideas behind deep learning are simple, so why should their +implementation be painful?

diff --git a/docs/articles/custom_train_step_in_torch.html b/docs/articles/custom_train_step_in_torch.html new file mode 100644 index 0000000000..05011619e7 --- /dev/null +++ b/docs/articles/custom_train_step_in_torch.html @@ -0,0 +1,601 @@ +Customizing what happens in `fit()` with PyTorch • keras3

Introduction +

+

When you’re doing supervised learning, you can use fit() +and everything works smoothly.

+

When you need to take control of every little detail, you can write +your own training loop entirely from scratch.

+

But what if you need a custom training algorithm, but you still want +to benefit from the convenient features of fit(), such as +callbacks, built-in distribution support, or step fusing?

+

A core principle of Keras is progressive disclosure of +complexity. You should always be able to get into lower-level +workflows in a gradual way. You shouldn’t fall off a cliff if the +high-level functionality doesn’t exactly match your use case. You should +be able to gain more control over the small details while retaining a +commensurate amount of high-level convenience.

+

When you need to customize what fit() does, you should +override the training step function of the Model +class. This is the function that is called by +fit() for every batch of data. You will then be able to +call fit() as usual – and it will be running your own +learning algorithm.

+

Note that this pattern does not prevent you from building models with +the Functional API. You can do this whether you’re building +Sequential models, Functional API models, or subclassed +models.

+

Let’s see how that works.

+
+
+

Setup +

+
import os
+
+# This guide can only be run with the torch backend.
+os.environ["KERAS_BACKEND"] = "torch"
+
+import torch
+import keras
+from keras import layers
+import numpy as np
+
+
+

A first simple example +

+

Let’s start from a simple example:

+
    +
  • We create a new class that subclasses keras.Model.
  • +
  • We just override the method +train_step(self, data).
  • +
  • We return a dictionary mapping metric names (including the loss) to +their current value.
  • +
+

The input argument data is what gets passed to fit as +training data:

+
    +
  • If you pass NumPy arrays, by calling fit(x, y, ...), +then data will be the tuple (x, y) +
  • +
  • If you pass a torch.utils.data.DataLoader or a +tf.data.Dataset, by calling fit(dataset, ...), +then data will be what gets yielded by dataset +at each batch.
  • +
+

In the body of the train_step() method, we implement a +regular training update, similar to what you are already familiar with. +Importantly, we compute the loss via +self.compute_loss(), which wraps the loss(es) +function(s) that were passed to compile().

+

Similarly, we call metric.update_state(y, y_pred) on +metrics from self.metrics, to update the state of the +metrics that were passed in compile(), and we query results +from self.metrics at the end to retrieve their current +value.

+
class CustomModel(keras.Model):
+    def train_step(self, data):
+        # Unpack the data. Its structure depends on your model and
+        # on what you pass to `fit()`.
+        x, y = data
+
+        # Call torch.nn.Module.zero_grad() to clear the leftover gradients
+        # for the weights from the previous train step.
+        self.zero_grad()
+
+        # Compute loss
+        y_pred = self(x, training=True)  # Forward pass
+        loss = self.compute_loss(y=y, y_pred=y_pred)
+
+        # Call torch.Tensor.backward() on the loss to compute gradients
+        # for the weights.
+        loss.backward()
+
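+        # Each Keras variable wraps a torch tensor in `.value`; after
+        # backward(), torch leaves the computed gradient on `.value.grad`.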
+        trainable_weights = [v for v in self.trainable_weights]
+        gradients = [v.value.grad for v in trainable_weights]
+
+        # Update weights
+        with torch.no_grad():
+            self.optimizer.apply(gradients, trainable_weights)
+
+        # Update metrics (includes the metric that tracks the loss)
+        for metric in self.metrics:
+            if metric.name == "loss":
+                metric.update_state(loss)
+            else:
+                metric.update_state(y, y_pred)
+
+        # Return a dict mapping metric names to current value
+        # Note that it will include the loss (tracked in self.metrics).
+        return {m.name: m.result() for m in self.metrics}
+

Let’s try this out:

+
# Construct and compile an instance of CustomModel
+inputs = keras.Input(shape=(32,))
+outputs = keras.layers.Dense(1)(inputs)
+model = CustomModel(inputs, outputs)
+model.compile(optimizer="adam", loss="mse", metrics=["mae"])
+
+# Just use `fit` as usual
+x = np.random.random((1000, 32))
+y = np.random.random((1000, 1))
+model.fit(x, y, epochs=3)
+
+
+

Going lower-level +

+

Naturally, you could just skip passing a loss function in +compile(), and instead do everything manually in +train_step. Likewise for metrics.

+

Here’s a lower-level example, that only uses compile() +to configure the optimizer:

+
    +
  • We start by creating Metric instances to track our loss +and a MAE score (in __init__()).
  • +
  • We implement a custom train_step() that updates the +state of these metrics (by calling update_state() on them), +then queries them (via result()) to return their current +average value, to be displayed by the progress bar and to be passed to any +callback.
  • +
  • Note that we would need to call reset_states() on our +metrics between each epoch! Otherwise calling result() +would return an average since the start of training, whereas we usually +work with per-epoch averages. Thankfully, the framework can do that for +us: just list any metric you want to reset in the metrics +property of the model. The model will call reset_states() +on any object listed here at the beginning of each fit() +epoch or at the beginning of a call to evaluate().
  • +
+
class CustomModel(keras.Model):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.loss_tracker = keras.metrics.Mean(name="loss")
+        self.mae_metric = keras.metrics.MeanAbsoluteError(name="mae")
+        self.loss_fn = keras.losses.MeanSquaredError()
+
+    def train_step(self, data):
+        x, y = data
+
+        # Call torch.nn.Module.zero_grad() to clear the leftover gradients
+        # for the weights from the previous train step.
+        self.zero_grad()
+
+        # Compute loss
+        y_pred = self(x, training=True)  # Forward pass
+        loss = self.loss_fn(y, y_pred)
+
+        # Call torch.Tensor.backward() on the loss to compute gradients
+        # for the weights.
+        loss.backward()
+
+        trainable_weights = [v for v in self.trainable_weights]
+        gradients = [v.value.grad for v in trainable_weights]
+
+        # Update weights
+        with torch.no_grad():
+            self.optimizer.apply(gradients, trainable_weights)
+
+        # Compute our own metrics
+        self.loss_tracker.update_state(loss)
+        self.mae_metric.update_state(y, y_pred)
+        return {
+            "loss": self.loss_tracker.result(),
+            "mae": self.mae_metric.result(),
+        }
+
+    @property
+    def metrics(self):
+        # We list our `Metric` objects here so that `reset_states()` can be
+        # called automatically at the start of each epoch
+        # or at the start of `evaluate()`.
+        return [self.loss_tracker, self.mae_metric]
+
+
+# Construct an instance of CustomModel
+inputs = keras.Input(shape=(32,))
+outputs = keras.layers.Dense(1)(inputs)
+model = CustomModel(inputs, outputs)
+
+# We don't pass a loss or metrics here.
+model.compile(optimizer="adam")
+
+# Just use `fit` as usual -- you can use callbacks, etc.
+x = np.random.random((1000, 32))
+y = np.random.random((1000, 1))
+model.fit(x, y, epochs=5)
+
+
+

Supporting sample_weight & +class_weight +

+

You may have noticed that our first basic example didn’t make any +mention of sample weighting. If you want to support the +fit() arguments sample_weight and +class_weight, you’d simply do the following:

+
    +
  • Unpack sample_weight from the data +argument
  • +
  • Pass it to compute_loss & update_state +(of course, you could also just apply it manually if you don’t rely on +compile() for losses & metrics)
  • +
  • That’s it.
  • +
+
class CustomModel(keras.Model):
+    def train_step(self, data):
+        # Unpack the data. Its structure depends on your model and
+        # on what you pass to `fit()`.
+        if len(data) == 3:
+            x, y, sample_weight = data
+        else:
+            sample_weight = None
+            x, y = data
+
+        # Call torch.nn.Module.zero_grad() to clear the leftover gradients
+        # for the weights from the previous train step.
+        self.zero_grad()
+
+        # Compute loss
+        y_pred = self(x, training=True)  # Forward pass
+        loss = self.compute_loss(
+            y=y,
+            y_pred=y_pred,
+            sample_weight=sample_weight,
+        )
+
+        # Call torch.Tensor.backward() on the loss to compute gradients
+        # for the weights.
+        loss.backward()
+
+        trainable_weights = [v for v in self.trainable_weights]
+        gradients = [v.value.grad for v in trainable_weights]
+
+        # Update weights
+        with torch.no_grad():
+            self.optimizer.apply(gradients, trainable_weights)
+
+        # Update metrics (includes the metric that tracks the loss)
+        for metric in self.metrics:
+            if metric.name == "loss":
+                metric.update_state(loss)
+            else:
+                metric.update_state(y, y_pred, sample_weight=sample_weight)
+
+        # Return a dict mapping metric names to current value
+        # Note that it will include the loss (tracked in self.metrics).
+        return {m.name: m.result() for m in self.metrics}
+
+
+# Construct and compile an instance of CustomModel
+inputs = keras.Input(shape=(32,))
+outputs = keras.layers.Dense(1)(inputs)
+model = CustomModel(inputs, outputs)
+model.compile(optimizer="adam", loss="mse", metrics=["mae"])
+
+# You can now use sample_weight argument
+x = np.random.random((1000, 32))
+y = np.random.random((1000, 1))
+sw = np.random.random((1000, 1))
+model.fit(x, y, sample_weight=sw, epochs=3)
+
+
+

Providing your own evaluation step +

+

What if you want to do the same for calls to +model.evaluate()? Then you would override +test_step in exactly the same way. Here’s what it looks +like:

+
class CustomModel(keras.Model):
+    def test_step(self, data):
+        # Unpack the data
+        x, y = data
+        # Compute predictions
+        y_pred = self(x, training=False)
+        # Updates the metrics tracking the loss
+        loss = self.compute_loss(y=y, y_pred=y_pred)
+        # Update the metrics.
+        for metric in self.metrics:
+            if metric.name == "loss":
+                metric.update_state(loss)
+            else:
+                metric.update_state(y, y_pred)
+        # Return a dict mapping metric names to current value.
+        # Note that it will include the loss (tracked in self.metrics).
+        return {m.name: m.result() for m in self.metrics}
+
+
+# Construct an instance of CustomModel
+inputs = keras.Input(shape=(32,))
+outputs = keras.layers.Dense(1)(inputs)
+model = CustomModel(inputs, outputs)
+model.compile(loss="mse", metrics=["mae"])
+
+# Evaluate with our custom test_step
+x = np.random.random((1000, 32))
+y = np.random.random((1000, 1))
+model.evaluate(x, y)
+
+
+

Wrapping up: an end-to-end GAN example +

+

Let’s walk through an end-to-end example that leverages everything +you just learned.

+

Let’s consider:

+
    +
  • A generator network meant to generate 28x28x1 images.
  • +
  • A discriminator network meant to classify 28x28x1 images into two +classes (“fake” and “real”).
  • +
  • One optimizer for each.
  • +
  • A loss function to train the discriminator.
  • +
+
# Create the discriminator
+discriminator = keras.Sequential(
+    [
+        keras.Input(shape=(28, 28, 1)),
+        layers.Conv2D(64, (3, 3), strides=(2, 2), padding="same"),
+        layers.LeakyReLU(negative_slope=0.2),
+        layers.Conv2D(128, (3, 3), strides=(2, 2), padding="same"),
+        layers.LeakyReLU(negative_slope=0.2),
+        layers.GlobalMaxPooling2D(),
+        layers.Dense(1),
+    ],
+    name="discriminator",
+)
+
+# Create the generator
+latent_dim = 128
+generator = keras.Sequential(
+    [
+        keras.Input(shape=(latent_dim,)),
+        # We want to generate 128 coefficients to reshape into a 7x7x128 map
+        layers.Dense(7 * 7 * 128),
+        layers.LeakyReLU(negative_slope=0.2),
+        layers.Reshape((7, 7, 128)),
+        layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding="same"),
+        layers.LeakyReLU(negative_slope=0.2),
+        layers.Conv2DTranspose(128, (4, 4), strides=(2, 2), padding="same"),
+        layers.LeakyReLU(negative_slope=0.2),
+        layers.Conv2D(1, (7, 7), padding="same", activation="sigmoid"),
+    ],
+    name="generator",
+)
+

Here’s a feature-complete GAN class, overriding +compile() to use its own signature, and implementing the +entire GAN algorithm in 17 lines in train_step:

+
class GAN(keras.Model):
+    def __init__(self, discriminator, generator, latent_dim):
+        super().__init__()
+        self.discriminator = discriminator
+        self.generator = generator
+        self.latent_dim = latent_dim
+        self.d_loss_tracker = keras.metrics.Mean(name="d_loss")
+        self.g_loss_tracker = keras.metrics.Mean(name="g_loss")
+        self.seed_generator = keras.random.SeedGenerator(1337)
+        self.built = True
+
+    @property
+    def metrics(self):
+        return [self.d_loss_tracker, self.g_loss_tracker]
+
+    def compile(self, d_optimizer, g_optimizer, loss_fn):
+        super().compile()
+        self.d_optimizer = d_optimizer
+        self.g_optimizer = g_optimizer
+        self.loss_fn = loss_fn
+
+    def train_step(self, real_images):
+        if isinstance(real_images, tuple):
+            real_images = real_images[0]
+        # Sample random points in the latent space
+        batch_size = real_images.shape[0]
+        random_latent_vectors = keras.random.normal(
+            shape=(batch_size, self.latent_dim), seed=self.seed_generator
+        )
+
+        # Decode them to fake images
+        generated_images = self.generator(random_latent_vectors)
+
+        # Combine them with real images
+        real_images = torch.tensor(real_images)
+        combined_images = torch.concat([generated_images, real_images], axis=0)
+
+        # Assemble labels discriminating real from fake images
+        labels = torch.concat(
+            [torch.ones((batch_size, 1)), torch.zeros((batch_size, 1))], axis=0
+        )
+        # Add random noise to the labels - important trick!
+        labels += 0.05 * keras.random.uniform(
+            labels.shape, seed=self.seed_generator
+        )
+
+        # Train the discriminator
+        self.zero_grad()
+        predictions = self.discriminator(combined_images)
+        d_loss = self.loss_fn(labels, predictions)
+        d_loss.backward()
+        grads = [v.value.grad for v in self.discriminator.trainable_weights]
+        with torch.no_grad():
+            self.d_optimizer.apply(grads, self.discriminator.trainable_weights)
+
+        # Sample random points in the latent space
+        random_latent_vectors = keras.random.normal(
+            shape=(batch_size, self.latent_dim), seed=self.seed_generator
+        )
+
+        # Assemble labels that say "all real images"
+        misleading_labels = torch.zeros((batch_size, 1))
+
+        # Train the generator (note that we should *not* update the weights
+        # of the discriminator)!
+        self.zero_grad()
+        predictions = self.discriminator(self.generator(random_latent_vectors))
+        g_loss = self.loss_fn(misleading_labels, predictions)
+        g_loss.backward()
+        grads = [v.value.grad for v in self.generator.trainable_weights]
+        with torch.no_grad():
+            self.g_optimizer.apply(grads, self.generator.trainable_weights)
+
+        # Update metrics and return their value.
+        self.d_loss_tracker.update_state(d_loss)
+        self.g_loss_tracker.update_state(g_loss)
+        return {
+            "d_loss": self.d_loss_tracker.result(),
+            "g_loss": self.g_loss_tracker.result(),
+        }
+

Let’s test-drive it:

+
# Prepare the dataset. We use both the training & test MNIST digits.
+batch_size = 64
+(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()
+all_digits = np.concatenate([x_train, x_test])
+all_digits = all_digits.astype("float32") / 255.0
+all_digits = np.reshape(all_digits, (-1, 28, 28, 1))
+
+# Create a TensorDataset
+dataset = torch.utils.data.TensorDataset(
+    torch.from_numpy(all_digits), torch.from_numpy(all_digits)
+)
+# Create a DataLoader
+dataloader = torch.utils.data.DataLoader(
+    dataset, batch_size=batch_size, shuffle=True
+)
+
+gan = GAN(
+    discriminator=discriminator, generator=generator, latent_dim=latent_dim
+)
+gan.compile(
+    d_optimizer=keras.optimizers.Adam(learning_rate=0.0003),
+    g_optimizer=keras.optimizers.Adam(learning_rate=0.0003),
+    loss_fn=keras.losses.BinaryCrossentropy(from_logits=True),
+)
+
+gan.fit(dataloader, epochs=1)
+

The ideas behind deep learning are simple, so why should their +implementation be painful?

diff --git a/docs/articles/customizing_saving_and_serialization.html b/docs/articles/customizing_saving_and_serialization.html new file mode 100644 index 0000000000..b856541c22 --- /dev/null +++ b/docs/articles/customizing_saving_and_serialization.html @@ -0,0 +1,465 @@ +Customizing Saving and Serialization • keras3

Introduction +

+

This guide covers advanced methods that can be customized in Keras +saving. For most users, the methods outlined in the primary Serialize, save, and export +guide are sufficient.

+
+

APIs +

+

We will cover the following APIs:

+
    +
  • +save_assets() and load_assets() +
  • +
  • +save_own_variables() and +load_own_variables() +
  • +
  • +get_build_config() and +build_from_config() +
  • +
  • +get_compile_config() and +compile_from_config() +
  • +
+

When restoring a model, these get executed in the following +order:

+
    +
  • build_from_config()
  • +
  • compile_from_config()
  • +
  • load_own_variables()
  • +
  • load_assets()
  • +
+
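Both pairs of state methods are demonstrated in the sections below. As a quick preview of the compile pair: get_compile_config() returns a serializable dict that is written into the .keras archive, and compile_from_config() receives it back at load time (after build_from_config(), per the order above). A minimal sketch, assuming the model keeps a reference to its loss in a loss_fn attribute that we introduce here purely for illustration:
class ModelWithCompileConfig(keras.Model):
+    def compile(self, optimizer="rmsprop", loss=None, **kwargs):
+        super().compile(optimizer=optimizer, loss=loss, **kwargs)
+        # Keep a reference so get_compile_config() can serialize it (illustrative).
+        self.loss_fn = loss
+
+    def get_compile_config(self):
+        # Saved into the .keras archive alongside the model config.
+        return {
+            "optimizer": keras.saving.serialize_keras_object(self.optimizer),
+            "loss": keras.saving.serialize_keras_object(self.loss_fn),
+        }
+
+    def compile_from_config(self, config):
+        # Called at load time, after build_from_config().
+        self.compile(
+            optimizer=keras.saving.deserialize_keras_object(config["optimizer"]),
+            loss=keras.saving.deserialize_keras_object(config["loss"]),
+        )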
+
+
+

Setup +

+
import os
+import numpy as np
+import keras
+
+
+

State saving customization +

+

These methods determine how the state of your model’s layers is saved +when calling model.save(). You can override them to take +full control of the state saving process.

+
+

+save_own_variables() and +load_own_variables() +

+

These methods save and load the state variables of the layer when +model.save() and keras.models.load_model() are +called, respectively. By default, the state variables saved and loaded +are the weights of the layer (both trainable and non-trainable). Here is +the default implementation of save_own_variables():

+
def save_own_variables(self, store):
+    all_vars = self._trainable_weights + self._non_trainable_weights
+    for i, v in enumerate(all_vars):
+        store[f"{i}"] = v.numpy()
+

The store used by these methods is a dictionary that can be populated +with the layer variables. Let’s take a look at an example customizing +this.

+

Example:

+
@keras.utils.register_keras_serializable(package="my_custom_package")
+class LayerWithCustomVariable(keras.layers.Dense):
+    def __init__(self, units, **kwargs):
+        super().__init__(units, **kwargs)
+        self.my_variable = keras.Variable(
+            np.random.random((units,)), name="my_variable", dtype="float32"
+        )
+
+    def save_own_variables(self, store):
+        super().save_own_variables(store)
+        # Stores the value of the variable upon saving
+        store["variables"] = self.my_variable.numpy()
+
+    def load_own_variables(self, store):
+        # Assigns the value of the variable upon loading
+        self.my_variable.assign(store["variables"])
+        # Load the remaining weights
+        for i, v in enumerate(self.weights):
+            v.assign(store[f"{i}"])
+        # Note: You must specify how all variables (including layer weights)
+        # are loaded in `load_own_variables()`.
+
+    def call(self, inputs):
+        dense_out = super().call(inputs)
+        return dense_out + self.my_variable
+
+
+model = keras.Sequential([LayerWithCustomVariable(1)])
+
+ref_input = np.random.random((8, 10))
+ref_output = np.random.random((8, 10))
+model.compile(optimizer="adam", loss="mean_squared_error")
+model.fit(ref_input, ref_output)
+
+model.save("custom_vars_model.keras")
+restored_model = keras.models.load_model("custom_vars_model.keras")
+
+np.testing.assert_allclose(
+    model.layers[0].my_variable.numpy(),
+    restored_model.layers[0].my_variable.numpy(),
+)
+
+
+

+save_assets() and load_assets() +

+

These methods can be added to your model class definition to store +and load any additional information that your model needs.

+

For example, NLP domain layers such as TextVectorization layers and +IndexLookup layers may need to store their associated vocabulary (or +lookup table) in a text file upon saving.

+

Let’s take a look at the basics of this workflow with a simple file +assets.txt.

+

Example:

+
@keras.saving.register_keras_serializable(package="my_custom_package")
+class LayerWithCustomAssets(keras.layers.Dense):
+    def __init__(self, vocab=None, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.vocab = vocab
+
+    def save_assets(self, inner_path):
+        # Writes the vocab (sentence) to text file at save time.
+        with open(os.path.join(inner_path, "vocabulary.txt"), "w") as f:
+            f.write(self.vocab)
+
+    def load_assets(self, inner_path):
+        # Reads the vocab (sentence) from text file at load time.
+        with open(os.path.join(inner_path, "vocabulary.txt"), "r") as f:
+            text = f.read()
+        self.vocab = text.replace("<unk>", "little")
+
+
+model = keras.Sequential(
+    [LayerWithCustomAssets(vocab="Mary had a <unk> lamb.", units=5)]
+)
+
+x = np.random.random((10, 10))
+y = model(x)
+
+model.save("custom_assets_model.keras")
+restored_model = keras.models.load_model("custom_assets_model.keras")
+
+np.testing.assert_string_equal(
+    restored_model.layers[0].vocab, "Mary had a little lamb."
+)
+
+
+
+

+build and compile saving +customization +

+
+

+get_build_config() and +build_from_config() +

+

These methods work together to save the layer’s built states and +restore them upon loading.

+

By default, this only includes a build config dictionary with the +layer’s input shape, but these methods can be overridden to include +further variables and lookup tables that can be useful to restore for +your built model.

+

Example:

+
@keras.saving.register_keras_serializable(package="my_custom_package")
+class LayerWithCustomBuild(keras.layers.Layer):
+    def __init__(self, units=32, **kwargs):
+        super().__init__(**kwargs)
+        self.units = units
+
+    def call(self, inputs):
+        return keras.ops.matmul(inputs, self.w) + self.b
+
+    def get_config(self):
+        return dict(units=self.units, **super().get_config())
+
+    def build(self, input_shape, layer_init):
+        # Note the overriding of `build()` to add an extra argument.
+        # Therefore, we will need to manually call build with `layer_init` argument
+        # before the first execution of `call()`.
+        super().build(input_shape)
+        self._input_shape = input_shape
+        self.w = self.add_weight(
+            shape=(input_shape[-1], self.units),
+            initializer=layer_init,
+            trainable=True,
+        )
+        self.b = self.add_weight(
+            shape=(self.units,),
+            initializer=layer_init,
+            trainable=True,
+        )
+        self.layer_init = layer_init
+
+    def get_build_config(self):
+        build_config = {
+            "layer_init": self.layer_init,
+            "input_shape": self._input_shape,
+        }  # Stores our initializer for `build()`
+        return build_config
+
+    def build_from_config(self, config):
+        # Calls `build()` with the parameters at loading time
+        self.build(config["input_shape"], config["layer_init"])
+
+
+custom_layer = LayerWithCustomBuild(units=16)
+custom_layer.build(input_shape=(8,), layer_init="random_normal")
+
+model = keras.Sequential(
+    [
+        custom_layer,
+        keras.layers.Dense(1, activation="sigmoid"),
+    ]
+)
+
+x = np.random.random((16, 8))
+y = model(x)
+
+model.save("custom_build_model.keras")
+restored_model = keras.models.load_model("custom_build_model.keras")
+
+np.testing.assert_equal(restored_model.layers[0].layer_init, "random_normal")
+np.testing.assert_equal(restored_model.built, True)
+
+
+

+get_compile_config() and +compile_from_config() +

+

These methods work together to save the information with which the +model was compiled (optimizers, losses, etc.) and restore and re-compile +the model with this information.

+

Overriding these methods can be useful for compiling the restored +model with custom optimizers, custom losses, etc., as these will need to +be deserialized prior to calling model.compile in +compile_from_config().

+

Let’s take a look at an example of this.

+

Example:

+
@keras.saving.register_keras_serializable(package="my_custom_package")
+def small_square_sum_loss(y_true, y_pred):
+    loss = keras.ops.square(y_pred - y_true)
+    loss = loss / 10.0
+    loss = keras.ops.sum(loss, axis=1)
+    return loss
+
+
+@keras.saving.register_keras_serializable(package="my_custom_package")
+def mean_pred(y_true, y_pred):
+    return keras.ops.mean(y_pred)
+
+
+@keras.saving.register_keras_serializable(package="my_custom_package")
+class ModelWithCustomCompile(keras.Model):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.dense1 = keras.layers.Dense(8, activation="relu")
+        self.dense2 = keras.layers.Dense(4, activation="softmax")
+
+    def call(self, inputs):
+        x = self.dense1(inputs)
+        return self.dense2(x)
+
+    def compile(self, optimizer, loss_fn, metrics):
+        super().compile(optimizer=optimizer, loss=loss_fn, metrics=metrics)
+        self.model_optimizer = optimizer
+        self.loss_fn = loss_fn
+        self.loss_metrics = metrics
+
+    def get_compile_config(self):
+        # These parameters will be serialized at saving time.
+        return {
+            "model_optimizer": self.model_optimizer,
+            "loss_fn": self.loss_fn,
+            "metric": self.loss_metrics,
+        }
+
+    def compile_from_config(self, config):
+        # Deserializes the compile parameters (important, since many are custom)
+        optimizer = keras.utils.deserialize_keras_object(config["model_optimizer"])
+        loss_fn = keras.utils.deserialize_keras_object(config["loss_fn"])
+        metrics = keras.utils.deserialize_keras_object(config["metric"])
+
+        # Calls compile with the deserialized parameters
+        self.compile(optimizer=optimizer, loss_fn=loss_fn, metrics=metrics)
+
+
+model = ModelWithCustomCompile()
+model.compile(
+    optimizer="SGD", loss_fn=small_square_sum_loss, metrics=["accuracy", mean_pred]
+)
+
+x = np.random.random((4, 8))
+y = np.random.random((4,))
+
+model.fit(x, y)
+
+model.save("custom_compile_model.keras")
+restored_model = keras.models.load_model("custom_compile_model.keras")
+
+np.testing.assert_equal(model.model_optimizer, restored_model.model_optimizer)
+np.testing.assert_equal(model.loss_fn, restored_model.loss_fn)
+np.testing.assert_equal(model.loss_metrics, restored_model.loss_metrics)
+
+
+
+

Conclusion +

+

The methods covered in this tutorial enable a wide variety of use +cases, allowing the saving and loading of complex models with exotic +assets and state elements. To recap:

+
    +
  • +save_own_variables and load_own_variables +determine how your states are saved and loaded.
  • +
  • +save_assets and load_assets can be added +to store and load any additional information your model needs.
  • +
  • +get_build_config and build_from_config +save and restore the model’s built states.
  • +
  • +get_compile_config and compile_from_config +save and restore the model’s compiled states.
  • +
+
+
+
diff --git a/docs/articles/distributed_training_with_jax.html b/docs/articles/distributed_training_with_jax.html
new file mode 100644
index 0000000000..94a389ea9d
--- /dev/null
+++ b/docs/articles/distributed_training_with_jax.html
@@ -0,0 +1,399 @@
+Multi-GPU distributed training with JAX • keras3

Introduction +

+

There are generally two ways to distribute computation across +multiple devices:

+

Data parallelism, where a single model gets +replicated on multiple devices or multiple machines. Each of them +processes different batches of data, then they merge their results. +There are many variants of this setup that differ in how the +different model replicas merge results, in whether they stay in sync at +every batch or whether they are more loosely coupled, etc.

+

Model parallelism, where different parts of a single +model run on different devices, processing a single batch of data +together. This works best with models that have a naturally-parallel +architecture, such as models that feature multiple branches.

+

This guide focuses on data parallelism, in particular +synchronous data parallelism, where the different +replicas of the model stay in sync after each batch they process. +Synchronicity keeps the model convergence behavior identical to what you +would see for single-device training.

+

Specifically, this guide teaches you how to use +jax.sharding APIs to train Keras models, with minimal +changes to your code, on multiple GPUs or TPUs (typically 2 to 16) +installed on a single machine (single host, multi-device training). This +is the most common setup for researchers and small-scale industry +workflows.

+
+
+

Setup +

+

Let’s start by defining the function that creates the model that we +will train, and the function that creates the dataset we will train on +(MNIST in this case).

+
import os
+
+os.environ["KERAS_BACKEND"] = "jax"
+
+import jax
+import numpy as np
+import tensorflow as tf
+import keras
+
+from jax.experimental import mesh_utils
+from jax.sharding import Mesh
+from jax.sharding import NamedSharding
+from jax.sharding import PartitionSpec as P
+
+
+def get_model():
+    # Make a simple convnet with batch normalization and dropout.
+    inputs = keras.Input(shape=(28, 28, 1))
+    x = keras.layers.Rescaling(1.0 / 255.0)(inputs)
+    x = keras.layers.Conv2D(
+        filters=12, kernel_size=3, padding="same", use_bias=False
+    )(x)
+    x = keras.layers.BatchNormalization(scale=False, center=True)(x)
+    x = keras.layers.ReLU()(x)
+    x = keras.layers.Conv2D(
+        filters=24,
+        kernel_size=6,
+        use_bias=False,
+        strides=2,
+    )(x)
+    x = keras.layers.BatchNormalization(scale=False, center=True)(x)
+    x = keras.layers.ReLU()(x)
+    x = keras.layers.Conv2D(
+        filters=32,
+        kernel_size=6,
+        padding="same",
+        strides=2,
+        name="large_k",
+    )(x)
+    x = keras.layers.BatchNormalization(scale=False, center=True)(x)
+    x = keras.layers.ReLU()(x)
+    x = keras.layers.GlobalAveragePooling2D()(x)
+    x = keras.layers.Dense(256, activation="relu")(x)
+    x = keras.layers.Dropout(0.5)(x)
+    outputs = keras.layers.Dense(10)(x)
+    model = keras.Model(inputs, outputs)
+    return model
+
+
+def get_datasets():
+    # Load the data and split it between train and test sets
+    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
+
+    # Scale images to the [0, 1] range
+    x_train = x_train.astype("float32")
+    x_test = x_test.astype("float32")
+    # Make sure images have shape (28, 28, 1)
+    x_train = np.expand_dims(x_train, -1)
+    x_test = np.expand_dims(x_test, -1)
+    print("x_train shape:", x_train.shape)
+    print(x_train.shape[0], "train samples")
+    print(x_test.shape[0], "test samples")
+
+    # Create TF Datasets
+    train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
+    eval_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))
+    return train_data, eval_data
+
+
+

Single-host, multi-device synchronous training +

+

In this setup, you have one machine with several GPUs or TPUs on it +(typically 2 to 16). Each device will run a copy of your model (called a +replica). For simplicity, in what follows, we’ll assume +we’re dealing with 8 GPUs, with no loss of generality.

+

How it works

+

At each step of training:

+
    +
  • The current batch of data (called global batch) is +split into 8 different sub-batches (called local +batches). For instance, if the global batch has 512 samples, +each of the 8 local batches will have 64 samples.
  • +
  • Each of the 8 replicas independently processes a local batch: they +run a forward pass, then a backward pass, outputting the gradient of the +loss of the model on the local batch with respect to the weights.
  • +
  • The weight updates originating from local gradients are efficiently +merged across the 8 replicas. Because this is done at the end of every +step, the replicas always stay in sync.
  • +
+

In practice, the process of synchronously updating the weights of the +model replicas is handled at the level of each individual weight +variable. This is done using a +jax.sharding.NamedSharding that is configured to replicate +the variables.

+

How to use it

+

To do single-host, multi-device synchronous training with a Keras +model, you would use the jax.sharding features. Here’s how +it works:

+
    +
  • We first create a device mesh using +mesh_utils.create_device_mesh.
  • +
  • We use jax.sharding.Mesh, +jax.sharding.NamedSharding and +jax.sharding.PartitionSpec to define how to partition JAX +arrays. +
      +
    • We specify that we want to replicate the model and optimizer +variables across all devices by using a spec with no axis.
    • +
    • We specify that we want to shard the data across devices by using a +spec that splits along the batch dimension.
    • +
    +
  • +
  • We use jax.device_put to replicate the model and +optimizer variables across devices. This happens once at the +beginning.
  • +
  • In the training loop, for each batch that we process, we use +jax.device_put to split the batch across devices before +invoking the train step.
  • +
+

Here’s the flow, where each step is split into its own utility +function:

+
# Config
+num_epochs = 2
+batch_size = 64
+
+train_data, eval_data = get_datasets()
+train_data = train_data.batch(batch_size, drop_remainder=True)
+
+model = get_model()
+optimizer = keras.optimizers.Adam(1e-3)
+loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
+
+# Initialize all state with .build()
+(one_batch, one_batch_labels) = next(iter(train_data))
+model.build(one_batch)
+optimizer.build(model.trainable_variables)
+
+
+# This is the loss function that will be differentiated.
+# Keras provides a pure functional forward pass: model.stateless_call
+def compute_loss(trainable_variables, non_trainable_variables, x, y):
+    y_pred, updated_non_trainable_variables = model.stateless_call(
+        trainable_variables, non_trainable_variables, x
+    )
+    loss_value = loss(y, y_pred)
+    return loss_value, updated_non_trainable_variables
+
+
+# Function to compute gradients
+compute_gradients = jax.value_and_grad(compute_loss, has_aux=True)
+
+
+# Training step, Keras provides a pure functional optimizer.stateless_apply
+@jax.jit
+def train_step(train_state, x, y):
+    (
+        trainable_variables,
+        non_trainable_variables,
+        optimizer_variables,
+    ) = train_state
+    (loss_value, non_trainable_variables), grads = compute_gradients(
+        trainable_variables, non_trainable_variables, x, y
+    )
+
+    trainable_variables, optimizer_variables = optimizer.stateless_apply(
+        optimizer_variables, grads, trainable_variables
+    )
+
+    return loss_value, (
+        trainable_variables,
+        non_trainable_variables,
+        optimizer_variables,
+    )
+
+
+# Replicate the model and optimizer variable on all devices
+def get_replicated_train_state(devices):
+    # All variables will be replicated on all devices
+    var_mesh = Mesh(devices, axis_names=("_"))
+    # In NamedSharding, axes not mentioned are replicated (all axes here)
+    var_replication = NamedSharding(var_mesh, P())
+
+    # Apply the distribution settings to the model variables
+    trainable_variables = jax.device_put(
+        model.trainable_variables, var_replication
+    )
+    non_trainable_variables = jax.device_put(
+        model.non_trainable_variables, var_replication
+    )
+    optimizer_variables = jax.device_put(optimizer.variables, var_replication)
+
+    # Combine all state in a tuple
+    return (trainable_variables, non_trainable_variables, optimizer_variables)
+
+
+num_devices = len(jax.local_devices())
+print(f"Running on {num_devices} devices: {jax.local_devices()}")
+devices = mesh_utils.create_device_mesh((num_devices,))
+
+# Data will be split along the batch axis
+data_mesh = Mesh(devices, axis_names=("batch",))  # naming axes of the mesh
+data_sharding = NamedSharding(
+    data_mesh,
+    P(
+        "batch",
+    ),
+)  # naming axes of the sharded partition
+
+# Display data sharding
+x, y = next(iter(train_data))
+sharded_x = jax.device_put(x.numpy(), data_sharding)
+print("Data sharding")
+jax.debug.visualize_array_sharding(jax.numpy.reshape(sharded_x, [-1, 28 * 28]))
+
+train_state = get_replicated_train_state(devices)
+
+# Custom training loop
+for epoch in range(num_epochs):
+    data_iter = iter(train_data)
+    for data in data_iter:
+        x, y = data
+        sharded_x = jax.device_put(x.numpy(), data_sharding)
+        loss_value, train_state = train_step(train_state, sharded_x, y.numpy())
+    print("Epoch", epoch, "loss:", loss_value)
+
+# Post-processing model state update to write them back into the model
+trainable_variables, non_trainable_variables, optimizer_variables = train_state
+for variable, value in zip(model.trainable_variables, trainable_variables):
+    variable.assign(value)
+for variable, value in zip(
+    model.non_trainable_variables, non_trainable_variables
+):
+    variable.assign(value)
+

That’s it!

+
+
+
diff --git a/docs/articles/distributed_training_with_tensorflow.html b/docs/articles/distributed_training_with_tensorflow.html
new file mode 100644
index 0000000000..2a399b8b2f
--- /dev/null
+++ b/docs/articles/distributed_training_with_tensorflow.html
@@ -0,0 +1,414 @@
+Multi-GPU distributed training with TensorFlow • keras3

Introduction +

+

There are generally two ways to distribute computation across +multiple devices:

+

Data parallelism, where a single model gets +replicated on multiple devices or multiple machines. Each of them +processes different batches of data, then they merge their results. +There are many variants of this setup that differ in how the +different model replicas merge results, in whether they stay in sync at +every batch or whether they are more loosely coupled, etc.

+

Model parallelism, where different parts of a single +model run on different devices, processing a single batch of data +together. This works best with models that have a naturally-parallel +architecture, such as models that feature multiple branches.

+

This guide focuses on data parallelism, in particular +synchronous data parallelism, where the different +replicas of the model stay in sync after each batch they process. +Synchronicity keeps the model convergence behavior identical to what you +would see for single-device training.

+

Specifically, this guide teaches you how to use the +tf.distribute API to train Keras models, with minimal +changes to your code, on multiple GPUs (typically 2 to 16) +installed on a single machine (single host, multi-device training). This +is the most common setup for researchers and small-scale industry +workflows.

+
+
+

Setup +

+
+library(keras3)
+library(tensorflow, exclude = c("shape", "set_random_seed"))
+library(tfdatasets, exclude = "shape")
+
+
+

Single-host, multi-device synchronous training +

+

In this setup, you have one machine with several GPUs on it +(typically 2 to 16). Each device will run a copy of your model (called a +replica). For simplicity, in what follows, we’ll assume +we’re dealing with 8 GPUs, with no loss of generality.

+

How it works

+

At each step of training:

+
    +
  • The current batch of data (called global batch) is +split into 8 different sub-batches (called local +batches). For instance, if the global batch has 512 samples, +each of the 8 local batches will have 64 samples.
  • +
  • Each of the 8 replicas independently processes a local batch: they +run a forward pass, then a backward pass, outputting the gradient of the +loss of the model on the local batch with respect to the weights.
  • +
  • The weight updates originating from local gradients are efficiently +merged across the 8 replicas. Because this is done at the end of every +step, the replicas always stay in sync.
  • +
+

In practice, the process of synchronously updating the weights of the +model replicas is handled at the level of each individual weight +variable. This is done through a mirrored variable +object.

+

How to use it

+

To do single-host, multi-device synchronous training with a Keras +model, you would use the tf$distribute$MirroredStrategy +API. Here’s how it works:

+
    +
  • Instantiate a MirroredStrategy, optionally configuring +which specific devices you want to use (by default the strategy will use +all GPUs available).
  • +
  • Use the strategy object to open a scope, and within this scope, +create all the Keras objects you need that contain variables. Typically, +that means creating & compiling the model inside +the distribution scope. In some cases, the first call to +fit() may also create variables, so it’s a good idea to put +your fit() call in the scope as well.
  • +
  • Train the model via fit() as usual.
  • +
+

Importantly, we recommend that you use tf.data.Dataset +objects to load data in a multi-device or distributed workflow.

+

Schematically, it looks like this:

+
+# Create a MirroredStrategy.
+strategy <- tf$distribute$MirroredStrategy()
+cat(sprintf('Number of devices: %d\n', strategy$num_replicas_in_sync))
+
+# Open a strategy scope.
+with(strategy$scope(), {
+  # Everything that creates variables should be under the strategy scope.
+  # In general this is only model construction & `compile()`.
+  model <- keras_model(...)
+  model |> compile(...)
+
+  # Train the model on all available devices.
+  model |> fit(train_dataset, validation_data=val_dataset, ...)
+
+  # Test the model on all available devices.
+  model |> evaluate(test_dataset)
+})
+

Here’s a simple end-to-end runnable example:

+
+get_compiled_model <- function() {
+  inputs <- keras_input(shape = 784)
+  outputs <- inputs |>
+    layer_dense(units = 256, activation = "relu") |>
+    layer_dense(units = 256, activation = "relu") |>
+    layer_dense(units = 10)
+  model <- keras_model(inputs, outputs)
+  model |> compile(
+    optimizer = optimizer_adam(),
+    loss = loss_sparse_categorical_crossentropy(from_logits = TRUE),
+    metrics = list(metric_sparse_categorical_accuracy()),
+
+    # XLA compilation is temporarily disabled due to a bug
+    # https://github.com/keras-team/keras/issues/19005
+    jit_compile = FALSE
+  )
+  model
+}
+
+get_dataset <- function(batch_size = 64) {
+
+  c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist()
+  x_train <- array_reshape(x_train, c(-1, 784))
+  x_test <- array_reshape(x_test, c(-1, 784))
+
+  # Reserve 10,000 samples for validation.
+  val_i <- sample.int(nrow(x_train), 10000)
+  x_val <- x_train[val_i,]
+  y_val <- y_train[val_i]
+  x_train <- x_train[-val_i,]
+  y_train <- y_train[-val_i]
+
+  # Prepare the training dataset.
+  train_dataset <- list(x_train, y_train) |>
+    tensor_slices_dataset() |>
+    dataset_batch(batch_size)
+
+  # Prepare the validation dataset.
+  val_dataset <- list(x_val, y_val) |>
+    tensor_slices_dataset() |>
+    dataset_batch(batch_size)
+
+  # Prepare the test dataset.
+  test_dataset <- list(x_test, y_test) |>
+    tensor_slices_dataset() |>
+    dataset_batch(batch_size)
+
+  list(train_dataset, val_dataset, test_dataset)
+}
+
+# Create a MirroredStrategy.
+strategy <- tf$distribute$MirroredStrategy()
+cat(sprintf('Number of devices: %d\n', strategy$num_replicas_in_sync))
+
## Number of devices: 2
+
+# Open a strategy scope.
+with(strategy$scope(), {
+  # Everything that creates variables should be under the strategy scope.
+  # In general this is only model construction & `compile()`.
+  model <- get_compiled_model()
+
+  c(train_dataset, val_dataset, test_dataset) %<-% get_dataset()
+
+  # Train the model on all available devices.
+  model |> fit(train_dataset, epochs = 2, validation_data = val_dataset)
+
+  # Test the model on all available devices.
+  model |> evaluate(test_dataset)
+
+})
+
## Epoch 1/2
+## 782/782 - 6s - 7ms/step - loss: 3.0622 - sparse_categorical_accuracy: 0.8615 - val_loss: 1.1367 - val_sparse_categorical_accuracy: 0.9006
+## Epoch 2/2
+## 782/782 - 3s - 4ms/step - loss: 0.5774 - sparse_categorical_accuracy: 0.9259 - val_loss: 0.6612 - val_sparse_categorical_accuracy: 0.9210
+## 157/157 - 0s - 3ms/step - loss: 0.6729 - sparse_categorical_accuracy: 0.9150
+
## $loss
+## [1] 0.6728871
+##
+## $sparse_categorical_accuracy
+## [1] 0.915
+
+
+

Using callbacks to ensure fault tolerance +

+

When using distributed training, you should always make sure you have +a strategy to recover from failure (fault tolerance). The simplest way +to handle this is to pass a ModelCheckpoint callback to +fit(), to save your model at regular intervals (e.g. every +100 batches or every epoch). You can then restart training from your +saved model.

+

Here’s a simple example:

+
+# Prepare a directory to store all the checkpoints.
+checkpoint_dir <- "./ckpt"
+if (!dir.exists(checkpoint_dir)) {
+  dir.create(checkpoint_dir)
+}
+
+make_or_restore_model <- function() {
+  # Either restore the latest model, or create a fresh one
+  # if there is no checkpoint available.
+  checkpoints <- list.files(checkpoint_dir, #pattern = "ckpt-.*\\.keras",
+                            full.names = TRUE)
+
+  if (length(checkpoints) > 0) {
+    checkpoint_epochs <- as.integer(sub("ckpt-([0-9]+)\\.keras", "\\1",
+                                        basename(checkpoints)))
+    latest_checkpoint <- checkpoints[which.max(checkpoint_epochs)]
+    load_model(latest_checkpoint)
+  } else {
+    get_compiled_model()
+  }
+}
+
+
+
+run_training <- function(epochs = 1) {
+  # Create a MirroredStrategy.
+  strategy <- tf$distribute$MirroredStrategy()
+
+  # Open a strategy scope and create/restore the model
+  with(strategy$scope(), {
+    model <- make_or_restore_model()
+
+    callbacks <- list(
+      # This callback saves the model as a `.keras` file every epoch.
+      # We include the current epoch in the file name.
+      callback_model_checkpoint(
+        filepath = paste0(checkpoint_dir, "/ckpt-{epoch}.keras"),
+        save_freq = "epoch"
+      ))
+
+    model |> fit(
+      train_dataset,
+      epochs = epochs,
+      callbacks = callbacks,
+      validation_data = val_dataset,
+      verbose = 2
+    )
+  })
+}
+
+# Running the first time creates the model
+run_training(epochs = 1)
+
## 782/782 - 6s - 7ms/step - loss: 2.9519 - sparse_categorical_accuracy: 0.8655 - val_loss: 1.3110 - val_sparse_categorical_accuracy: 0.8836
+
+# Calling the same function again will resume from where we left off
+run_training(epochs = 1)
+
## 782/782 - 3s - 4ms/step - loss: 0.5998 - sparse_categorical_accuracy: 0.9270 - val_loss: 0.8736 - val_sparse_categorical_accuracy: 0.9128
+
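If you want to checkpoint more often than once per epoch, save_freq also accepts an integer number of training batches. A minimal sketch of that variant (the interval of 100 batches is an illustrative choice, not taken from the example above):
+callback_model_checkpoint(
+  filepath = paste0(checkpoint_dir, "/ckpt-{epoch}.keras"),
+  # Save every 100 training batches instead of at the end of each epoch.
+  save_freq = 100
+)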
+
+

+tf$data performance tips +

+

When doing distributed training, the efficiency with which you load +data can often become critical. Here are a few tips to make sure your +tf$data pipelines run as fast as possible.

+

Note about dataset batching

+

When creating your dataset, make sure it is batched with the global +batch size. For instance, if each of your 8 GPUs is capable of running a +batch of 64 samples, you would use a global batch size of 512.

+
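A common pattern is to derive the global batch size from the per-replica batch size and strategy$num_replicas_in_sync. A minimal sketch, reusing the dataset helpers from the example above (the per-replica size of 64 and the x_train/y_train arrays are illustrative):
+per_replica_batch_size <- 64
+global_batch_size <- per_replica_batch_size * strategy$num_replicas_in_sync
+
+# Batch with the global batch size: each replica then receives a
+# sub-batch of 64 samples at every step.
+train_dataset <- list(x_train, y_train) |>
+  tensor_slices_dataset() |>
+  dataset_batch(global_batch_size)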

Calling dataset_cache()

+

If you call dataset_cache() on a dataset, its data will +be cached after running through the first iteration over the data. Every +subsequent iteration will use the cached data. The cache can be in +memory (the default) or written to a local file you specify.

+

This can improve performance when:

+
    +
  • Your data is not expected to change from iteration to iteration
  • +
  • You are reading data from a remote distributed filesystem
  • +
  • You are reading data from local disk, but your data would fit in +memory and your workflow is significantly IO-bound (e.g. reading & +decoding image files).
  • +
+

Calling +dataset_prefetch(buffer_size)

+

You should almost always call +dataset_prefetch(buffer_size) after creating a dataset. This +means your data pipeline will run asynchronously from your model, with +new samples being preprocessed and stored in a buffer while the current +batch samples are used to train the model. The next batch will be +prefetched into GPU memory by the time the current batch is over.

+
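Putting these tips together, a typical distributed input pipeline looks like the following sketch (assuming your data fits the caching criteria above; tf$data$AUTOTUNE lets TensorFlow pick the prefetch buffer size):
+train_dataset <- list(x_train, y_train) |>
+  tensor_slices_dataset() |>
+  dataset_batch(global_batch_size) |>
+  # Cache after the first full iteration over the data (in memory here).
+  dataset_cache() |>
+  # Overlap preprocessing on the CPU with training on the GPUs.
+  dataset_prefetch(buffer_size = tf$data$AUTOTUNE)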

That’s it!

+
+
+
diff --git a/docs/articles/distributed_training_with_torch.html b/docs/articles/distributed_training_with_torch.html
new file mode 100644
index 0000000000..95cebd5c0c
--- /dev/null
+++ b/docs/articles/distributed_training_with_torch.html
@@ -0,0 +1,384 @@
+Multi-GPU distributed training with PyTorch • keras3

Introduction +

+

There are generally two ways to distribute computation across +multiple devices:

+

Data parallelism, where a single model gets +replicated on multiple devices or multiple machines. Each of them +processes different batches of data, then they merge their results. +There are many variants of this setup that differ in how the +different model replicas merge results, in whether they stay in sync at +every batch or whether they are more loosely coupled, etc.

+

Model parallelism, where different parts of a single +model run on different devices, processing a single batch of data +together. This works best with models that have a naturally-parallel +architecture, such as models that feature multiple branches.

+

This guide focuses on data parallelism, in particular +synchronous data parallelism, where the different +replicas of the model stay in sync after each batch they process. +Synchronicity keeps the model convergence behavior identical to what you +would see for single-device training.

+

Specifically, this guide teaches you how to use PyTorch’s +DistributedDataParallel module wrapper to train Keras models, with +minimal changes to your code, on multiple GPUs (typically 2 to 16) +installed on a single machine (single host, multi-device training). This +is the most common setup for researchers and small-scale industry +workflows.

+
+
+

Setup +

+

Let’s start by defining the function that creates the model that we +will train, and the function that creates the dataset we will train on +(MNIST in this case).

+
import os
+
+os.environ["KERAS_BACKEND"] = "torch"
+
+import torch
+import numpy as np
+import keras
+
+
+def get_model():
+    # Make a simple convnet with batch normalization and dropout.
+    inputs = keras.Input(shape=(28, 28, 1))
+    x = keras.layers.Rescaling(1.0 / 255.0)(inputs)
+    x = keras.layers.Conv2D(
+        filters=12, kernel_size=3, padding="same", use_bias=False
+    )(x)
+    x = keras.layers.BatchNormalization(scale=False, center=True)(x)
+    x = keras.layers.ReLU()(x)
+    x = keras.layers.Conv2D(
+        filters=24,
+        kernel_size=6,
+        use_bias=False,
+        strides=2,
+    )(x)
+    x = keras.layers.BatchNormalization(scale=False, center=True)(x)
+    x = keras.layers.ReLU()(x)
+    x = keras.layers.Conv2D(
+        filters=32,
+        kernel_size=6,
+        padding="same",
+        strides=2,
+        name="large_k",
+    )(x)
+    x = keras.layers.BatchNormalization(scale=False, center=True)(x)
+    x = keras.layers.ReLU()(x)
+    x = keras.layers.GlobalAveragePooling2D()(x)
+    x = keras.layers.Dense(256, activation="relu")(x)
+    x = keras.layers.Dropout(0.5)(x)
+    outputs = keras.layers.Dense(10)(x)
+    model = keras.Model(inputs, outputs)
+    return model
+
+
+def get_dataset():
+    # Load the data and split it between train and test sets
+    (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
+
+    # Scale images to the [0, 1] range
+    x_train = x_train.astype("float32")
+    x_test = x_test.astype("float32")
+    # Make sure images have shape (28, 28, 1)
+    x_train = np.expand_dims(x_train, -1)
+    x_test = np.expand_dims(x_test, -1)
+    print("x_train shape:", x_train.shape)
+
+    # Create a TensorDataset
+    dataset = torch.utils.data.TensorDataset(
+        torch.from_numpy(x_train), torch.from_numpy(y_train)
+    )
+    return dataset
+

Next, let’s define a simple PyTorch training loop that targets a GPU +(note the calls to .cuda()).

+
def train_model(model, dataloader, num_epochs, optimizer, loss_fn):
+    for epoch in range(num_epochs):
+        running_loss = 0.0
+        running_loss_count = 0
+        for batch_idx, (inputs, targets) in enumerate(dataloader):
+            inputs = inputs.cuda(non_blocking=True)
+            targets = targets.cuda(non_blocking=True)
+
+            # Forward pass
+            outputs = model(inputs)
+            loss = loss_fn(outputs, targets)
+
+            # Backward and optimize
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+
+            running_loss += loss.item()
+            running_loss_count += 1
+
+        # Print loss statistics
+        print(
+            f"Epoch {epoch + 1}/{num_epochs}, "
+            f"Loss: {running_loss / running_loss_count}"
+        )
+
+
+

Single-host, multi-device synchronous training +

+

In this setup, you have one machine with several GPUs on it +(typically 2 to 16). Each device will run a copy of your model (called a +replica). For simplicity, in what follows, we’ll assume +we’re dealing with 8 GPUs, with no loss of generality.

+

How it works

+

At each step of training:

+
    +
  • The current batch of data (called global batch) is +split into 8 different sub-batches (called local +batches). For instance, if the global batch has 512 samples, +each of the 8 local batches will have 64 samples.
  • +
  • Each of the 8 replicas independently processes a local batch: they +run a forward pass, then a backward pass, outputting the gradient of the +loss of the model on the local batch with respect to the weights.
  • +
  • The weight updates originating from local gradients are efficiently +merged across the 8 replicas. Because this is done at the end of every +step, the replicas always stay in sync.
  • +
+

In practice, the process of synchronously updating the weights of the +model replicas is handled at the level of each individual weight +variable. This is done through a mirrored variable +object.

+

How to use it

+

To do single-host, multi-device synchronous training with a Keras +model, you would use the +torch.nn.parallel.DistributedDataParallel module wrapper. +Here’s how it works:

+
    +
  • We use torch.multiprocessing.start_processes to start +multiple Python processes, one per device. Each process will run the +per_device_launch_fn function.
  • +
  • The per_device_launch_fn function does the following: +
      +
    • It uses torch.distributed.init_process_group and +torch.cuda.set_device to configure the device to be used +for that process.
    • +
    • It uses torch.utils.data.distributed.DistributedSampler +and torch.utils.data.DataLoader to turn our data into a +distributed data loader.
    • +
    • It also uses torch.nn.parallel.DistributedDataParallel +to turn our model into a distributed PyTorch module.
    • +
    • It then calls the train_model function.
    • +
    +
  • +
  • The train_model function will then run in each process, +with the model using a separate device in each process.
  • +
+

Here’s the flow, where each step is split into its own utility +function:

+
# Config
+num_gpu = torch.cuda.device_count()
+num_epochs = 2
+batch_size = 64
+print(f"Running on {num_gpu} GPUs")
+
+
+def setup_device(current_gpu_index, num_gpus):
+    # Device setup
+    os.environ["MASTER_ADDR"] = "localhost"
+    os.environ["MASTER_PORT"] = "56492"
+    device = torch.device("cuda:{}".format(current_gpu_index))
+    torch.distributed.init_process_group(
+        backend="nccl",
+        init_method="env://",
+        world_size=num_gpus,
+        rank=current_gpu_index,
+    )
+    torch.cuda.set_device(device)
+
+
+def cleanup():
+    torch.distributed.destroy_process_group()
+
+
+def prepare_dataloader(dataset, current_gpu_index, num_gpus, batch_size):
+    sampler = torch.utils.data.distributed.DistributedSampler(
+        dataset,
+        num_replicas=num_gpus,
+        rank=current_gpu_index,
+        shuffle=False,
+    )
+    dataloader = torch.utils.data.DataLoader(
+        dataset,
+        sampler=sampler,
+        batch_size=batch_size,
+        shuffle=False,
+    )
+    return dataloader
+
+
+def per_device_launch_fn(current_gpu_index, num_gpu):
+    # Setup the process groups
+    setup_device(current_gpu_index, num_gpu)
+
+    dataset = get_dataset()
+    model = get_model()
+
+    # prepare the dataloader
+    dataloader = prepare_dataloader(
+        dataset, current_gpu_index, num_gpu, batch_size
+    )
+
+    # Instantiate the torch optimizer
+    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
+
+    # Instantiate the torch loss function
+    loss_fn = torch.nn.CrossEntropyLoss()
+
+    # Put model on device
+    model = model.to(current_gpu_index)
+    ddp_model = torch.nn.parallel.DistributedDataParallel(
+        model, device_ids=[current_gpu_index], output_device=current_gpu_index
+    )
+
+    train_model(ddp_model, dataloader, num_epochs, optimizer, loss_fn)
+
+    cleanup()
+

Time to start multiple processes:

+
if __name__ == "__main__":
+    # We use the "fork" method rather than "spawn" to support notebooks
+    torch.multiprocessing.start_processes(
+        per_device_launch_fn,
+        args=(num_gpu,),
+        nprocs=num_gpu,
+        join=True,
+        start_method="fork",
+    )
+

That’s it!

+
+
+
diff --git a/docs/articles/distribution.html b/docs/articles/distribution.html
new file mode 100644
index 0000000000..336002a4c7
--- /dev/null
+++ b/docs/articles/distribution.html
@@ -0,0 +1,429 @@
+Distributed training with Keras 3 • keras3

Introduction +

+

The Keras distribution API is a new interface designed to facilitate +distributed deep learning across a variety of backends like JAX, +TensorFlow and PyTorch. This powerful API introduces a suite of tools +enabling data and model parallelism, allowing for efficient scaling of +deep learning models on multiple accelerators and hosts. Whether +leveraging the power of GPUs or TPUs, the API provides a streamlined +approach to initializing distributed environments, defining device +meshes, and orchestrating the layout of tensors across computational +resources. Through classes like DataParallel and +ModelParallel, it abstracts the complexity involved in +parallel computation, making it easier for developers to accelerate +their machine learning workflows.

+
+
+

How it works +

+

The Keras distribution API provides a global programming model that +allows developers to compose applications that operate on tensors in a +global context (as if working with a single device) while automatically +managing distribution across many devices. The API leverages the +underlying framework (e.g. JAX) to distribute the program and tensors +according to the sharding directives through a procedure called single +program, multiple data (SPMD) expansion.

+

By decoupling the application from sharding directives, the API +enables running the same application on a single device, multiple +devices, or even multiple clients, while preserving its global +semantics.

+
+
+

Setup +

+
+# This guide assumes there are 8 GPUs available for testing. If you don't have
+# 8 GPUs available locally, you can set the following environment variables
+# to make XLA initialize the CPU as 8 devices, to enable local testing.
+Sys.setenv("CUDA_VISIBLE_DEVICES" = "")
+Sys.setenv("XLA_FLAGS" = "--xla_force_host_platform_device_count=8")
+
+library(keras3)
+
+# The distribution API is only implemented for the JAX backend for now.
+use_backend("jax")
+jax <- reticulate::import("jax")
+
+library(tfdatasets, exclude = "shape") # For dataset input.
+
+
+

+DeviceMesh and TensorLayout +

+

The keras$distribution$DeviceMesh class in the Keras +distribution API represents a cluster of computational devices +configured for distributed computation. It aligns with similar concepts +in jax.sharding.Mesh +and tf.dtensor.Mesh, +where it’s used to map the physical devices to a logical mesh +structure.

+

The TensorLayout class then specifies how tensors are +distributed across the DeviceMesh, detailing the sharding +of tensors along specified axes that correspond to the names of the axes +in the DeviceMesh.

+

You can find more detailed concept explainers in the TensorFlow +DTensor guide.

+
+# Retrieve the local available gpu devices.
+devices <- jax$devices() # "gpu"
+str(devices)
+
## List of 8
+##  $ :TFRT_CPU_0
+##  $ :TFRT_CPU_1
+##  $ :TFRT_CPU_2
+##  $ :TFRT_CPU_3
+##  $ :TFRT_CPU_4
+##  $ :TFRT_CPU_5
+##  $ :TFRT_CPU_6
+##  $ :TFRT_CPU_7
+
+# Define a 2x4 device mesh with data and model parallel axes
+mesh <- keras$distribution$DeviceMesh(
+  shape = shape(2, 4),
+  axis_names = list("data", "model"),
+  devices = devices
+)
+
+# A 2D layout, which describes how a tensor is distributed across the
+# mesh. The layout can be visualized as a 2D grid with "model" as rows and
+# "data" as columns, and it is a [4, 2] grid when it mapped to the physical
+# devices on the mesh.
+layout_2d <- keras$distribution$TensorLayout(
+  axes = c("model", "data"),
+  device_mesh = mesh
+)
+
+# A 4D layout which could be used for data parallelism of an image input.
+replicated_layout_4d <- keras$distribution$TensorLayout(
+  axes = list("data", NULL, NULL, NULL),
+  device_mesh = mesh
+)
+
+
+

Distribution +

+

The Distribution class in Keras serves as a foundational +abstract class designed for developing custom distribution strategies. +It encapsulates the core logic needed to distribute a model’s variables, +input data, and intermediate computations across a device mesh. As an +end user, you won’t have to interact directly with this class; instead, +you will use its subclasses, such as DataParallel or +ModelParallel.

+
+
+

DataParallel +

+

The DataParallel class in the Keras distribution API is +designed for the data parallelism strategy in distributed training, +where the model weights are replicated across all devices in the +DeviceMesh, and each device processes a portion of the +input data.

+

Here is a sample usage of this class.

+
+# Create DataParallel with list of devices.
+# As a shortcut, the devices can be skipped,
+# and Keras will detect all local available devices.
+# E.g. data_parallel <- DataParallel()
+data_parallel <- keras$distribution$DataParallel(devices = devices)
+
+# Or you can choose to create DataParallel with a 1D `DeviceMesh`.
+mesh_1d <- keras$distribution$DeviceMesh(
+  shape = shape(8),
+  axis_names = list("data"),
+  devices = devices
+)
+data_parallel <- keras$distribution$DataParallel(device_mesh = mesh_1d)
+
+inputs <- random_normal(c(128, 28, 28, 1))
+labels <- random_normal(c(128, 10))
+dataset <- tensor_slices_dataset(c(inputs, labels)) |>
+  dataset_batch(16)
+
+# Set the global distribution.
+keras$distribution$set_distribution(data_parallel)
+
+# Note that all the model weights from here on are replicated to
+# all the devices of the `DeviceMesh`. This includes the RNG
+# state, optimizer states, metrics, etc. The dataset fed into `model |> fit()` or
+# `model |> evaluate()` will be split evenly on the batch dimension, and sent to
+# all the devices. You don't have to do any manual aggregation of losses,
+# since all the computation happens in a global context.
+inputs <- keras_input(shape = c(28, 28, 1))
+outputs <- inputs |>
+  layer_flatten() |>
+  layer_dense(units = 200, use_bias = FALSE, activation = "relu") |>
+  layer_dropout(0.4) |>
+  layer_dense(units = 10, activation = "softmax")
+
+model <- keras_model(inputs = inputs, outputs = outputs)
+
+model |> compile(loss = "mse")
+model |> fit(dataset, epochs = 3)
+
## Epoch 1/3
+## 8/8 - 0s - 47ms/step - loss: 1.0629
+## Epoch 2/3
+## 8/8 - 0s - 7ms/step - loss: 0.9712
+## Epoch 3/3
+## 8/8 - 0s - 7ms/step - loss: 0.9322
+
+model |> evaluate(dataset)
+
## 8/8 - 0s - 7ms/step - loss: 0.8859
+
## $loss
+## [1] 0.8858577
+
+
+

+ModelParallel and LayoutMap +

+

ModelParallel will be mostly useful when model weights +are too large to fit on a single accelerator. This setting allows you to +split your model weights or activation tensors across all the devices on +the DeviceMesh, and enables horizontal scaling for +large models.

+

Unlike DataParallel, where all weights are +fully replicated, the weights layout under ModelParallel +usually needs some customization for best performance. We introduce +LayoutMap to let you specify the TensorLayout +for any weights and intermediate tensors from a global perspective.

+

LayoutMap is a dict-like object that maps a string to +TensorLayout instances. It behaves differently from a +normal dict in that the string key is treated as a regex when retrieving +the value. The class allows you to define the naming schema of +TensorLayout and then retrieve the corresponding +TensorLayout instance. Typically, the key used to query is +the variable$path attribute, which is the identifier of the +variable. As a shortcut, a list of axis names is also allowed when +inserting a value, and it will be converted to +TensorLayout.

+

The LayoutMap can also optionally contain a +DeviceMesh to populate the +TensorLayout$device_mesh if it is not set. When retrieving +a layout with a key, if there isn’t an exact match, all existing +keys in the layout map will be treated as regex and matched against the +input key again. If there are multiple matches, a +ValueError is raised. If no matches are found, +NULL is returned (a small retrieval sketch follows the example below).

+
+mesh_2d <- keras$distribution$DeviceMesh(
+  shape = shape(2, 4),
+  axis_names = c("data", "model"),
+  devices = devices
+)
+layout_map  <- keras$distribution$LayoutMap(mesh_2d)
+
+# The rule below means that for any weights that match with d1/kernel, it
+# will be sharded with model dimensions (4 devices), same for the d1/bias.
+# All other weights will be fully replicated.
+layout_map["d1/kernel"] <- tuple(NULL, "model")
+layout_map["d1/bias"] <- tuple("model")
+
+# You can also set the layout for the layer output like
+layout_map["d2/output"] <- tuple("data", NULL)
+
+model_parallel <- keras$distribution$ModelParallel(
+  mesh_2d, layout_map, batch_dim_name = "data"
+)
+
+keras$distribution$set_distribution(model_parallel)
+
+inputs <- layer_input(shape = c(28, 28, 1))
+outputs <- inputs |>
+  layer_flatten() |>
+  layer_dense(units = 200, use_bias = FALSE,
+              activation = "relu", name = "d1") |>
+  layer_dropout(0.4) |>
+  layer_dense(units = 10,
+              activation = "softmax",
+              name = "d2")
+
+model <- keras_model(inputs = inputs, outputs = outputs)
+
+# The data will be sharded across the "data" dimension of the mesh, which
+# has 2 devices.
+model |> compile(loss = "mse")
+model |> fit(dataset, epochs = 3)
+
## Epoch 1/3
+## 8/8 - 0s - 29ms/step - loss: 1.0714
+## Epoch 2/3
+## 8/8 - 0s - 4ms/step - loss: 0.9744
+## Epoch 3/3
+## 8/8 - 0s - 5ms/step - loss: 0.9280
+
+model |> evaluate(dataset)
+
## 8/8 - 0s - 9ms/step - loss: 0.8802
+
## $loss
+## [1] 0.8802156
+

It is also easy to change the mesh structure to tune the computation +between more data parallelism or more model parallelism. You can do this +by adjusting the shape of the mesh; no changes are needed for any other +code.

+
+full_data_parallel_mesh <- keras$distribution$DeviceMesh(
+  shape = shape(8, 1),
+  axis_names = list("data", "model"),
+  devices = devices
+)
+more_data_parallel_mesh <- keras$distribution$DeviceMesh(
+  shape = shape(4, 2),
+  axis_names = list("data", "model"),
+  devices = devices
+)
+more_model_parallel_mesh <- keras$distribution$DeviceMesh(
+  shape = shape(2, 4),
+  axis_names = list("data", "model"),
+  devices = devices
+)
+full_model_parallel_mesh <- keras$distribution$DeviceMesh(
+  shape = shape(1, 8),
+  axis_names = list("data", "model"),
+  devices = devices
+)
+ +
+
+
diff --git a/docs/articles/examples/autoencoder.html b/docs/articles/examples/autoencoder.html
new file mode 100644
index 0000000000..fcc011670a
--- /dev/null
+++ b/docs/articles/examples/autoencoder.html
@@ -0,0 +1,599 @@
+Convolutional autoencoder for image denoising • keras3

Introduction +

+

This example demonstrates how to implement a deep convolutional +autoencoder for image denoising, mapping noisy digit images from the +MNIST dataset to clean digit images. This implementation is based on an +original blog post titled Building +Autoencoders in Keras by François Chollet.

+
+
+

Setup +

+
+library(keras3)
+
+# Normalizes the supplied array and reshapes it.
+preprocess <- function(array) {
+  array_reshape(array/255, c(dim(array)[1], 28, 28, 1))
+}
+
+# Adds random noise to each image in the supplied array.
+noise <- function(array) {
+  noise_factor <- 0.4
+  noisy_array <- array + noise_factor * random_normal(dim(array))
+  op_clip(noisy_array, 0.0, 1.0)
+}
+
+display <- function(array1, array2) {
+  n <- 2
+  indices <- sample.int(dim(array1)[1], n)
+  images1 <- as.array(array1)[indices, , , ]
+  images2 <- as.array(array2)[indices, , , ]
+
+  par(mfrow = c(2, n), mar = c(0, 0, 0, 0))
+  for (i in seq_len(n)) {
+    plot(as.raster(images1[i, , ]))
+    plot(as.raster(images2[i, , ]))
+  }
+}
+
+
+

Prepare the data +

+
+# Since we only need images from the dataset to encode and decode, we
+# won't use the labels.
+c(c(train_data, .), c(test_data, .)) %<-% dataset_mnist()
+
+# Normalize and reshape the data
+train_data <- preprocess(train_data)
+test_data <- preprocess(test_data)
+
+# Create a copy of the data with added noise
+noisy_train_data <- noise(train_data)
+noisy_test_data <- noise(test_data)
+
+# Display the train data and a version of it with added noise
+display(train_data, noisy_train_data)
+
+plot of chunk unnamed-chunk-2
+
+
+
+

Build the autoencoder +

+

We are going to use the Functional API to build our convolutional +autoencoder.

+
+input <- keras_input(shape = c(28, 28, 1))
+
+# Encoder
+enc <- input |>
+  layer_conv_2d(filters = 32, kernel_size = c(3, 3),
+                activation = "relu", padding = "same") |>
+  layer_max_pooling_2d(pool_size = c(2, 2), padding = "same") |>
+  layer_conv_2d(filters = 32, kernel_size = c(3, 3),
+                activation = "relu", padding = "same") |>
+  layer_max_pooling_2d(pool_size = c(2, 2), padding = "same")
+
+# Decoder
+dec <- enc |>
+  layer_conv_2d_transpose(filters = 32, kernel_size = c(3, 3), strides = 2,
+                          activation = "relu", padding = "same") |>
+  layer_conv_2d_transpose(filters = 32, kernel_size = c(3, 3), strides = 2,
+                          activation = "relu", padding = "same") |>
+  layer_conv_2d(filters = 1, kernel_size = c(3, 3),
+                activation = "sigmoid", padding = "same")
+
+# Autoencoder
+autoencoder <- keras_model(input, dec)
+autoencoder |> compile(optimizer = "adam", loss = "binary_crossentropy")
+autoencoder |> summary()
+
## Model: "functional_1"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ input_layer (InputLayer)        │ (None, 28, 28, 1)      │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_1 (Conv2D)               │ (None, 28, 28, 32)     │           320
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ max_pooling2d_1 (MaxPooling2D)  │ (None, 14, 14, 32)     │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d (Conv2D)                 │ (None, 14, 14, 32)     │         9,248
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ max_pooling2d (MaxPooling2D)    │ (None, 7, 7, 32)       │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_transpose_1              │ (None, 14, 14, 32)     │         9,248
+## │ (Conv2DTranspose)               │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_transpose                │ (None, 28, 28, 32)     │         9,248
+## │ (Conv2DTranspose)               │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_2 (Conv2D)               │ (None, 28, 28, 1)      │           289
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 28,353 (110.75 KB)
+##  Trainable params: 28,353 (110.75 KB)
+##  Non-trainable params: 0 (0.00 B)
+

Now we can train our autoencoder using train_data as +both our input data and target. Notice we are setting up the validation +data using the same format.

+
+autoencoder |> fit(
+  x = train_data,
+  y = train_data,
+  epochs = 50,
+  batch_size = 128,
+  shuffle = TRUE,
+  validation_data = list(test_data, test_data),
+)
+
## Epoch 1/50
+## 469/469 - 5s - 10ms/step - loss: 0.1322 - val_loss: 0.0736
+## Epoch 2/50
+## 469/469 - 1s - 3ms/step - loss: 0.0721 - val_loss: 0.0698
+## Epoch 3/50
+## 469/469 - 1s - 3ms/step - loss: 0.0695 - val_loss: 0.0681
+## Epoch 4/50
+## 469/469 - 1s - 3ms/step - loss: 0.0682 - val_loss: 0.0671
+## Epoch 5/50
+## 469/469 - 1s - 3ms/step - loss: 0.0673 - val_loss: 0.0665
+## Epoch 6/50
+## 469/469 - 1s - 3ms/step - loss: 0.0667 - val_loss: 0.0660
+## Epoch 7/50
+## 469/469 - 1s - 3ms/step - loss: 0.0663 - val_loss: 0.0656
+## Epoch 8/50
+## 469/469 - 1s - 3ms/step - loss: 0.0659 - val_loss: 0.0652
+## Epoch 9/50
+## 469/469 - 1s - 3ms/step - loss: 0.0655 - val_loss: 0.0649
+## Epoch 10/50
+## 469/469 - 1s - 3ms/step - loss: 0.0652 - val_loss: 0.0646
+## Epoch 11/50
+## 469/469 - 1s - 3ms/step - loss: 0.0650 - val_loss: 0.0644
+## Epoch 12/50
+## 469/469 - 1s - 3ms/step - loss: 0.0647 - val_loss: 0.0642
+## Epoch 13/50
+## 469/469 - 1s - 3ms/step - loss: 0.0645 - val_loss: 0.0640
+## Epoch 14/50
+## 469/469 - 1s - 3ms/step - loss: 0.0643 - val_loss: 0.0638
+## Epoch 15/50
+## 469/469 - 1s - 3ms/step - loss: 0.0642 - val_loss: 0.0636
+## Epoch 16/50
+## 469/469 - 1s - 3ms/step - loss: 0.0640 - val_loss: 0.0635
+## Epoch 17/50
+## 469/469 - 1s - 3ms/step - loss: 0.0639 - val_loss: 0.0634
+## Epoch 18/50
+## 469/469 - 1s - 3ms/step - loss: 0.0638 - val_loss: 0.0633
+## Epoch 19/50
+## 469/469 - 1s - 3ms/step - loss: 0.0636 - val_loss: 0.0631
+## Epoch 20/50
+## 469/469 - 1s - 3ms/step - loss: 0.0635 - val_loss: 0.0631
+## Epoch 21/50
+## 469/469 - 1s - 3ms/step - loss: 0.0634 - val_loss: 0.0630
+## Epoch 22/50
+## 469/469 - 1s - 3ms/step - loss: 0.0634 - val_loss: 0.0629
+## Epoch 23/50
+## 469/469 - 1s - 3ms/step - loss: 0.0633 - val_loss: 0.0628
+## Epoch 24/50
+## 469/469 - 1s - 3ms/step - loss: 0.0632 - val_loss: 0.0628
+## Epoch 25/50
+## 469/469 - 1s - 3ms/step - loss: 0.0631 - val_loss: 0.0627
+## Epoch 26/50
+## 469/469 - 1s - 3ms/step - loss: 0.0631 - val_loss: 0.0626
+## Epoch 27/50
+## 469/469 - 1s - 3ms/step - loss: 0.0630 - val_loss: 0.0626
+## Epoch 28/50
+## 469/469 - 1s - 3ms/step - loss: 0.0629 - val_loss: 0.0625
+## Epoch 29/50
+## 469/469 - 1s - 3ms/step - loss: 0.0629 - val_loss: 0.0625
+## Epoch 30/50
+## 469/469 - 1s - 3ms/step - loss: 0.0628 - val_loss: 0.0624
+## Epoch 31/50
+## 469/469 - 1s - 3ms/step - loss: 0.0628 - val_loss: 0.0624
+## Epoch 32/50
+## 469/469 - 1s - 3ms/step - loss: 0.0627 - val_loss: 0.0623
+## Epoch 33/50
+## 469/469 - 1s - 3ms/step - loss: 0.0627 - val_loss: 0.0623
+## Epoch 34/50
+## 469/469 - 1s - 3ms/step - loss: 0.0627 - val_loss: 0.0623
+## Epoch 35/50
+## 469/469 - 1s - 3ms/step - loss: 0.0626 - val_loss: 0.0622
+## Epoch 36/50
+## 469/469 - 1s - 3ms/step - loss: 0.0626 - val_loss: 0.0622
+## Epoch 37/50
+## 469/469 - 1s - 3ms/step - loss: 0.0626 - val_loss: 0.0622
+## Epoch 38/50
+## 469/469 - 1s - 3ms/step - loss: 0.0625 - val_loss: 0.0622
+## Epoch 39/50
+## 469/469 - 1s - 3ms/step - loss: 0.0625 - val_loss: 0.0621
+## Epoch 40/50
+## 469/469 - 1s - 3ms/step - loss: 0.0625 - val_loss: 0.0621
+## Epoch 41/50
+## 469/469 - 1s - 3ms/step - loss: 0.0624 - val_loss: 0.0621
+## Epoch 42/50
+## 469/469 - 1s - 3ms/step - loss: 0.0624 - val_loss: 0.0621
+## Epoch 43/50
+## 469/469 - 1s - 3ms/step - loss: 0.0624 - val_loss: 0.0620
+## Epoch 44/50
+## 469/469 - 1s - 3ms/step - loss: 0.0624 - val_loss: 0.0620
+## Epoch 45/50
+## 469/469 - 1s - 3ms/step - loss: 0.0623 - val_loss: 0.0620
+## Epoch 46/50
+## 469/469 - 1s - 3ms/step - loss: 0.0623 - val_loss: 0.0620
+## Epoch 47/50
+## 469/469 - 1s - 3ms/step - loss: 0.0623 - val_loss: 0.0619
+## Epoch 48/50
+## 469/469 - 1s - 3ms/step - loss: 0.0623 - val_loss: 0.0619
+## Epoch 49/50
+## 469/469 - 1s - 3ms/step - loss: 0.0622 - val_loss: 0.0619
+## Epoch 50/50
+## 469/469 - 1s - 3ms/step - loss: 0.0622 - val_loss: 0.0619
+

Let’s predict on our test dataset and display the original image together with the prediction from our autoencoder.

+

Notice how the predictions are pretty close to the original images, although not quite the same.

+
+predictions <- autoencoder |> predict(test_data)
+
## 313/313 - 1s - 2ms/step
+
+display(test_data, predictions)
+
+[plot of chunk unnamed-chunk-5: original test images alongside their reconstructions]
+
+
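
The display() helper used above was defined earlier in this vignette. For readers jumping straight to this section, a minimal stand-in might look like the sketch below (an assumption about the helper’s behavior: it draws a few originals above their counterparts):

+
+# Minimal stand-in for the display() helper (illustrative sketch):
+# draw `n` random originals (top row) above the matching images from
+# `array2` (bottom row) as grayscale rasters.
+display <- function(array1, array2, n = 10) {
+  array1 <- as.array(array1); array2 <- as.array(array2)
+  idx <- sample(dim(array1)[1], n)
+  par(mfrow = c(2, n), mar = c(0, 0, 0, 0))
+  for (i in idx) plot(as.raster(array1[i, , , 1]))
+  for (i in idx) plot(as.raster(array2[i, , , 1]))
+}
+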

Now that we know that our autoencoder works, let’s retrain it using the noisy data as our input and the clean data as our target. We want our autoencoder to learn how to denoise the images.

+
+autoencoder |> fit(
+  x = noisy_train_data,
+  y = train_data,
+  epochs = 100,
+  batch_size = 128,
+  shuffle = TRUE,
+  validation_data = list(noisy_test_data, test_data)
+)
+
## Epoch 1/100
+## 469/469 - 1s - 3ms/step - loss: 0.0998 - val_loss: 0.0928
+## Epoch 2/100
+## 469/469 - 1s - 3ms/step - loss: 0.0925 - val_loss: 0.0908
+## Epoch 3/100
+## 469/469 - 1s - 3ms/step - loss: 0.0910 - val_loss: 0.0898
+## Epoch 4/100
+## 469/469 - 1s - 3ms/step - loss: 0.0900 - val_loss: 0.0890
+## Epoch 5/100
+## 469/469 - 1s - 3ms/step - loss: 0.0893 - val_loss: 0.0883
+## Epoch 6/100
+## 469/469 - 1s - 3ms/step - loss: 0.0888 - val_loss: 0.0877
+## Epoch 7/100
+## 469/469 - 1s - 3ms/step - loss: 0.0883 - val_loss: 0.0873
+## Epoch 8/100
+## 469/469 - 1s - 3ms/step - loss: 0.0879 - val_loss: 0.0870
+## Epoch 9/100
+## 469/469 - 1s - 3ms/step - loss: 0.0876 - val_loss: 0.0868
+## Epoch 10/100
+## 469/469 - 1s - 3ms/step - loss: 0.0873 - val_loss: 0.0866
+## Epoch 11/100
+## 469/469 - 1s - 3ms/step - loss: 0.0871 - val_loss: 0.0863
+## Epoch 12/100
+## 469/469 - 1s - 3ms/step - loss: 0.0869 - val_loss: 0.0861
+## Epoch 13/100
+## 469/469 - 1s - 3ms/step - loss: 0.0867 - val_loss: 0.0860
+## Epoch 14/100
+## 469/469 - 1s - 3ms/step - loss: 0.0865 - val_loss: 0.0858
+## Epoch 15/100
+## 469/469 - 1s - 3ms/step - loss: 0.0864 - val_loss: 0.0857
+## Epoch 16/100
+## 469/469 - 1s - 3ms/step - loss: 0.0862 - val_loss: 0.0855
+## Epoch 17/100
+## 469/469 - 1s - 3ms/step - loss: 0.0861 - val_loss: 0.0854
+## Epoch 18/100
+## 469/469 - 1s - 3ms/step - loss: 0.0860 - val_loss: 0.0853
+## Epoch 19/100
+## 469/469 - 1s - 3ms/step - loss: 0.0859 - val_loss: 0.0852
+## Epoch 20/100
+## 469/469 - 1s - 3ms/step - loss: 0.0858 - val_loss: 0.0852
+## Epoch 21/100
+## 469/469 - 1s - 3ms/step - loss: 0.0857 - val_loss: 0.0851
+## Epoch 22/100
+## 469/469 - 1s - 3ms/step - loss: 0.0856 - val_loss: 0.0850
+## Epoch 23/100
+## 469/469 - 1s - 3ms/step - loss: 0.0855 - val_loss: 0.0850
+## Epoch 24/100
+## 469/469 - 1s - 3ms/step - loss: 0.0855 - val_loss: 0.0849
+## Epoch 25/100
+## 469/469 - 1s - 3ms/step - loss: 0.0854 - val_loss: 0.0849
+## Epoch 26/100
+## 469/469 - 1s - 3ms/step - loss: 0.0854 - val_loss: 0.0848
+## Epoch 27/100
+## 469/469 - 1s - 3ms/step - loss: 0.0853 - val_loss: 0.0848
+## Epoch 28/100
+## 469/469 - 1s - 3ms/step - loss: 0.0853 - val_loss: 0.0847
+## Epoch 29/100
+## 469/469 - 1s - 3ms/step - loss: 0.0852 - val_loss: 0.0847
+## Epoch 30/100
+## 469/469 - 1s - 3ms/step - loss: 0.0852 - val_loss: 0.0847
+## Epoch 31/100
+## 469/469 - 1s - 3ms/step - loss: 0.0851 - val_loss: 0.0847
+## Epoch 32/100
+## 469/469 - 1s - 3ms/step - loss: 0.0851 - val_loss: 0.0846
+## Epoch 33/100
+## 469/469 - 1s - 3ms/step - loss: 0.0851 - val_loss: 0.0846
+## Epoch 34/100
+## 469/469 - 1s - 3ms/step - loss: 0.0850 - val_loss: 0.0846
+## Epoch 35/100
+## 469/469 - 1s - 3ms/step - loss: 0.0850 - val_loss: 0.0845
+## Epoch 36/100
+## 469/469 - 1s - 3ms/step - loss: 0.0850 - val_loss: 0.0845
+## Epoch 37/100
+## 469/469 - 1s - 3ms/step - loss: 0.0849 - val_loss: 0.0845
+## Epoch 38/100
+## 469/469 - 1s - 3ms/step - loss: 0.0849 - val_loss: 0.0845
+## Epoch 39/100
+## 469/469 - 1s - 3ms/step - loss: 0.0849 - val_loss: 0.0845
+## Epoch 40/100
+## 469/469 - 1s - 3ms/step - loss: 0.0849 - val_loss: 0.0844
+## Epoch 41/100
+## 469/469 - 1s - 3ms/step - loss: 0.0848 - val_loss: 0.0844
+## Epoch 42/100
+## 469/469 - 1s - 3ms/step - loss: 0.0848 - val_loss: 0.0844
+## Epoch 43/100
+## 469/469 - 1s - 3ms/step - loss: 0.0848 - val_loss: 0.0844
+## Epoch 44/100
+## 469/469 - 1s - 3ms/step - loss: 0.0848 - val_loss: 0.0844
+## Epoch 45/100
+## 469/469 - 1s - 3ms/step - loss: 0.0847 - val_loss: 0.0843
+## Epoch 46/100
+## 469/469 - 1s - 3ms/step - loss: 0.0847 - val_loss: 0.0843
+## Epoch 47/100
+## 469/469 - 1s - 3ms/step - loss: 0.0847 - val_loss: 0.0843
+## Epoch 48/100
+## 469/469 - 1s - 3ms/step - loss: 0.0847 - val_loss: 0.0843
+## Epoch 49/100
+## 469/469 - 1s - 3ms/step - loss: 0.0847 - val_loss: 0.0843
+## Epoch 50/100
+## 469/469 - 1s - 3ms/step - loss: 0.0846 - val_loss: 0.0843
+## Epoch 51/100
+## 469/469 - 1s - 3ms/step - loss: 0.0846 - val_loss: 0.0842
+## Epoch 52/100
+## 469/469 - 1s - 3ms/step - loss: 0.0846 - val_loss: 0.0842
+## Epoch 53/100
+## 469/469 - 1s - 3ms/step - loss: 0.0846 - val_loss: 0.0842
+## Epoch 54/100
+## 469/469 - 1s - 3ms/step - loss: 0.0846 - val_loss: 0.0842
+## Epoch 55/100
+## 469/469 - 1s - 3ms/step - loss: 0.0846 - val_loss: 0.0842
+## Epoch 56/100
+## 469/469 - 1s - 3ms/step - loss: 0.0845 - val_loss: 0.0842
+## Epoch 57/100
+## 469/469 - 1s - 3ms/step - loss: 0.0845 - val_loss: 0.0842
+## Epoch 58/100
+## 469/469 - 1s - 3ms/step - loss: 0.0845 - val_loss: 0.0842
+## Epoch 59/100
+## 469/469 - 1s - 3ms/step - loss: 0.0845 - val_loss: 0.0842
+## Epoch 60/100
+## 469/469 - 1s - 3ms/step - loss: 0.0845 - val_loss: 0.0841
+## Epoch 61/100
+## 469/469 - 1s - 3ms/step - loss: 0.0845 - val_loss: 0.0841
+## Epoch 62/100
+## 469/469 - 1s - 3ms/step - loss: 0.0845 - val_loss: 0.0841
+## Epoch 63/100
+## 469/469 - 1s - 3ms/step - loss: 0.0844 - val_loss: 0.0841
+## Epoch 64/100
+## 469/469 - 1s - 3ms/step - loss: 0.0844 - val_loss: 0.0841
+## Epoch 65/100
+## 469/469 - 1s - 3ms/step - loss: 0.0844 - val_loss: 0.0841
+## Epoch 66/100
+## 469/469 - 1s - 3ms/step - loss: 0.0844 - val_loss: 0.0841
+## Epoch 67/100
+## 469/469 - 1s - 3ms/step - loss: 0.0844 - val_loss: 0.0840
+## Epoch 68/100
+## 469/469 - 1s - 3ms/step - loss: 0.0844 - val_loss: 0.0840
+## Epoch 69/100
+## 469/469 - 1s - 3ms/step - loss: 0.0844 - val_loss: 0.0840
+## Epoch 70/100
+## 469/469 - 1s - 3ms/step - loss: 0.0844 - val_loss: 0.0840
+## Epoch 71/100
+## 469/469 - 1s - 3ms/step - loss: 0.0844 - val_loss: 0.0840
+## Epoch 72/100
+## 469/469 - 1s - 3ms/step - loss: 0.0844 - val_loss: 0.0840
+## Epoch 73/100
+## 469/469 - 1s - 3ms/step - loss: 0.0843 - val_loss: 0.0840
+## Epoch 74/100
+## 469/469 - 1s - 3ms/step - loss: 0.0843 - val_loss: 0.0840
+## Epoch 75/100
+## 469/469 - 1s - 3ms/step - loss: 0.0843 - val_loss: 0.0840
+## Epoch 76/100
+## 469/469 - 1s - 3ms/step - loss: 0.0843 - val_loss: 0.0840
+## Epoch 77/100
+## 469/469 - 1s - 3ms/step - loss: 0.0843 - val_loss: 0.0840
+## Epoch 78/100
+## 469/469 - 1s - 3ms/step - loss: 0.0843 - val_loss: 0.0840
+## Epoch 79/100
+## 469/469 - 1s - 3ms/step - loss: 0.0843 - val_loss: 0.0840
+## Epoch 80/100
+## 469/469 - 1s - 3ms/step - loss: 0.0843 - val_loss: 0.0840
+## Epoch 81/100
+## 469/469 - 1s - 3ms/step - loss: 0.0843 - val_loss: 0.0840
+## Epoch 82/100
+## 469/469 - 1s - 3ms/step - loss: 0.0843 - val_loss: 0.0840
+## Epoch 83/100
+## 469/469 - 1s - 3ms/step - loss: 0.0843 - val_loss: 0.0840
+## Epoch 84/100
+## 469/469 - 1s - 3ms/step - loss: 0.0842 - val_loss: 0.0839
+## Epoch 85/100
+## 469/469 - 1s - 3ms/step - loss: 0.0842 - val_loss: 0.0839
+## Epoch 86/100
+## 469/469 - 1s - 3ms/step - loss: 0.0842 - val_loss: 0.0839
+## Epoch 87/100
+## 469/469 - 1s - 3ms/step - loss: 0.0842 - val_loss: 0.0839
+## Epoch 88/100
+## 469/469 - 1s - 3ms/step - loss: 0.0842 - val_loss: 0.0839
+## Epoch 89/100
+## 469/469 - 1s - 3ms/step - loss: 0.0842 - val_loss: 0.0839
+## Epoch 90/100
+## 469/469 - 1s - 3ms/step - loss: 0.0842 - val_loss: 0.0839
+## Epoch 91/100
+## 469/469 - 1s - 3ms/step - loss: 0.0842 - val_loss: 0.0839
+## Epoch 92/100
+## 469/469 - 1s - 3ms/step - loss: 0.0842 - val_loss: 0.0839
+## Epoch 93/100
+## 469/469 - 1s - 3ms/step - loss: 0.0842 - val_loss: 0.0839
+## Epoch 94/100
+## 469/469 - 1s - 3ms/step - loss: 0.0842 - val_loss: 0.0839
+## Epoch 95/100
+## 469/469 - 1s - 3ms/step - loss: 0.0842 - val_loss: 0.0839
+## Epoch 96/100
+## 469/469 - 1s - 3ms/step - loss: 0.0842 - val_loss: 0.0839
+## Epoch 97/100
+## 469/469 - 1s - 3ms/step - loss: 0.0842 - val_loss: 0.0839
+## Epoch 98/100
+## 469/469 - 1s - 3ms/step - loss: 0.0841 - val_loss: 0.0839
+## Epoch 99/100
+## 469/469 - 1s - 3ms/step - loss: 0.0841 - val_loss: 0.0839
+## Epoch 100/100
+## 469/469 - 1s - 3ms/step - loss: 0.0841 - val_loss: 0.0839
+

Let’s now predict on the noisy data and display the results of our autoencoder.

+

Notice how the autoencoder does an amazing job at removing the noise from the input images.

+
+predictions <- autoencoder |> predict(noisy_test_data)
+
## 313/313 - 0s - 581us/step
+
+display(noisy_test_data, predictions)
+
+[plot of chunk unnamed-chunk-7: noisy inputs alongside the denoised outputs]
+
+
diff --git a/docs/articles/examples/autoencoder/unnamed-chunk-2-1.png b/docs/articles/examples/autoencoder/unnamed-chunk-2-1.png
new file mode 100644
index 0000000000..e23f5e78c7
Binary files /dev/null and b/docs/articles/examples/autoencoder/unnamed-chunk-2-1.png differ
diff --git a/docs/articles/examples/autoencoder/unnamed-chunk-5-1.png b/docs/articles/examples/autoencoder/unnamed-chunk-5-1.png
new file mode 100644
index 0000000000..f8980bd34e
Binary files /dev/null and b/docs/articles/examples/autoencoder/unnamed-chunk-5-1.png differ
diff --git a/docs/articles/examples/autoencoder/unnamed-chunk-7-1.png b/docs/articles/examples/autoencoder/unnamed-chunk-7-1.png
new file mode 100644
index 0000000000..1b3c7e6255
Binary files /dev/null and b/docs/articles/examples/autoencoder/unnamed-chunk-7-1.png differ
diff --git a/docs/articles/examples/index.html b/docs/articles/examples/index.html
index 9f7edda166..c4eb26ba3f 100644
--- a/docs/articles/examples/index.html
+++ b/docs/articles/examples/index.html
@@ -1,10 +1,231 @@
+Keras examples • keras3

Structured Data

+- Imbalanced classification: credit card fraud detection (basic).
+  Demonstration of how to handle highly imbalanced classification problems. [See code]
+- Structured data classification with FeatureSpace (basic).
+  Classify tabular data in a few lines of code. [See code]

Text

+- Text classification from scratch (basic).
+  Text sentiment classification starting from raw text files. [See code]

Timeseries

+- Timeseries anomaly detection using an Autoencoder (intermediate).
+  Detect anomalies in a timeseries using an Autoencoder. [See code]
+- Timeseries classification from scratch (basic).
+  Training a timeseries classifier from scratch on the FordA dataset from the UCR/UEA archive. [See code]

Vision

+- Convolutional autoencoder for image denoising (basic).
+  How to train a deep convolutional autoencoder for image denoising. [See code]
+- Simple MNIST convnet (basic).
+  A simple convnet that achieves ~99% test accuracy on MNIST. [See code]
+- Image segmentation with a U-Net-like architecture (intermediate).
+  Image segmentation model trained from scratch on the Oxford Pets dataset. [See code]
diff --git a/docs/articles/examples/mnist_convnet.html b/docs/articles/examples/mnist_convnet.html
new file mode 100644
index 0000000000..4fab480874
--- /dev/null
+++ b/docs/articles/examples/mnist_convnet.html
@@ -0,0 +1,267 @@
+Simple MNIST convnet • keras3

Setup

+
+library(keras3)
+
+
+

Prepare the data

+
+# Model / data parameters
+num_classes <- 10
+input_shape <- c(28, 28, 1)
+
+# Load the data and split it between train and test sets
+c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist()
+
+# Scale images to the [0, 1] range
+x_train <- x_train / 255
+x_test <- x_test / 255
+# Make sure images have shape (28, 28, 1)
+x_train <- op_expand_dims(x_train, -1)
+x_test <- op_expand_dims(x_test, -1)
+
+
+dim(x_train)
+
## [1] 60000    28    28     1
+
+dim(x_test)
+
## [1] 10000    28    28     1
+
+# convert class vectors to binary class matrices
+y_train <- to_categorical(y_train, num_classes)
+y_test <- to_categorical(y_test, num_classes)
+
+
+

Build the model

+
+model <- keras_model_sequential(input_shape = input_shape)
+model |>
+  layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = "relu") |>
+  layer_max_pooling_2d(pool_size = c(2, 2)) |>
+  layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") |>
+  layer_max_pooling_2d(pool_size = c(2, 2)) |>
+  layer_flatten() |>
+  layer_dropout(rate = 0.5) |>
+  layer_dense(units = num_classes, activation = "softmax")
+
+summary(model)
+
## Model: "sequential"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ conv2d_1 (Conv2D)               │ (None, 26, 26, 32)     │           320
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ max_pooling2d_1 (MaxPooling2D)  │ (None, 13, 13, 32)     │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d (Conv2D)                 │ (None, 11, 11, 64)     │        18,496
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ max_pooling2d (MaxPooling2D)    │ (None, 5, 5, 64)       │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ flatten (Flatten)               │ (None, 1600)           │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dropout (Dropout)               │ (None, 1600)           │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dense (Dense)                   │ (None, 10)             │        16,010
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 34,826 (136.04 KB)
+##  Trainable params: 34,826 (136.04 KB)
+##  Non-trainable params: 0 (0.00 B)
+
+
+

Train the model

+
+batch_size <- 128
+epochs <- 15
+
+model |> compile(
+  loss = "categorical_crossentropy",
+  optimizer = "adam",
+  metrics = "accuracy"
+)
+
+model |> fit(
+  x_train, y_train,
+  batch_size = batch_size,
+  epochs = epochs,
+  validation_split = 0.1
+)
+
## Epoch 1/15
+## 422/422 - 4s - 11ms/step - accuracy: 0.8845 - loss: 0.3815 - val_accuracy: 0.9783 - val_loss: 0.0810
+## Epoch 2/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9644 - loss: 0.1151 - val_accuracy: 0.9863 - val_loss: 0.0548
+## Epoch 3/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9735 - loss: 0.0838 - val_accuracy: 0.9880 - val_loss: 0.0455
+## Epoch 4/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9781 - loss: 0.0689 - val_accuracy: 0.9893 - val_loss: 0.0413
+## Epoch 5/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9811 - loss: 0.0608 - val_accuracy: 0.9907 - val_loss: 0.0373
+## Epoch 6/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9824 - loss: 0.0561 - val_accuracy: 0.9908 - val_loss: 0.0359
+## Epoch 7/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9848 - loss: 0.0487 - val_accuracy: 0.9917 - val_loss: 0.0328
+## Epoch 8/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9845 - loss: 0.0484 - val_accuracy: 0.9915 - val_loss: 0.0334
+## Epoch 9/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9859 - loss: 0.0439 - val_accuracy: 0.9923 - val_loss: 0.0320
+## Epoch 10/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9868 - loss: 0.0405 - val_accuracy: 0.9925 - val_loss: 0.0319
+## Epoch 11/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9874 - loss: 0.0387 - val_accuracy: 0.9920 - val_loss: 0.0317
+## Epoch 12/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9882 - loss: 0.0367 - val_accuracy: 0.9925 - val_loss: 0.0301
+## Epoch 13/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9879 - loss: 0.0361 - val_accuracy: 0.9928 - val_loss: 0.0281
+## Epoch 14/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9889 - loss: 0.0332 - val_accuracy: 0.9930 - val_loss: 0.0284
+## Epoch 15/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9897 - loss: 0.0311 - val_accuracy: 0.9925 - val_loss: 0.0293
+
+
+

Evaluate the trained model

+
+score <- model |> evaluate(x_test, y_test, verbose = 0)
+score
+
## $accuracy
+## [1] 0.9911
+##
+## $loss
+## [1] 0.02564374
+
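
If you want per-image predictions rather than aggregate metrics, something along these lines works (a sketch; it assumes predict() returns an R matrix with one row of class probabilities per test image, which is the default behavior):

+
+probs <- model |> predict(x_test)
+# index of the most probable class per row, shifted to the 0-9 digit labels
+predicted_digits <- max.col(probs) - 1
+head(predicted_digits)
+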
+
+
diff --git a/docs/articles/examples/nlp/text_classification_from_scratch.html b/docs/articles/examples/nlp/text_classification_from_scratch.html
new file mode 100644
index 0000000000..220dc2927c
--- /dev/null
+++ b/docs/articles/examples/nlp/text_classification_from_scratch.html
@@ -0,0 +1,474 @@
+Text classification from scratch • keras3

Introduction

+

This example shows how to do text classification starting from raw text (as a set of text files on disk). We demonstrate the workflow on the IMDB sentiment classification dataset (unprocessed version). We use layer_text_vectorization() for word splitting & indexing.

+
+
+

Setup

+
+options(conflicts.policy = "strict")
+library(tensorflow, exclude = c("shape", "set_random_seed"))
+library(tfdatasets, exclude = "shape")
+library(keras3)
+use_virtualenv("r-keras")
+
+
+

Load the data: IMDB movie review sentiment classification

+

Let’s download the data and inspect its structure.

+
+if (!dir.exists("datasets/aclImdb")) {
+  dir.create("datasets")
+  download.file(
+    "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
+    "datasets/aclImdb_v1.tar.gz"
+  )
+  untar("datasets/aclImdb_v1.tar.gz", exdir = "datasets")
+  unlink("datasets/aclImdb/train/unsup", recursive = TRUE)
+}
+

The aclImdb folder contains a train and a test subfolder:

+
+head(list.files("datasets/aclImdb/test"))
+
## [1] "labeledBow.feat" "neg"             "pos"             "urls_neg.txt"
+## [5] "urls_pos.txt"
+
+head(list.files("datasets/aclImdb/train"))
+
## [1] "labeledBow.feat" "neg"             "pos"             "unsupBow.feat"
+## [5] "urls_neg.txt"    "urls_pos.txt"
+

The aclImdb/train/pos and aclImdb/train/neg folders contain text files, each of which represents one review (either positive or negative):

+
+cat(readLines("datasets/aclImdb/train/pos/6248_7.txt"))
+
## Being an Austrian myself this has been a straight knock in my face. Fortunately I don't live nowhere near the place where this movie takes place but unfortunately it portrays everything that the rest of Austria hates about Viennese people (or people close to that region). And it is very easy to read that this is exactly the directors intention: to let your head sink into your hands and say "Oh my god, how can THAT be possible!". No, not with me, the (in my opinion) totally exaggerated uncensored swinger club scene is not necessary, I watch porn, sure, but in this context I was rather disgusted than put in the right context.<br /><br />This movie tells a story about how misled people who suffer from lack of education or bad company try to survive and live in a world of redundancy and boring horizons. A girl who is treated like a whore by her super-jealous boyfriend (and still keeps coming back), a female teacher who discovers her masochism by putting the life of her super-cruel "lover" on the line, an old couple who has an almost mathematical daily cycle (she is the "official replacement" of his ex wife), a couple that has just divorced and has the ex husband suffer under the acts of his former wife obviously having a relationship with her masseuse and finally a crazy hitchhiker who asks her drivers the most unusual questions and stretches their nerves by just being super-annoying.<br /><br />After having seen it you feel almost nothing. You're not even shocked, sad, depressed or feel like doing anything... Maybe that's why I gave it 7 points, it made me react in a way I never reacted before. If that's good or bad is up to you!
+

We are only interested in the pos and neg subfolders, so let’s delete the other subfolder that has text files in it:

+
+unlink("datasets/aclImdb/train/unsup", recursive = TRUE)
+

You can use the utility text_dataset_from_directory() to generate a labeled tf_dataset object from a set of text files on disk filed into class-specific folders.

+

Let’s use it to generate the training, validation, and test datasets. The validation and training datasets are generated from two subsets of the train directory, with 20% of samples going to the validation dataset and 80% going to the training dataset.

+

Having a validation dataset in addition to the test dataset is useful for tuning hyperparameters, such as the model architecture, for which the test dataset should not be used.

+

Before putting the model out into the real world, however, it should be retrained using all available training data (without creating a validation dataset), so that its performance is maximized.

+

When using the validation_split and subset arguments, make sure to either specify a random seed or to pass shuffle = FALSE, so that the validation & training splits you get have no overlap.

+
+batch_size <- 32
+
+raw_train_ds <- text_dataset_from_directory(
+  "datasets/aclImdb/train",
+  batch_size = batch_size,
+  validation_split = 0.2,
+  subset = "training",
+  seed = 1337
+)
+
## Found 25000 files belonging to 2 classes.
+## Using 20000 files for training.
+
+raw_val_ds <- text_dataset_from_directory(
+  "datasets/aclImdb/train",
+  batch_size = batch_size,
+  validation_split = 0.2,
+  subset = "validation",
+  seed = 1337
+)
+
## Found 25000 files belonging to 2 classes.
+## Using 5000 files for validation.
+
+raw_test_ds <- text_dataset_from_directory(
+  "datasets/aclImdb/test",
+  batch_size = batch_size
+)
+
## Found 25000 files belonging to 2 classes.
+
+cat("Number of batches in raw_train_ds:", length(raw_train_ds), "\n")
+
## Number of batches in raw_train_ds: 625
+
+cat("Number of batches in raw_val_ds:", length(raw_val_ds), "\n")
+
## Number of batches in raw_val_ds: 157
+
+cat("Number of batches in raw_test_ds:", length(raw_test_ds), "\n")
+
## Number of batches in raw_test_ds: 782
+

Let’s preview a few samples:

+
+# It's important to take a look at your raw data to ensure your normalization
+# and tokenization will work as expected. We can do that by taking a few
+# examples from the training set and looking at them.
+# This is one of the places where eager execution shines:
+# we can just print these tensors directly
+# instead of needing to evaluate them in a Session/Graph context.
+batch <- iter_next(as_iterator(raw_train_ds))
+str(batch)
+
## List of 2
+##  $ :<tf.Tensor: shape=(32), dtype=string, numpy=…>
+##  $ :<tf.Tensor: shape=(32), dtype=int32, numpy=…>
+
+c(text_batch, label_batch) %<-% batch
+for (i in 1:3) {
+  print(text_batch[i])
+  print(label_batch[i])
+}
+
## tf.Tensor(b"I have read the novel Reaper of Ben Mezrich a fews years ago and last night I accidentally came to see this adaption.<br /><br />Although it's been years since I read the story the first time, the differences between the novel and the movie are humongous. Very important elements, which made the whole thing plausible are just written out or changed to bad.<br /><br />If the plot sounds interesting to you: go and get the novel. Its much, much, much better.<br /><br />Still 4 out of 10 since it was hard to stop watching because of the great basic plot by Ben Mezrich.", shape=(), dtype=string)
+## tf.Tensor(0, shape=(), dtype=int32)
+## tf.Tensor(b'After seeing all the Jesse James, Quantrill, jayhawkers,etc films in the fifties, it is quite a thrill to see this film with a new perspective by director Ang Lee. The scene of the attack of Lawrence, Kansas is awesome. The romantic relationship between Jewel and Toby Mcguire turns out to be one of the best parts and Jonathan Rhys-Meyers is outstanding as the bad guy. All the time this film makes you feel the horror of war, and the desperate situation of the main characters who do not know if they are going to survive the next hours. Definitely worth seeing.', shape=(), dtype=string)
+## tf.Tensor(1, shape=(), dtype=int32)
+## tf.Tensor(b'AG was an excellent presentation of drama, suspense and thriller that is so rare to American TV. Sheriff Lucas gave many a viewer the willies. We rooted for Caleb as he strove to resist the overtures of Sheriff Lucas. We became engrossed and fearful upon learning of the unthinkable connection between these two characters. The manipulations which weekly gave cause to fear what Lucas would do next were truly surprising. This show lived up to the "Gothic" moniker in ways American entertainment has so seldom attempted, much less mastered. The suits definitely made a big mistake in not supporting this show. This show puts shame to the current glut of "reality" shows- which are so less than satisfying viewing.The call for a DVD box set is well based. This show is quality viewing for a discerning market hungry for quality viewing. A public that is tiring of over-saturation of mind-numbing reality fare will welcome this gem of real storytelling. Bring on the DVD box set!!', shape=(), dtype=string)
+## tf.Tensor(1, shape=(), dtype=int32)
+
+
+

Prepare the data

+

In particular, we remove <br /> tags.

+
+# Having looked at our data above, we see that the raw text contains HTML break
+# tags of the form '<br />'. These tags will not be removed by the default
+# standardizer (which doesn't strip HTML). Because of this, we will need to
+# create a custom standardization function.
+custom_standardization_fn <- function(string_tensor) {
+  string_tensor |>
+    tf$strings$lower() |> # convert to all lowercase
+    tf$strings$regex_replace("<br />", " ") |> # remove '<br />' HTML tag
+    tf$strings$regex_replace("[[:punct:]]", "") # remove punctuation
+}
+
+
+# Model constants.
+max_features <- 20000
+embedding_dim <- 128
+sequence_length <- 500
+
+# Now that we have our custom standardization, we can instantiate our text
+# vectorization layer. We are using this layer to normalize, split, and map
+# strings to integers, so we set our 'output_mode' to 'int'.
+# Note that we're using the default split function,
+# and the custom standardization defined above.
+# We also set an explicit maximum sequence length, since the CNNs later in our
+# model won't support ragged sequences.
+vectorize_layer <- layer_text_vectorization(
+  standardize = custom_standardization_fn,
+  max_tokens = max_features,
+  output_mode = "int",
+  output_sequence_length = sequence_length
+)
+
+# Now that the vectorize_layer has been created, call `adapt` on a text-only
+# dataset to create the vocabulary. You don't have to batch, but for very large
+# datasets this means you're not keeping spare copies of the dataset in memory.
+
+# Let's make a text-only dataset (no labels):
+text_ds <- raw_train_ds |>
+  dataset_map(\(x, y) x)
+# Let's call `adapt`:
+vectorize_layer |> adapt(text_ds)
+
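
As a quick sanity check (not part of the original example), you can run the standardizer on a toy string and peek at the learned vocabulary; the outputs sketched in the comments are what we would expect, not captured output:

+
+custom_standardization_fn(tf$constant("Great movie!<br />Loved it."))
+# expected: tf.Tensor(b'great movie loved it', shape=(), dtype=string)
+head(get_vocabulary(vectorize_layer))
+# the highest-frequency tokens, e.g. "", "[UNK]", "the", ...
+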
+
+

Two options to vectorize the data

+

There are 2 ways we can use our text vectorization layer:

+

Option 1: Make it part of the model, so as to obtain a model that processes raw strings, like this:

+
+text_input <- keras_input(shape = c(1L), dtype = "string", name = 'text')
+x <- text_input |>
+  vectorize_layer() |>
+  layer_embedding(max_features + 1, embedding_dim)
+

Option 2: Apply it to the text dataset to obtain a dataset of word indices, then feed it into a model that expects integer sequences as inputs.

+

An important difference between the two is that option 2 enables you to do asynchronous CPU processing and buffering of your data when training on GPU. So if you’re training the model on GPU, you probably want to go with this option to get the best performance. This is what we will do below.

+

If we were to export our model to production, we’d ship a model that accepts raw strings as input, like in the code snippet for option 1 above. This can be done after training. We do this in the last section.

+
+vectorize_text <- function(text, label) {
+  text <- text |>
+    op_expand_dims(-1) |>
+    vectorize_layer()
+  list(text, label)
+}
+
+# Vectorize the data.
+train_ds <- raw_train_ds |> dataset_map(vectorize_text)
+val_ds   <- raw_val_ds   |> dataset_map(vectorize_text)
+test_ds  <- raw_test_ds  |> dataset_map(vectorize_text)
+
+# Do async prefetching / buffering of the data for best performance on GPU.
+train_ds <- train_ds |>
+  dataset_cache() |>
+  dataset_prefetch(buffer_size = 10)
+val_ds <- val_ds |>
+  dataset_cache() |>
+  dataset_prefetch(buffer_size = 10)
+test_ds <- test_ds |>
+  dataset_cache() |>
+  dataset_prefetch(buffer_size = 10)
+
+
+

Build a model

+

We choose a simple 1D convnet starting with an Embedding layer.

+
+# An integer input for vocab indices.
+inputs <- keras_input(shape = c(NA), dtype = "int64")
+
+predictions <- inputs |>
+  # Next, we add a layer to map those vocab indices into a space of dimensionality
+  # 'embedding_dim'.
+  layer_embedding(max_features, embedding_dim) |>
+  layer_dropout(0.5) |>
+  # Conv1D + global max pooling
+  layer_conv_1d(128, 7, padding = "valid", activation = "relu", strides = 3) |>
+  layer_conv_1d(128, 7, padding = "valid", activation = "relu", strides = 3) |>
+  layer_global_max_pooling_1d() |>
+  # We add a vanilla hidden layer:
+  layer_dense(128, activation = "relu") |>
+  layer_dropout(0.5) |>
+  # We project onto a single unit output layer, and squash it with a sigmoid:
+  layer_dense(1, activation = "sigmoid", name = "predictions")
+
+model <- keras_model(inputs, predictions)
+
+summary(model)
+
## Model: "functional_1"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ input_layer (InputLayer)        │ (None, None)           │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ embedding_1 (Embedding)         │ (None, None, 128)      │     2,560,000
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dropout (Dropout)               │ (None, None, 128)      │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv1d (Conv1D)                 │ (None, None, 128)      │       114,816
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv1d_1 (Conv1D)               │ (None, None, 128)      │       114,816
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ global_max_pooling1d            │ (None, 128)            │             0
+## │ (GlobalMaxPooling1D)            │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dense (Dense)                   │ (None, 128)            │        16,512
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dropout_1 (Dropout)             │ (None, 128)            │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ predictions (Dense)             │ (None, 1)              │           129
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 2,806,273 (10.71 MB)
+##  Trainable params: 2,806,273 (10.71 MB)
+##  Non-trainable params: 0 (0.00 B)
+
+# Compile the model with binary crossentropy loss and an adam optimizer.
+model |> compile(loss = "binary_crossentropy",
+                 optimizer = "adam",
+                 metrics = "accuracy")
+
+
+

Train the model

+
+epochs <- 3
+
+# Fit the model using the train and test datasets.
+model |> fit(train_ds, validation_data = val_ds, epochs = epochs)
+
## Epoch 1/3
+## 625/625 - 5s - 8ms/step - accuracy: 0.6944 - loss: 0.5248 - val_accuracy: 0.8624 - val_loss: 0.3150
+## Epoch 2/3
+## 625/625 - 2s - 2ms/step - accuracy: 0.9046 - loss: 0.2403 - val_accuracy: 0.8730 - val_loss: 0.3135
+## Epoch 3/3
+## 625/625 - 2s - 2ms/step - accuracy: 0.9524 - loss: 0.1275 - val_accuracy: 0.8716 - val_loss: 0.3424
+
+
+

Evaluate the model on the test set

+
+model |> evaluate(test_ds)
+
## 782/782 - 1s - 2ms/step - accuracy: 0.8608 - loss: 0.3672
+
## $accuracy
+## [1] 0.86084
+##
+## $loss
+## [1] 0.3671538
+
+
+

Make an end-to-end model

+

If you want to obtain a model capable of processing raw strings, you can simply create a new model (using the weights we just trained):

+
+# A string input
+inputs <- keras_input(shape = c(1), dtype = "string")
+# Turn strings into vocab indices
+indices <- vectorize_layer(inputs)
+# Turn vocab indices into predictions
+outputs <- model(indices)
+
+# Our end to end model
+end_to_end_model <- keras_model(inputs, outputs)
+end_to_end_model |> compile(
+  loss = "binary_crossentropy",
+  optimizer = "adam",
+  metrics = c("accuracy")
+)
+
+# Test it with `raw_test_ds`, which yields raw strings
+end_to_end_model |> evaluate(raw_test_ds)
+
## 782/782 - 3s - 4ms/step - accuracy: 0.8608 - loss: 0.0000e+00
+
## $accuracy
+## [1] 0.86084
+##
+## $loss
+## [1] 0
+
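
As a final smoke test (illustrative; the review strings are made up), the end-to-end model can score raw strings directly:

+
+reviews <- matrix(c(
+  "This movie was fantastic, I loved every minute.",
+  "Dull, predictable, and far too long."
+), ncol = 1)
+end_to_end_model |> predict(tf$constant(reviews))
+# one probability per review; values near 1 indicate positive sentiment
+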
+
+
diff --git a/docs/articles/examples/oxford_pets_image_segmentation.html b/docs/articles/examples/oxford_pets_image_segmentation.html
new file mode 100644
index 0000000000..dbc6319d56
--- /dev/null
+++ b/docs/articles/examples/oxford_pets_image_segmentation.html
@@ -0,0 +1,743 @@
+Image segmentation with a U-Net-like architecture • keras3

Download the data

+
+options(timeout = 5000)
+# make sure the target directory exists before downloading
+if (!dir.exists("datasets")) dir.create("datasets")
+download.file(
+  "https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz",
+  "datasets/images.tar.gz"
+)
+download.file(
+  "https://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz",
+  "datasets/annotations.tar.gz"
+)
+
+untar("datasets/images.tar.gz", exdir = "datasets")
+untar("datasets/annotations.tar.gz", exdir = "datasets")
+
+
+

Prepare paths of input images and target segmentation masks

+
+library(keras3)
+input_dir <- "datasets/images/"
+target_dir <- "datasets/annotations/trimaps/"
+img_size <- c(160, 160)
+num_classes <- 3
+batch_size <- 32
+
+input_img_paths <- fs::dir_ls(input_dir, glob = "*.jpg") |> sort()
+target_img_paths <- fs::dir_ls(target_dir, glob = "*.png") |> sort()
+
+cat("Number of samples:", length(input_img_paths), "\n")
+
## Number of samples: 7390
+
+for (i in 1:10) {
+  cat(input_img_paths[i], "|", target_img_paths[i], "\n")
+}
+
## datasets/images/Abyssinian_1.jpg | datasets/annotations/trimaps/Abyssinian_1.png
+## datasets/images/Abyssinian_10.jpg | datasets/annotations/trimaps/Abyssinian_10.png
+## datasets/images/Abyssinian_100.jpg | datasets/annotations/trimaps/Abyssinian_100.png
+## datasets/images/Abyssinian_101.jpg | datasets/annotations/trimaps/Abyssinian_101.png
+## datasets/images/Abyssinian_102.jpg | datasets/annotations/trimaps/Abyssinian_102.png
+## datasets/images/Abyssinian_103.jpg | datasets/annotations/trimaps/Abyssinian_103.png
+## datasets/images/Abyssinian_104.jpg | datasets/annotations/trimaps/Abyssinian_104.png
+## datasets/images/Abyssinian_105.jpg | datasets/annotations/trimaps/Abyssinian_105.png
+## datasets/images/Abyssinian_106.jpg | datasets/annotations/trimaps/Abyssinian_106.png
+## datasets/images/Abyssinian_107.jpg | datasets/annotations/trimaps/Abyssinian_107.png
+
+
+

What does one input image and corresponding segmentation mask look like?

+
+# Display input image #10
+input_img_paths[10] |>
+  jpeg::readJPEG() |>
+  as.raster() |>
+  plot()
+
+[plot of chunk unnamed-chunk-4: input image #10]
+
+
+target_img_paths[10] |>
+  png::readPNG() |>
+  magrittr::multiply_by(255)|>
+  as.raster(max = 3) |>
+  plot()
+
+[plot of chunk unnamed-chunk-4: corresponding segmentation mask]
+
+
+
+

Prepare dataset to load & vectorize batches of data

+
+library(tensorflow, exclude = c("shape", "set_random_seed"))
+library(tfdatasets, exclude = "shape")
+
+
+# Returns a tf_dataset
+get_dataset <- function(batch_size, img_size, input_img_paths, target_img_paths,
+                        max_dataset_len = NULL) {
+
+  img_size <- as.integer(img_size)
+
+  load_img_masks <- function(input_img_path, target_img_path) {
+    input_img <- input_img_path |>
+      tf$io$read_file() |>
+      tf$io$decode_jpeg(channels = 3) |>
+      tf$image$resize(img_size) |>
+      tf$image$convert_image_dtype("float32")
+
+    target_img <- target_img_path |>
+      tf$io$read_file() |>
+      tf$io$decode_png(channels = 1) |>
+      tf$image$resize(img_size, method = "nearest") |>
+      tf$image$convert_image_dtype("uint8")
+
+    # Ground truth labels are 1, 2, 3. Subtract one to make them 0, 1, 2:
+    target_img <- target_img - 1L
+
+    list(input_img, target_img)
+  }
+
+  if (!is.null(max_dataset_len)) {
+    input_img_paths <- input_img_paths[1:max_dataset_len]
+    target_img_paths <- target_img_paths[1:max_dataset_len]
+  }
+
+  list(input_img_paths, target_img_paths) |>
+    tensor_slices_dataset() |>
+    dataset_map(load_img_masks, num_parallel_calls = tf$data$AUTOTUNE)|>
+    dataset_batch(batch_size)
+}
+
+
+

Prepare U-Net Xception-style model

+
+get_model <- function(img_size, num_classes) {
+
+  inputs <- keras_input(shape = c(img_size, 3))
+
+  ### [First half of the network: downsampling inputs] ###
+
+  # Entry block
+  x <- inputs |>
+    layer_conv_2d(filters = 32, kernel_size = 3, strides = 2, padding = "same") |>
+    layer_batch_normalization() |>
+    layer_activation("relu")
+
+  previous_block_activation <- x  # Set aside residual
+
+  for (filters in c(64, 128, 256)) {
+    x <- x |>
+      layer_activation("relu") |>
+      layer_separable_conv_2d(filters = filters, kernel_size = 3, padding = "same") |>
+      layer_batch_normalization() |>
+
+      layer_activation("relu") |>
+      layer_separable_conv_2d(filters = filters, kernel_size = 3, padding = "same") |>
+      layer_batch_normalization() |>
+
+      layer_max_pooling_2d(pool_size = 3, strides = 2, padding = "same")
+
+    residual <- previous_block_activation |>
+      layer_conv_2d(filters = filters, kernel_size = 1, strides = 2, padding = "same")
+
+    x <- layer_add(x, residual)  # Add back residual
+    previous_block_activation <- x  # Set aside next residual
+  }
+
+  ### [Second half of the network: upsampling inputs] ###
+
+  for (filters in c(256, 128, 64, 32)) {
+    x <- x |>
+      layer_activation("relu") |>
+      layer_conv_2d_transpose(filters = filters, kernel_size = 3, padding = "same") |>
+      layer_batch_normalization() |>
+
+      layer_activation("relu") |>
+      layer_conv_2d_transpose(filters = filters, kernel_size = 3, padding = "same") |>
+      layer_batch_normalization() |>
+
+      layer_upsampling_2d(size = 2)
+
+    # Project residual
+    residual <- previous_block_activation |>
+      layer_upsampling_2d(size = 2) |>
+      layer_conv_2d(filters = filters, kernel_size = 1, padding = "same")
+
+    x <- layer_add(x, residual)     # Add back residual
+    previous_block_activation <- x  # Set aside next residual
+  }
+
+  # Add a per-pixel classification layer
+  outputs <- x |>
+    layer_conv_2d(num_classes, 3, activation = "softmax", padding = "same")
+
+  # Define the model
+  keras_model(inputs, outputs)
+}
+
+# Build model
+model <- get_model(img_size, num_classes)
+summary(model)
+
## Model: "functional_1"
+## ┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━┓
+## ┃ Layer (type)       Output Shape       Param #  Connected to    Trai… 
+## ┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━┩
+## │ input_layer       │ (None, 160,     │         0 │ -              │   -
+## │ (InputLayer)      │ 160, 3)         │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d (Conv2D)   │ (None, 80, 80,  │       896 │ input_layer[0… │   Y
+## │                   │ 32)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 80, 80,  │       128 │ conv2d[0][0]   │   Y
+## │ (BatchNormalizat…32)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation        │ (None, 80, 80,  │         0 │ batch_normali… │   -
+## │ (Activation)      │ 32)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_2      │ (None, 80, 80,  │         0 │ activation[0]… │   -
+## │ (Activation)      │ 32)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ separable_conv2d… │ (None, 80, 80,  │     2,400 │ activation_2[Y
+## │ (SeparableConv2D) │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 80, 80,  │       256 │ separable_con… │   Y
+## │ (BatchNormalizat…64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_1      │ (None, 80, 80,  │         0 │ batch_normali… │   -
+## │ (Activation)      │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ separable_conv2d  │ (None, 80, 80,  │     4,736 │ activation_1[Y
+## │ (SeparableConv2D) │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 80, 80,  │       256 │ separable_con… │   Y
+## │ (BatchNormalizat…64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ max_pooling2d     │ (None, 40, 40,  │         0 │ batch_normali… │   -
+## │ (MaxPooling2D)    │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_1 (Conv2D) │ (None, 40, 40,  │     2,112 │ activation[0]… │   Y
+## │                   │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ add (Add)         │ (None, 40, 40,  │         0 │ max_pooling2d… │   -
+## │                   │ 64)             │           │ conv2d_1[0][0] │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_4      │ (None, 40, 40,  │         0 │ add[0][0]      │   -
+## │ (Activation)      │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ separable_conv2d… │ (None, 40, 40,  │     8,896 │ activation_4[Y
+## │ (SeparableConv2D) │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 40, 40,  │       512 │ separable_con… │   Y
+## │ (BatchNormalizat…128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_3      │ (None, 40, 40,  │         0 │ batch_normali… │   -
+## │ (Activation)      │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ separable_conv2d… │ (None, 40, 40,  │    17,664 │ activation_3[Y
+## │ (SeparableConv2D) │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 40, 40,  │       512 │ separable_con… │   Y
+## │ (BatchNormalizat…128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ max_pooling2d_1   │ (None, 20, 20,  │         0 │ batch_normali… │   -
+## │ (MaxPooling2D)    │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_2 (Conv2D) │ (None, 20, 20,  │     8,320 │ add[0][0]      │   Y
+## │                   │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ add_1 (Add)       │ (None, 20, 20,  │         0 │ max_pooling2d… │   -
+## │                   │ 128)            │           │ conv2d_2[0][0] │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_6      │ (None, 20, 20,  │         0 │ add_1[0][0]    │   -
+## │ (Activation)      │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ separable_conv2d… │ (None, 20, 20,  │    34,176 │ activation_6[Y
+## │ (SeparableConv2D) │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 20, 20,  │     1,024 │ separable_con… │   Y
+## │ (BatchNormalizat…256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_5      │ (None, 20, 20,  │         0 │ batch_normali… │   -
+## │ (Activation)      │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ separable_conv2d… │ (None, 20, 20,  │    68,096 │ activation_5[Y
+## │ (SeparableConv2D) │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 20, 20,  │     1,024 │ separable_con… │   Y
+## │ (BatchNormalizat…256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ max_pooling2d_2   │ (None, 10, 10,  │         0 │ batch_normali… │   -
+## │ (MaxPooling2D)    │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_3 (Conv2D) │ (None, 10, 10,  │    33,024 │ add_1[0][0]    │   Y
+## │                   │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ add_2 (Add)       │ (None, 10, 10,  │         0 │ max_pooling2d… │   -
+## │                   │ 256)            │           │ conv2d_3[0][0] │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_8      │ (None, 10, 10,  │         0 │ add_2[0][0]    │   -
+## │ (Activation)      │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_transpose… │ (None, 10, 10,  │   590,080 │ activation_8[Y
+## │ (Conv2DTranspose) │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 10, 10,  │     1,024 │ conv2d_transp… │   Y
+## │ (BatchNormalizat…256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_7      │ (None, 10, 10,  │         0 │ batch_normali… │   -
+## │ (Activation)      │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_transpose  │ (None, 10, 10,  │   590,080 │ activation_7[Y
+## │ (Conv2DTranspose) │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 10, 10,  │     1,024 │ conv2d_transp… │   Y
+## │ (BatchNormalizat…256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ up_sampling2d_1   │ (None, 20, 20,  │         0 │ add_2[0][0]    │   -
+## │ (UpSampling2D)    │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ up_sampling2d     │ (None, 20, 20,  │         0 │ batch_normali… │   -
+## │ (UpSampling2D)    │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_4 (Conv2D) │ (None, 20, 20,  │    65,792 │ up_sampling2d… │   Y
+## │                   │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ add_3 (Add)       │ (None, 20, 20,  │         0 │ up_sampling2d… │   -
+## │                   │ 256)            │           │ conv2d_4[0][0] │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_10     │ (None, 20, 20,  │         0 │ add_3[0][0]    │   -
+## │ (Activation)      │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_transpose… │ (None, 20, 20,  │   295,040 │ activation_10… │   Y
+## │ (Conv2DTranspose) │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 20, 20,  │       512 │ conv2d_transp… │   Y
+## │ (BatchNormalizat…128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_9      │ (None, 20, 20,  │         0 │ batch_normali… │   -
+## │ (Activation)      │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_transpose… │ (None, 20, 20,  │   147,584 │ activation_9[Y
+## │ (Conv2DTranspose) │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 20, 20,  │       512 │ conv2d_transp… │   Y
+## │ (BatchNormalizat…128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ up_sampling2d_3   │ (None, 40, 40,  │         0 │ add_3[0][0]    │   -
+## │ (UpSampling2D)    │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ up_sampling2d_2   │ (None, 40, 40,  │         0 │ batch_normali… │   -
+## │ (UpSampling2D)    │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_5 (Conv2D) │ (None, 40, 40,  │    32,896 │ up_sampling2d… │   Y
+## │                   │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ add_4 (Add)       │ (None, 40, 40,  │         0 │ up_sampling2d… │   -
+## │                   │ 128)            │           │ conv2d_5[0][0] │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_12     │ (None, 40, 40,  │         0 │ add_4[0][0]    │   -
+## │ (Activation)      │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_transpose… │ (None, 40, 40,  │    73,792 │ activation_12… │   Y
+## │ (Conv2DTranspose) │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 40, 40,  │       256 │ conv2d_transp… │   Y
+## │ (BatchNormalizat…64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_11     │ (None, 40, 40,  │         0 │ batch_normali… │   -
+## │ (Activation)      │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_transpose… │ (None, 40, 40,  │    36,928 │ activation_11… │   Y
+## │ (Conv2DTranspose) │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 40, 40,  │       256 │ conv2d_transp… │   Y
+## │ (BatchNormalizat…64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ up_sampling2d_5   │ (None, 80, 80,  │         0 │ add_4[0][0]    │   -
+## │ (UpSampling2D)    │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ up_sampling2d_4   │ (None, 80, 80,  │         0 │ batch_normali… │   -
+## │ (UpSampling2D)    │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_6 (Conv2D) │ (None, 80, 80,  │     8,256 │ up_sampling2d… │   Y
+## │                   │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ add_5 (Add)       │ (None, 80, 80,  │         0 │ up_sampling2d… │   -
+## │                   │ 64)             │           │ conv2d_6[0][0] │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_14     │ (None, 80, 80,  │         0 │ add_5[0][0]    │   -
+## │ (Activation)      │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_transpose… │ (None, 80, 80,  │    18,464 │ activation_14… │   Y
+## │ (Conv2DTranspose) │ 32)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 80, 80,  │       128 │ conv2d_transp… │   Y
+## │ (BatchNormalizat…32)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_13     │ (None, 80, 80,  │         0 │ batch_normali… │   -
+## │ (Activation)      │ 32)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_transpose… │ (None, 80, 80,  │     9,248 │ activation_13… │   Y
+## │ (Conv2DTranspose) │ 32)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 80, 80,  │       128 │ conv2d_transp… │   Y
+## │ (BatchNormalizat… │ 32)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ up_sampling2d_7   │ (None, 160,     │         0 │ add_5[0][0]    │   -
+## │ (UpSampling2D)    │ 160, 64)        │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ up_sampling2d_6   │ (None, 160,     │         0 │ batch_normali… │   -
+## │ (UpSampling2D)    │ 160, 32)        │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_7 (Conv2D) │ (None, 160,     │     2,080 │ up_sampling2d… │   Y
+## │                   │ 160, 32)        │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ add_6 (Add)       │ (None, 160,     │         0 │ up_sampling2d… │   -
+## │                   │ 160, 32)        │           │ conv2d_7[0][0] │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_8 (Conv2D) │ (None, 160,     │       867 │ add_6[0][0]    │   Y
+## │                   │ 160, 3)         │           │                │       │
+## └───────────────────┴─────────────────┴───────────┴────────────────┴───────┘
+##  Total params: 2,058,979 (7.85 MB)
+##  Trainable params: 2,055,203 (7.84 MB)
+##  Non-trainable params: 3,776 (14.75 KB)
+
+
+

Set aside a validation split +

+
+# Split our img paths into a training and a validation set
+val_samples <- 1000
+val_samples <- sample.int(length(input_img_paths), val_samples)
+
+train_input_img_paths <- input_img_paths[-val_samples]
+train_target_img_paths <- target_img_paths[-val_samples]
+
+val_input_img_paths <- input_img_paths[val_samples]
+val_target_img_paths <- target_img_paths[val_samples]
+
+# Instantiate dataset for each split
+# Limit the number of input files via `max_dataset_len` for faster epoch times.
+# Remove the `max_dataset_len` argument to train on the full dataset.
+train_dataset <- get_dataset(
+  batch_size,
+  img_size,
+  train_input_img_paths,
+  train_target_img_paths,
+  max_dataset_len = 1000
+)
+valid_dataset <- get_dataset(
+  batch_size, img_size, val_input_img_paths, val_target_img_paths
+)
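
As a quick sanity check, we can pull a single batch from the training dataset (a sketch, using the same iterator helpers as the other examples in this document; the shapes in the comments assume batch_size = 32 and img_size = c(160, 160)):

+c(images, masks) %<-% iter_next(as_iterator(train_dataset))
+dim(images)  # e.g. 32 160 160 3: a batch of input images
+dim(masks)   # e.g. 32 160 160 1: a batch of integer target masks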
+
+
+

Train the model +

+
+# Configure the model for training.
+# We use the "sparse" version of categorical_crossentropy
+# because our target data is integers.
+model |> compile(
+  optimizer = optimizer_adam(1e-4),
+  loss = "sparse_categorical_crossentropy"
+)
+
+callbacks <- list(
+  callback_model_checkpoint(
+    "models/oxford_segmentation.keras", save_best_only = TRUE
+  )
+)
+
+# Train the model, doing validation at the end of each epoch.
+epochs <- 50
+model |> fit(
+  train_dataset,
+  epochs = epochs,
+  validation_data = valid_dataset,
+  callbacks = callbacks,
+  verbose = 2
+)
+
## Epoch 1/50
+## 32/32 - 32s - 986ms/step - loss: 1.3044 - val_loss: 1.5502
+## Epoch 2/50
+## 32/32 - 2s - 60ms/step - loss: 0.8717 - val_loss: 1.9567
+## Epoch 3/50
+## 32/32 - 2s - 60ms/step - loss: 0.7695 - val_loss: 2.1948
+## Epoch 4/50
+## 32/32 - 2s - 60ms/step - loss: 0.7080 - val_loss: 2.4837
+## Epoch 5/50
+## 32/32 - 2s - 64ms/step - loss: 0.6689 - val_loss: 2.7751
+## Epoch 6/50
+## 32/32 - 2s - 61ms/step - loss: 0.6378 - val_loss: 3.1156
+## Epoch 7/50
+## 32/32 - 2s - 60ms/step - loss: 0.6129 - val_loss: 3.3611
+## Epoch 8/50
+## 32/32 - 2s - 60ms/step - loss: 0.5917 - val_loss: 3.5193
+## Epoch 9/50
+## 32/32 - 2s - 62ms/step - loss: 0.5723 - val_loss: 3.6237
+## Epoch 10/50
+## 32/32 - 2s - 62ms/step - loss: 0.5538 - val_loss: 3.7209
+## Epoch 11/50
+## 32/32 - 2s - 60ms/step - loss: 0.5352 - val_loss: 3.8216
+## Epoch 12/50
+## 32/32 - 2s - 61ms/step - loss: 0.5161 - val_loss: 3.9090
+## Epoch 13/50
+## 32/32 - 2s - 60ms/step - loss: 0.4958 - val_loss: 4.0158
+## Epoch 14/50
+## 32/32 - 2s - 60ms/step - loss: 0.4739 - val_loss: 4.0756
+## Epoch 15/50
+## 32/32 - 2s - 60ms/step - loss: 0.4501 - val_loss: 4.0083
+## Epoch 16/50
+## 32/32 - 2s - 60ms/step - loss: 0.4243 - val_loss: 3.7728
+## Epoch 17/50
+## 32/32 - 2s - 60ms/step - loss: 0.3971 - val_loss: 3.3484
+## Epoch 18/50
+## 32/32 - 2s - 61ms/step - loss: 0.3696 - val_loss: 2.6909
+## Epoch 19/50
+## 32/32 - 2s - 60ms/step - loss: 0.3428 - val_loss: 1.9726
+## Epoch 20/50
+## 32/32 - 2s - 65ms/step - loss: 0.3178 - val_loss: 1.4081
+## Epoch 21/50
+## 32/32 - 2s - 65ms/step - loss: 0.2954 - val_loss: 1.1136
+## Epoch 22/50
+## 32/32 - 2s - 65ms/step - loss: 0.2764 - val_loss: 1.0246
+## Epoch 23/50
+## 32/32 - 2s - 61ms/step - loss: 0.2610 - val_loss: 1.0390
+## Epoch 24/50
+## 32/32 - 2s - 60ms/step - loss: 0.2507 - val_loss: 1.1001
+## Epoch 25/50
+## 32/32 - 2s - 60ms/step - loss: 0.2525 - val_loss: 1.1915
+## Epoch 26/50
+## 32/32 - 2s - 61ms/step - loss: 0.2961 - val_loss: 1.2296
+## Epoch 27/50
+## 32/32 - 2s - 61ms/step - loss: 0.3545 - val_loss: 1.2625
+## Epoch 28/50
+## 32/32 - 2s - 65ms/step - loss: 0.3326 - val_loss: 0.9711
+## Epoch 29/50
+## 32/32 - 2s - 60ms/step - loss: 0.3302 - val_loss: 1.0251
+## Epoch 30/50
+## 32/32 - 2s - 60ms/step - loss: 0.3163 - val_loss: 1.0867
+## Epoch 31/50
+## 32/32 - 2s - 60ms/step - loss: 0.3131 - val_loss: 1.4197
+## Epoch 32/50
+## 32/32 - 2s - 60ms/step - loss: 0.3093 - val_loss: 1.0851
+## Epoch 33/50
+## 32/32 - 2s - 61ms/step - loss: 0.2939 - val_loss: 1.4807
+## Epoch 34/50
+## 32/32 - 2s - 60ms/step - loss: 0.2763 - val_loss: 1.1851
+## Epoch 35/50
+## 32/32 - 2s - 60ms/step - loss: 0.2702 - val_loss: 1.1337
+## Epoch 36/50
+## 32/32 - 2s - 61ms/step - loss: 0.2584 - val_loss: 1.0315
+## Epoch 37/50
+## 32/32 - 2s - 61ms/step - loss: 0.2440 - val_loss: 1.0631
+## Epoch 38/50
+## 32/32 - 2s - 60ms/step - loss: 0.2353 - val_loss: 1.1609
+## Epoch 39/50
+## 32/32 - 2s - 61ms/step - loss: 0.2285 - val_loss: 1.1839
+## Epoch 40/50
+## 32/32 - 2s - 60ms/step - loss: 0.2307 - val_loss: 1.1293
+## Epoch 41/50
+## 32/32 - 2s - 61ms/step - loss: 0.2357 - val_loss: 0.9948
+## Epoch 42/50
+## 32/32 - 2s - 60ms/step - loss: 0.2279 - val_loss: 1.1329
+## Epoch 43/50
+## 32/32 - 2s - 61ms/step - loss: 0.2174 - val_loss: 1.0418
+## Epoch 44/50
+## 32/32 - 2s - 61ms/step - loss: 0.2076 - val_loss: 1.1141
+## Epoch 45/50
+## 32/32 - 2s - 61ms/step - loss: 0.2004 - val_loss: 1.1162
+## Epoch 46/50
+## 32/32 - 2s - 60ms/step - loss: 0.1930 - val_loss: 1.0939
+## Epoch 47/50
+## 32/32 - 2s - 61ms/step - loss: 0.1852 - val_loss: 1.0499
+## Epoch 48/50
+## 32/32 - 2s - 60ms/step - loss: 0.1796 - val_loss: 1.0651
+## Epoch 49/50
+## 32/32 - 2s - 61ms/step - loss: 0.1762 - val_loss: 1.0679
+## Epoch 50/50
+## 32/32 - 2s - 60ms/step - loss: 0.1757 - val_loss: 1.1762
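
The validation loss is quite noisy across epochs. To inspect the curves, capture the return value of fit() (a sketch; in the run above the result was not saved):

+history <- model |> fit(
+  train_dataset,
+  epochs = epochs,
+  validation_data = valid_dataset,
+  verbose = 2
+)
+plot(history)  # plots loss and val_loss by epoch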
+
+
+

Visualize predictions +

+
+model <- load_model("models/oxford_segmentation.keras")
+# Generate predictions for all images in the validation set
+val_dataset <- get_dataset(
+  batch_size, img_size, val_input_img_paths, val_target_img_paths
+)
+val_preds <- predict(model, val_dataset)
+
## 32/32 - 4s - 111ms/step
+
+display_mask <- function(i) {
+  # Quick utility to display a model's prediction.
+  mask <- val_preds[i,,,] %>%
+    apply(c(1,2), which.max) %>%
+    array_reshape(dim = c(img_size, 1))
+  mask <- abind::abind(mask, mask, mask, along = 3)
+  plot(as.raster(mask, max = 3))
+}
+
+# Display results for validation image #10
+i <- 10
+
+par(mfrow = c(1, 3))
+# Display input image
+input_img_paths[i] |>
+  jpeg::readJPEG() |>
+  as.raster() |>
+  plot()
+
+# Display ground-truth target mask
+target_img_paths[i] |>
+  png::readPNG() |>
+  magrittr::multiply_by(255) |>
+  as.raster(max = 3) |>
+  plot()
+
+# Display mask predicted by our model
+display_mask(i)  # Note that the model only sees inputs at 160x160.
+
+plot of chunk unnamed-chunk-9
+
+
+
+
diff --git a/docs/articles/examples/oxford_pets_image_segmentation/unnamed-chunk-4-1.png b/docs/articles/examples/oxford_pets_image_segmentation/unnamed-chunk-4-1.png
new file mode 100644
index 0000000000..75e535ac6b
Binary files /dev/null and b/docs/articles/examples/oxford_pets_image_segmentation/unnamed-chunk-4-1.png differ
diff --git a/docs/articles/examples/oxford_pets_image_segmentation/unnamed-chunk-4-2.png b/docs/articles/examples/oxford_pets_image_segmentation/unnamed-chunk-4-2.png
new file mode 100644
index 0000000000..816d0d588d
Binary files /dev/null and b/docs/articles/examples/oxford_pets_image_segmentation/unnamed-chunk-4-2.png differ
diff --git a/docs/articles/examples/oxford_pets_image_segmentation/unnamed-chunk-9-1.png b/docs/articles/examples/oxford_pets_image_segmentation/unnamed-chunk-9-1.png
new file mode 100644
index 0000000000..2e625f156e
Binary files /dev/null and b/docs/articles/examples/oxford_pets_image_segmentation/unnamed-chunk-9-1.png differ
diff --git a/docs/articles/examples/structured_data/imbalanced_classification.html b/docs/articles/examples/structured_data/imbalanced_classification.html
new file mode 100644
index 0000000000..2a1070415f
--- /dev/null
+++ b/docs/articles/examples/structured_data/imbalanced_classification.html
@@ -0,0 +1,403 @@
+Imbalanced classification: credit card fraud detection • keras3

Introduction +

+

This example looks at the Kaggle Credit Card Fraud Detection dataset to demonstrate how to train a classification model on data with highly imbalanced classes. You can download the data by clicking “Download” at the link, or if you're set up with a Kaggle API key at "~/.kaggle/kaggle.json", you can run the following:

+
+reticulate::py_install("kaggle", pip = TRUE)
+reticulate::py_available(TRUE) # ensure 'kaggle' is on the PATH
+system("kaggle datasets download -d mlg-ulb/creditcardfraud")
+zip::unzip("creditcardfraud.zip", files = "creditcard.csv")
+
+
+

First, load the data +

+
+library(readr)
+df <- read_csv("creditcard.csv", col_types = cols(
+  Class = col_integer(),
+  .default = col_double()
+))
+tibble::glimpse(df)
+
## Rows: 284,807
+## Columns: 31
+## $ Time   <dbl> 0, 0, 1, 1, 2, 2, 4, 7, 7, 9, 10, 10, 10, 11, 12, 12, 12, 1…
+## $ V1     <dbl> -1.3598071, 1.1918571, -1.3583541, -0.9662717, -1.1582331, …
+## $ V2     <dbl> -0.07278117, 0.26615071, -1.34016307, -0.18522601, 0.877736…
+## $ V3     <dbl> 2.53634674, 0.16648011, 1.77320934, 1.79299334, 1.54871785,…
+## $ V4     <dbl> 1.37815522, 0.44815408, 0.37977959, -0.86329128, 0.40303393…
+## $ V5     <dbl> -0.33832077, 0.06001765, -0.50319813, -0.01030888, -0.40719…
+## $ V6     <dbl> 0.46238778, -0.08236081, 1.80049938, 1.24720317, 0.09592146…
+## $ V7     <dbl> 0.239598554, -0.078802983, 0.791460956, 0.237608940, 0.5929…
+## $ V8     <dbl> 0.098697901, 0.085101655, 0.247675787, 0.377435875, -0.2705…
+## $ V9     <dbl> 0.3637870, -0.2554251, -1.5146543, -1.3870241, 0.8177393, -…
+## $ V10    <dbl> 0.09079417, -0.16697441, 0.20764287, -0.05495192, 0.7530744…
+## $ V11    <dbl> -0.55159953, 1.61272666, 0.62450146, -0.22648726, -0.822842…
+## $ V12    <dbl> -0.61780086, 1.06523531, 0.06608369, 0.17822823, 0.53819555…
+## $ V13    <dbl> -0.99138985, 0.48909502, 0.71729273, 0.50775687, 1.34585159…
+## $ V14    <dbl> -0.31116935, -0.14377230, -0.16594592, -0.28792375, -1.1196…
+## $ V15    <dbl> 1.468176972, 0.635558093, 2.345864949, -0.631418118, 0.1751…
+## $ V16    <dbl> -0.47040053, 0.46391704, -2.89008319, -1.05964725, -0.45144…
+## $ V17    <dbl> 0.207971242, -0.114804663, 1.109969379, -0.684092786, -0.23…
+## $ V18    <dbl> 0.02579058, -0.18336127, -0.12135931, 1.96577500, -0.038194…
+## $ V19    <dbl> 0.40399296, -0.14578304, -2.26185710, -1.23262197, 0.803486…
+## $ V20    <dbl> 0.25141210, -0.06908314, 0.52497973, -0.20803778, 0.4085423…
+## $ V21    <dbl> -0.018306778, -0.225775248, 0.247998153, -0.108300452, -0.0…
+## $ V22    <dbl> 0.277837576, -0.638671953, 0.771679402, 0.005273597, 0.7982…
+## $ V23    <dbl> -0.110473910, 0.101288021, 0.909412262, -0.190320519, -0.13…
+## $ V24    <dbl> 0.06692807, -0.33984648, -0.68928096, -1.17557533, 0.141266…
+## $ V25    <dbl> 0.12853936, 0.16717040, -0.32764183, 0.64737603, -0.2060095…
+## $ V26    <dbl> -0.18911484, 0.12589453, -0.13909657, -0.22192884, 0.502292…
+## $ V27    <dbl> 0.133558377, -0.008983099, -0.055352794, 0.062722849, 0.219…
+## $ V28    <dbl> -0.021053053, 0.014724169, -0.059751841, 0.061457629, 0.215…
+## $ Amount <dbl> 149.62, 2.69, 378.66, 123.50, 69.99, 3.67, 4.99, 40.80, 93.…
+## $ Class  <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
+
+
+

Prepare a validation set +

+
+val_idx <- nrow(df) %>% sample.int(., round( . * 0.2))
+val_df <- df[val_idx, ]
+train_df <- df[-val_idx, ]
+
+cat("Number of training samples:", nrow(train_df), "\n")
+
## Number of training samples: 227846
+
+cat("Number of validation samples:", nrow(val_df), "\n")
+
## Number of validation samples: 56961
+
+
+

Analyze class imbalance in the targets +

+
+counts <- table(train_df$Class)
+counts
+
##
+##      0      1
+## 227463    383
+
+cat(sprintf("Number of positive samples in training data: %i (%.2f%% of total)",
+            counts["1"], 100 * counts["1"] / sum(counts)))
+
## Number of positive samples in training data: 383 (0.17% of total)
+
+weight_for_0 <- 1 / counts["0"]
+weight_for_1 <- 1 / counts["1"]
+
+
+

Normalize the data using training set statistics +

+
+feature_names <- colnames(train_df) %>% setdiff("Class")
+
+train_features <- as.matrix(train_df[feature_names])
+train_targets <- as.matrix(train_df$Class)
+
+val_features <- as.matrix(val_df[feature_names])
+val_targets <- as.matrix(val_df$Class)
+
+train_features %<>% scale()
+val_features %<>% scale(center = attr(train_features, "scaled:center"),
+                        scale = attr(train_features, "scaled:scale"))
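
As a quick sanity check (a sketch), the scaled training features should now have column means near 0 and standard deviations near 1:

+summary(colMeans(train_features))
+summary(apply(train_features, 2, sd))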
+
+
+

Build a binary classification model +

+
+model <-
+  keras_model_sequential(input_shape = ncol(train_features)) |>
+  layer_dense(256, activation = "relu") |>
+  layer_dense(256, activation = "relu") |>
+  layer_dropout(0.3) |>
+  layer_dense(256, activation = "relu") |>
+  layer_dropout(0.3) |>
+  layer_dense(1, activation = "sigmoid")
+
+model
+
## Model: "sequential"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                    ┃ Output Shape           ┃       Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ dense (Dense)                   │ (None, 256)            │         7,936
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dense_1 (Dense)                 │ (None, 256)            │        65,792
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dropout (Dropout)               │ (None, 256)            │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dense_2 (Dense)                 │ (None, 256)            │        65,792
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dropout_1 (Dropout)             │ (None, 256)            │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dense_3 (Dense)                 │ (None, 1)              │           257
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 139,777 (546.00 KB)
+##  Trainable params: 139,777 (546.00 KB)
+##  Non-trainable params: 0 (0.00 B)
+
+
+

Train the model with class_weight argument +

+
+metrics <- list(
+  metric_false_negatives(name = "fn"),
+  metric_false_positives(name = "fp"),
+  metric_true_negatives(name = "tn"),
+  metric_true_positives(name = "tp"),
+  metric_precision(name = "precision"),
+  metric_recall(name = "recall")
+)
+model |> compile(
+  optimizer = optimizer_adam(1e-2),
+  loss = "binary_crossentropy",
+  metrics = metrics
+)
+callbacks <- list(
+  callback_model_checkpoint("fraud_model_at_epoch_{epoch}.keras")
+)
+
+class_weight <- list("0" = weight_for_0,
+                     "1" = weight_for_1)
+
+model |> fit(
+  train_features, train_targets,
+  validation_data = list(val_features, val_targets),
+  class_weight = class_weight,
+  batch_size = 2048,
+  epochs = 30,
+  callbacks = callbacks,
+  verbose = 2
+)
+
## Epoch 1/30
+## 112/112 - 2s - 22ms/step - fn: 42.0000 - fp: 22983.0000 - loss: 2.3302e-06 - precision: 0.0146 - recall: 0.8903 - tn: 204480.0000 - tp: 341.0000 - val_fn: 12.0000 - val_fp: 536.0000 - val_loss: 0.0540 - val_precision: 0.1532 - val_recall: 0.8899 - val_tn: 56316.0000 - val_tp: 97.0000
+## Epoch 2/30
+## 112/112 - 0s - 4ms/step - fn: 36.0000 - fp: 6311.0000 - loss: 1.4468e-06 - precision: 0.0521 - recall: 0.9060 - tn: 221152.0000 - tp: 347.0000 - val_fn: 9.0000 - val_fp: 1081.0000 - val_loss: 0.0956 - val_precision: 0.0847 - val_recall: 0.9174 - val_tn: 55771.0000 - val_tp: 100.0000
+## Epoch 3/30
+## 112/112 - 0s - 2ms/step - fn: 27.0000 - fp: 7704.0000 - loss: 1.2139e-06 - precision: 0.0442 - recall: 0.9295 - tn: 219759.0000 - tp: 356.0000 - val_fn: 11.0000 - val_fp: 631.0000 - val_loss: 0.0443 - val_precision: 0.1344 - val_recall: 0.8991 - val_tn: 56221.0000 - val_tp: 98.0000
+## Epoch 4/30
+## 112/112 - 0s - 1ms/step - fn: 26.0000 - fp: 8933.0000 - loss: 1.1396e-06 - precision: 0.0384 - recall: 0.9321 - tn: 218530.0000 - tp: 357.0000 - val_fn: 4.0000 - val_fp: 3813.0000 - val_loss: 0.1850 - val_precision: 0.0268 - val_recall: 0.9633 - val_tn: 53039.0000 - val_tp: 105.0000
+## Epoch 5/30
+## 112/112 - 0s - 1ms/step - fn: 21.0000 - fp: 6451.0000 - loss: 9.1104e-07 - precision: 0.0531 - recall: 0.9452 - tn: 221012.0000 - tp: 362.0000 - val_fn: 9.0000 - val_fp: 1398.0000 - val_loss: 0.0724 - val_precision: 0.0668 - val_recall: 0.9174 - val_tn: 55454.0000 - val_tp: 100.0000
+## Epoch 6/30
+## 112/112 - 0s - 1ms/step - fn: 16.0000 - fp: 5997.0000 - loss: 7.4602e-07 - precision: 0.0577 - recall: 0.9582 - tn: 221466.0000 - tp: 367.0000 - val_fn: 8.0000 - val_fp: 2379.0000 - val_loss: 0.1421 - val_precision: 0.0407 - val_recall: 0.9266 - val_tn: 54473.0000 - val_tp: 101.0000
+## Epoch 7/30
+## 112/112 - 0s - 1ms/step - fn: 14.0000 - fp: 7075.0000 - loss: 7.6051e-07 - precision: 0.0496 - recall: 0.9634 - tn: 220388.0000 - tp: 369.0000 - val_fn: 10.0000 - val_fp: 888.0000 - val_loss: 0.0525 - val_precision: 0.1003 - val_recall: 0.9083 - val_tn: 55964.0000 - val_tp: 99.0000
+## Epoch 8/30
+## 112/112 - 0s - 2ms/step - fn: 20.0000 - fp: 6333.0000 - loss: 8.2217e-07 - precision: 0.0542 - recall: 0.9478 - tn: 221130.0000 - tp: 363.0000 - val_fn: 6.0000 - val_fp: 3549.0000 - val_loss: 0.1391 - val_precision: 0.0282 - val_recall: 0.9450 - val_tn: 53303.0000 - val_tp: 103.0000
+## Epoch 9/30
+## 112/112 - 0s - 2ms/step - fn: 11.0000 - fp: 7070.0000 - loss: 8.2005e-07 - precision: 0.0500 - recall: 0.9713 - tn: 220393.0000 - tp: 372.0000 - val_fn: 11.0000 - val_fp: 1307.0000 - val_loss: 0.0807 - val_precision: 0.0698 - val_recall: 0.8991 - val_tn: 55545.0000 - val_tp: 98.0000
+## Epoch 10/30
+## 112/112 - 0s - 2ms/step - fn: 13.0000 - fp: 6641.0000 - loss: 7.9420e-07 - precision: 0.0528 - recall: 0.9661 - tn: 220822.0000 - tp: 370.0000 - val_fn: 8.0000 - val_fp: 1721.0000 - val_loss: 0.0840 - val_precision: 0.0554 - val_recall: 0.9266 - val_tn: 55131.0000 - val_tp: 101.0000
+## Epoch 11/30
+## 112/112 - 0s - 2ms/step - fn: 13.0000 - fp: 7348.0000 - loss: 7.5710e-07 - precision: 0.0479 - recall: 0.9661 - tn: 220115.0000 - tp: 370.0000 - val_fn: 9.0000 - val_fp: 1006.0000 - val_loss: 0.0516 - val_precision: 0.0904 - val_recall: 0.9174 - val_tn: 55846.0000 - val_tp: 100.0000
+## Epoch 12/30
+## 112/112 - 0s - 2ms/step - fn: 11.0000 - fp: 5095.0000 - loss: 5.2767e-07 - precision: 0.0680 - recall: 0.9713 - tn: 222368.0000 - tp: 372.0000 - val_fn: 10.0000 - val_fp: 932.0000 - val_loss: 0.0462 - val_precision: 0.0960 - val_recall: 0.9083 - val_tn: 55920.0000 - val_tp: 99.0000
+## Epoch 13/30
+## 112/112 - 0s - 2ms/step - fn: 4.0000 - fp: 4173.0000 - loss: 4.2331e-07 - precision: 0.0833 - recall: 0.9896 - tn: 223290.0000 - tp: 379.0000 - val_fn: 11.0000 - val_fp: 442.0000 - val_loss: 0.0245 - val_precision: 0.1815 - val_recall: 0.8991 - val_tn: 56410.0000 - val_tp: 98.0000
+## Epoch 14/30
+## 112/112 - 0s - 2ms/step - fn: 9.0000 - fp: 5914.0000 - loss: 6.4355e-07 - precision: 0.0595 - recall: 0.9765 - tn: 221549.0000 - tp: 374.0000 - val_fn: 8.0000 - val_fp: 1851.0000 - val_loss: 0.0791 - val_precision: 0.0517 - val_recall: 0.9266 - val_tn: 55001.0000 - val_tp: 101.0000
+## Epoch 15/30
+## 112/112 - 0s - 2ms/step - fn: 7.0000 - fp: 6205.0000 - loss: 5.7546e-07 - precision: 0.0571 - recall: 0.9817 - tn: 221258.0000 - tp: 376.0000 - val_fn: 8.0000 - val_fp: 1595.0000 - val_loss: 0.0678 - val_precision: 0.0596 - val_recall: 0.9266 - val_tn: 55257.0000 - val_tp: 101.0000
+## Epoch 16/30
+## 112/112 - 0s - 2ms/step - fn: 8.0000 - fp: 6424.0000 - loss: 7.3225e-07 - precision: 0.0552 - recall: 0.9791 - tn: 221039.0000 - tp: 375.0000 - val_fn: 11.0000 - val_fp: 904.0000 - val_loss: 0.0412 - val_precision: 0.0978 - val_recall: 0.8991 - val_tn: 55948.0000 - val_tp: 98.0000
+## Epoch 17/30
+## 112/112 - 0s - 2ms/step - fn: 4.0000 - fp: 4041.0000 - loss: 3.7349e-07 - precision: 0.0857 - recall: 0.9896 - tn: 223422.0000 - tp: 379.0000 - val_fn: 9.0000 - val_fp: 1348.0000 - val_loss: 0.0906 - val_precision: 0.0691 - val_recall: 0.9174 - val_tn: 55504.0000 - val_tp: 100.0000
+## Epoch 18/30
+## 112/112 - 0s - 2ms/step - fn: 6.0000 - fp: 5602.0000 - loss: 6.0889e-07 - precision: 0.0631 - recall: 0.9843 - tn: 221861.0000 - tp: 377.0000 - val_fn: 8.0000 - val_fp: 935.0000 - val_loss: 0.0487 - val_precision: 0.0975 - val_recall: 0.9266 - val_tn: 55917.0000 - val_tp: 101.0000
+## Epoch 19/30
+## 112/112 - 0s - 2ms/step - fn: 7.0000 - fp: 5741.0000 - loss: 5.3650e-07 - precision: 0.0615 - recall: 0.9817 - tn: 221722.0000 - tp: 376.0000 - val_fn: 8.0000 - val_fp: 1279.0000 - val_loss: 0.0567 - val_precision: 0.0732 - val_recall: 0.9266 - val_tn: 55573.0000 - val_tp: 101.0000
+## Epoch 20/30
+## 112/112 - 0s - 2ms/step - fn: 1.0000 - fp: 3224.0000 - loss: 2.7791e-07 - precision: 0.1059 - recall: 0.9974 - tn: 224239.0000 - tp: 382.0000 - val_fn: 11.0000 - val_fp: 705.0000 - val_loss: 0.0347 - val_precision: 0.1220 - val_recall: 0.8991 - val_tn: 56147.0000 - val_tp: 98.0000
+## Epoch 21/30
+## 112/112 - 0s - 2ms/step - fn: 2.0000 - fp: 3403.0000 - loss: 2.7057e-07 - precision: 0.1007 - recall: 0.9948 - tn: 224060.0000 - tp: 381.0000 - val_fn: 11.0000 - val_fp: 330.0000 - val_loss: 0.0178 - val_precision: 0.2290 - val_recall: 0.8991 - val_tn: 56522.0000 - val_tp: 98.0000
+## Epoch 22/30
+## 112/112 - 0s - 2ms/step - fn: 3.0000 - fp: 1904.0000 - loss: 2.0272e-07 - precision: 0.1664 - recall: 0.9922 - tn: 225559.0000 - tp: 380.0000 - val_fn: 10.0000 - val_fp: 1666.0000 - val_loss: 0.1536 - val_precision: 0.0561 - val_recall: 0.9083 - val_tn: 55186.0000 - val_tp: 99.0000
+## Epoch 23/30
+## 112/112 - 0s - 2ms/step - fn: 11.0000 - fp: 6652.0000 - loss: 7.3955e-07 - precision: 0.0530 - recall: 0.9713 - tn: 220811.0000 - tp: 372.0000 - val_fn: 9.0000 - val_fp: 1633.0000 - val_loss: 0.0681 - val_precision: 0.0577 - val_recall: 0.9174 - val_tn: 55219.0000 - val_tp: 100.0000
+## Epoch 24/30
+## 112/112 - 0s - 2ms/step - fn: 3.0000 - fp: 3458.0000 - loss: 2.8492e-07 - precision: 0.0990 - recall: 0.9922 - tn: 224005.0000 - tp: 380.0000 - val_fn: 12.0000 - val_fp: 635.0000 - val_loss: 0.0299 - val_precision: 0.1325 - val_recall: 0.8899 - val_tn: 56217.0000 - val_tp: 97.0000
+## Epoch 25/30
+## 112/112 - 0s - 2ms/step - fn: 7.0000 - fp: 5581.0000 - loss: 6.4007e-07 - precision: 0.0631 - recall: 0.9817 - tn: 221882.0000 - tp: 376.0000 - val_fn: 9.0000 - val_fp: 1782.0000 - val_loss: 0.0677 - val_precision: 0.0531 - val_recall: 0.9174 - val_tn: 55070.0000 - val_tp: 100.0000
+## Epoch 26/30
+## 112/112 - 0s - 2ms/step - fn: 1.0000 - fp: 3655.0000 - loss: 2.9078e-07 - precision: 0.0946 - recall: 0.9974 - tn: 223808.0000 - tp: 382.0000 - val_fn: 13.0000 - val_fp: 677.0000 - val_loss: 0.0310 - val_precision: 0.1242 - val_recall: 0.8807 - val_tn: 56175.0000 - val_tp: 96.0000
+## Epoch 27/30
+## 112/112 - 0s - 2ms/step - fn: 2.0000 - fp: 3187.0000 - loss: 2.7425e-07 - precision: 0.1068 - recall: 0.9948 - tn: 224276.0000 - tp: 381.0000 - val_fn: 12.0000 - val_fp: 701.0000 - val_loss: 0.0271 - val_precision: 0.1216 - val_recall: 0.8899 - val_tn: 56151.0000 - val_tp: 97.0000
+## Epoch 28/30
+## 112/112 - 0s - 2ms/step - fn: 10.0000 - fp: 4659.0000 - loss: 6.8191e-07 - precision: 0.0741 - recall: 0.9739 - tn: 222804.0000 - tp: 373.0000 - val_fn: 11.0000 - val_fp: 1022.0000 - val_loss: 0.0797 - val_precision: 0.0875 - val_recall: 0.8991 - val_tn: 55830.0000 - val_tp: 98.0000
+## Epoch 29/30
+## 112/112 - 0s - 2ms/step - fn: 4.0000 - fp: 4183.0000 - loss: 3.9729e-07 - precision: 0.0831 - recall: 0.9896 - tn: 223280.0000 - tp: 379.0000 - val_fn: 12.0000 - val_fp: 406.0000 - val_loss: 0.0232 - val_precision: 0.1928 - val_recall: 0.8899 - val_tn: 56446.0000 - val_tp: 97.0000
+## Epoch 30/30
+## 112/112 - 0s - 2ms/step - fn: 2.0000 - fp: 2432.0000 - loss: 2.3592e-07 - precision: 0.1354 - recall: 0.9948 - tn: 225031.0000 - tp: 381.0000 - val_fn: 13.0000 - val_fp: 371.0000 - val_loss: 0.0200 - val_precision: 0.2056 - val_recall: 0.8807 - val_tn: 56481.0000 - val_tp: 96.0000
+
+val_pred <- model %>%
+  predict(val_features) %>%
+  { as.integer(. > 0.5) }
+
## 1781/1781 - 1s - 286us/step
+
+pred_correct <- val_df$Class == val_pred
+cat(sprintf("Validation accuracy: %.2f", mean(pred_correct)))
+
## Validation accuracy: 0.99
+
+fraudulent <- val_df$Class == 1
+
+n_fraudulent_detected <- sum(fraudulent & pred_correct)
+n_fraudulent_missed <- sum(fraudulent & !pred_correct)
+n_legitimate_flagged <- sum(!fraudulent & !pred_correct)
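
A small sketch to print those counts (they are summarized in the Conclusions below):

+cat(sprintf(
+  "Detected: %d | Missed: %d | Legitimate flagged: %d\n",
+  n_fraudulent_detected, n_fraudulent_missed, n_legitimate_flagged
+))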
+
+
+

Conclusions +

+

At the end of training, out of 56,961 validation transactions, we +are:

+
  • Correctly identifying 96 of them as fraudulent
  • Missing 13 fraudulent transactions
  • At the cost of incorrectly flagging 371 legitimate transactions
+

In the real world, one would put an even higher weight on class 1, so as to reflect that false negatives are more costly than false positives.
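
For instance, a minimal sketch of what that could look like (the multiplier of 5 is illustrative, not tuned):

+# Hypothetical: weight missed frauds more heavily, then refit as above
+class_weight <- list("0" = weight_for_0,
+                     "1" = 5 * weight_for_1)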

+

Next time your credit card gets declined in an online purchase – this +is why.

diff --git a/docs/articles/examples/structured_data/structured_data_classification_with_feature_space.html b/docs/articles/examples/structured_data/structured_data_classification_with_feature_space.html
new file mode 100644
index 0000000000..f1e6198ad3
--- /dev/null
+++ b/docs/articles/examples/structured_data/structured_data_classification_with_feature_space.html
@@ -0,0 +1,675 @@
+Structured data classification with FeatureSpace • keras3

Introduction +

+

This example demonstrates how to do structured data classification (also known as tabular data classification), starting from a raw CSV file. Our data includes numerical features, integer categorical features, and string categorical features. We will use the utility layer_feature_space() to index, preprocess, and encode our features.

+

The code is adapted from the example Structured +data classification from scratch. While the previous example managed +its own low-level feature preprocessing and encoding with Keras +preprocessing layers, in this example we delegate everything to +layer_feature_space(), making the workflow extremely quick +and easy.

+
+

The dataset +

+

Our +dataset is provided by the Cleveland Clinic Foundation for Heart +Disease. It’s a CSV file with 303 rows. Each row contains information +about a patient (a sample), and each column describes +an attribute of the patient (a feature). We use the +features to predict whether a patient has a heart disease +(binary classification).

+

Here’s the description of each feature:

Column   | Description                                            | Feature Type
---------|--------------------------------------------------------|------------------------------
Age      | Age in years                                           | Numerical
Sex      | (1 = male; 0 = female)                                 | Categorical
CP       | Chest pain type (0, 1, 2, 3, 4)                        | Categorical
Trestbpd | Resting blood pressure (in mm Hg on admission)         | Numerical
Chol     | Serum cholesterol in mg/dl                             | Numerical
FBS      | Fasting blood sugar > 120 mg/dl (1 = true; 0 = false)  | Categorical
RestECG  | Resting electrocardiogram results (0, 1, 2)            | Categorical
Thalach  | Maximum heart rate achieved                            | Numerical
Exang    | Exercise induced angina (1 = yes; 0 = no)              | Categorical
Oldpeak  | ST depression induced by exercise relative to rest     | Numerical
Slope    | Slope of the peak exercise ST segment                  | Numerical
CA       | Number of major vessels (0-3) colored by fluoroscopy   | Both numerical & categorical
Thal     | 3 = normal; 6 = fixed defect; 7 = reversible defect    | Categorical
Target   | Diagnosis of heart disease (1 = true; 0 = false)       | Target
+
+
+
+

Setup +

+
+library(readr)
+library(dplyr, warn.conflicts = FALSE)
+library(keras3)
+library(tensorflow, exclude = c("shape", "set_random_seed"))
+library(tfdatasets, exclude = "shape")
+
+conflicted::conflicts_prefer(
+  keras3::shape(),
+  keras3::set_random_seed(),
+  dplyr::filter(),
+  .quiet = TRUE
+)
+
+use_backend("tensorflow")
+
+
+

Preparing the data +

+

Let’s download the data and load it into a data frame:

+
+file_url <-
+  "http://storage.googleapis.com/download.tensorflow.org/data/heart.csv"
+df <- read_csv(file_url, col_types = cols(
+  oldpeak = col_double(),
+  thal = col_character(),
+  .default = col_integer()
+))
+
+# the dataset has two malformed rows, filter them out
+df <- df |> filter(!thal %in% c("1", "2"))
+

After filtering out the two malformed rows, the dataset includes 301 samples with 14 columns per sample (13 features, plus the target label).

+
+glimpse(df)
+
## Rows: 301
+## Columns: 14
+## $ age      <int> 63, 67, 67, 37, 41, 56, 62, 57, 63, 53, 57, 56, 56, 44, 5…
+## $ sex      <int> 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, …
+## $ cp       <int> 1, 4, 4, 3, 2, 2, 4, 4, 4, 4, 4, 2, 3, 2, 3, 3, 2, 4, 3, …
+## $ trestbps <int> 145, 160, 120, 130, 130, 120, 140, 120, 130, 140, 140, 14…
+## $ chol     <int> 233, 286, 229, 250, 204, 236, 268, 354, 254, 203, 192, 29…
+## $ fbs      <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, …
+## $ restecg  <int> 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0, 0, 0, 0, 0, …
+## $ thalach  <int> 150, 108, 129, 187, 172, 178, 160, 163, 147, 155, 148, 15…
+## $ exang    <int> 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
+## $ oldpeak  <dbl> 2.3, 1.5, 2.6, 3.5, 1.4, 0.8, 3.6, 0.6, 1.4, 3.1, 0.4, 1.…
+## $ slope    <int> 3, 2, 2, 3, 1, 1, 3, 1, 2, 3, 2, 2, 2, 1, 1, 1, 3, 1, 1, …
+## $ ca       <int> 0, 3, 2, 0, 0, 0, 2, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
+## $ thal     <chr> "fixed", "normal", "reversible", "normal", "normal", "nor…
+## $ target   <int> 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
+

Here’s a preview of a few samples:

+
+df
+
## # A tibble: 301 × 14
+##      age   sex    cp trestbps  chol   fbs restecg thalach exang oldpeak
+##    <int> <int> <int>    <int> <int> <int>   <int>   <int> <int>   <dbl>
+##  1    63     1     1      145   233     1       2     150     0     2.3
+##  2    67     1     4      160   286     0       2     108     1     1.5
+##  3    67     1     4      120   229     0       2     129     1     2.6
+##  4    37     1     3      130   250     0       0     187     0     3.5
+##  5    41     0     2      130   204     0       2     172     0     1.4
+##  6    56     1     2      120   236     0       0     178     0     0.8
+##  7    62     0     4      140   268     0       2     160     0     3.6
+##  8    57     0     4      120   354     0       0     163     1     0.6
+##  9    63     1     4      130   254     0       2     147     0     1.4
+## 10    53     1     4      140   203     1       2     155     1     3.1
+## # ℹ 291 more rows
+## # ℹ 4 more variables: slope <int>, ca <int>, thal <chr>, target <int>
+

The last column, “target”, indicates whether the patient has a heart +disease (1) or not (0).

+

Let’s split the data into a training and validation set:

+
+val_idx <- nrow(df) %>% sample.int(., . * 0.2)
+val_df <- df[val_idx, ]
+train_df <- df[-val_idx, ]
+
+cat(sprintf(
+  "Using %d samples for training and %d for validation",
+  nrow(train_df), nrow(val_df)
+))
+
## Using 241 samples for training and 60 for validation
+

Let’s generate tf_dataset objects for each +dataframe:

+
+dataframe_to_dataset <- function(df) {
+  labels <- df |> pull(target) |> as.integer()
+  inputs <- df |> select(-target) |> as.list()
+
+  ds <- tensor_slices_dataset(list(inputs, labels)) |>
+    dataset_shuffle(nrow(df))
+
+  ds
+}
+
+train_ds <- dataframe_to_dataset(train_df)
+val_ds <- dataframe_to_dataset(val_df)
+

Each tf_dataset yields a tuple +(input, target) where input is a dictionary (a +named list) of features and target is the value +0 or 1:

+
+c(x, y) %<-% iter_next(as_iterator(train_ds))
+cat("Input: "); str(x)
+cat("Target: "); str(y)
+
## Input: List of 13
+##  $ age     :<tf.Tensor: shape=(), dtype=int32, numpy=41>
+##  $ sex     :<tf.Tensor: shape=(), dtype=int32, numpy=1>
+##  $ cp      :<tf.Tensor: shape=(), dtype=int32, numpy=2>
+##  $ trestbps:<tf.Tensor: shape=(), dtype=int32, numpy=120>
+##  $ chol    :<tf.Tensor: shape=(), dtype=int32, numpy=157>
+##  $ fbs     :<tf.Tensor: shape=(), dtype=int32, numpy=0>
+##  $ restecg :<tf.Tensor: shape=(), dtype=int32, numpy=0>
+##  $ thalach :<tf.Tensor: shape=(), dtype=int32, numpy=182>
+##  $ exang   :<tf.Tensor: shape=(), dtype=int32, numpy=0>
+##  $ oldpeak :<tf.Tensor: shape=(), dtype=float32, numpy=0.0>
+##  $ slope   :<tf.Tensor: shape=(), dtype=int32, numpy=1>
+##  $ ca      :<tf.Tensor: shape=(), dtype=int32, numpy=0>
+##  $ thal    :<tf.Tensor: shape=(), dtype=string, numpy=b'normal'>
+## Target: <tf.Tensor: shape=(), dtype=int32, numpy=0>
+

Let’s batch the datasets:

+
+train_ds <- train_ds |> dataset_batch(32)
+val_ds <- val_ds |> dataset_batch(32)
+
+
+

Configuring a FeatureSpace +

+

To configure how each feature should be preprocessed, we instantiate +a layer_feature_space, and we pass to it a dictionary +(named list with unique names) that maps the name of our features to a +string that describes the feature type.

+

We have a few “integer categorical” features such as +"FBS", one “string categorical” feature +("thal"), and a few numerical features, which we’d like to +normalize – except "age", which we’d like to discretize +into a number of bins.

+

We also use the crosses argument to capture feature interactions for some categorical features, that is to say, create additional features that represent value co-occurrences for these categorical features. You can compute feature crosses like this for arbitrary sets of categorical features – not just tuples of two features. Because the resulting co-occurrences are hashed into a fixed-size vector, you don’t need to worry about whether the co-occurrence space is too large.

+
+feature_space <- layer_feature_space(
+  features = list(
+    # Categorical features encoded as integers
+    sex = "integer_categorical",
+    cp = "integer_categorical",
+    fbs = "integer_categorical",
+    restecg = "integer_categorical",
+    exang = "integer_categorical",
+    ca = "integer_categorical",
+    # Categorical feature encoded as string
+    thal = "string_categorical",
+    # Numerical features to discretize
+    age = "float_discretized",
+    # Numerical features to normalize
+    trestbps = "float_normalized",
+    chol = "float_normalized",
+    thalach = "float_normalized",
+    oldpeak = "float_normalized",
+    slope = "float_normalized"
+  ),
+  # We create additional features by hashing
+  # value co-occurrences for the
+  # following groups of categorical features.
+  crosses = list(c("sex", "age"), c("thal", "ca")),
+  # The hashing space for these co-occurrences
+  # will be 32-dimensional.
+  crossing_dim = 32,
+  # Our utility will one-hot encode all categorical
+  # features and concat all features into a single
+  # vector (one vector per sample).
+  output_mode = "concat"
+)
+
+
+

Further customizing a FeatureSpace +

+

Specifying the feature type via a string name is quick and easy, but +sometimes you may want to further configure the preprocessing of each +feature. For instance, in our case, our categorical features don’t have +a large set of possible values – it’s only a handful of values per +feature (e.g. 1 and 0 for the feature +"FBS"), and all possible values are represented in the +training set. As a result, we don’t need to reserve an index to +represent “out of vocabulary” values for these features – which would +have been the default behavior. Below, we just specify +num_oov_indices=0 in each of these features to tell the +feature preprocessor to skip “out of vocabulary” indexing.

+

Other customizations you have access to include specifying the number +of bins for discretizing features of type +"float_discretized", or the dimensionality of the hashing +space for feature crossing.

+
+feature_space <- layer_feature_space(
+  features = list(
+    # Categorical features encoded as integers
+    sex       = feature_integer_categorical(num_oov_indices = 0),
+    cp        = feature_integer_categorical(num_oov_indices = 0),
+    fbs       = feature_integer_categorical(num_oov_indices = 0),
+    restecg   = feature_integer_categorical(num_oov_indices = 0),
+    exang     = feature_integer_categorical(num_oov_indices = 0),
+    ca        = feature_integer_categorical(num_oov_indices = 0),
+    # Categorical feature encoded as string
+    thal      = feature_string_categorical(num_oov_indices = 0),
+    # Numerical features to discretize
+    age       = feature_float_discretized(num_bins = 30),
+    # Numerical features to normalize
+    trestbps  = feature_float_normalized(),
+    chol      = feature_float_normalized(),
+    thalach   = feature_float_normalized(),
+    oldpeak   = feature_float_normalized(),
+    slope     = feature_float_normalized()
+  ),
+  # Specify feature cross with a custom crossing dim.
+  crosses = list(
+    feature_cross(
+      feature_names = c("sex", "age"),
+      crossing_dim = 64
+    ),
+    feature_cross(
+      feature_names = c("thal", "ca"),
+      crossing_dim = 16
+    )
+  ),
+  output_mode = "concat"
+)
+
+
+

Adapt the FeatureSpace to the training data +

+

Before we start using the FeatureSpace to build a model, +we have to adapt it to the training data. During adapt(), +the FeatureSpace will:

+
  • Index the set of possible values for categorical features.
  • Compute the mean and variance for numerical features to normalize.
  • Compute the value boundaries for the different bins for numerical features to discretize.
+

Note that adapt() should be called on a +tf_dataset which yields dicts (named lists) of feature +values – no labels.

+
+train_ds_with_no_labels <- train_ds |> dataset_map(\(x, y) x)
+feature_space |> adapt(train_ds_with_no_labels)
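
As an aside, an adapted FeatureSpace can itself be saved and reloaded, much like a model (a sketch; the file name is illustrative and this assumes the standard Keras saving mechanism):

+feature_space$save("myfeaturespace.keras")
+feature_space <- load_model("myfeaturespace.keras")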
+

At this point, the FeatureSpace can be called on a dict of raw feature values, and it will return a single concatenated vector for each sample, combining encoded features and feature crosses.

+
+c(x, y) %<-% iter_next(as_iterator(train_ds))
+preprocessed_x <- feature_space(x)
+preprocessed_x
+
## tf.Tensor(
+## [[0. 0. 0. ... 0. 1. 0.]
+##  [0. 0. 0. ... 0. 0. 0.]
+##  [0. 0. 0. ... 0. 0. 0.]
+##  ...
+##  [0. 0. 0. ... 0. 0. 0.]
+##  [0. 0. 0. ... 0. 0. 0.]
+##  [0. 0. 0. ... 0. 0. 0.]], shape=(32, 136), dtype=float32)
+
+
+

Two ways to manage preprocessing: as part of the +tf.data pipeline, or in the model itself +

+

There are two ways in which you can leverage your +FeatureSpace:

+
+

Asynchronous preprocessing in tf.data +

+

You can make it part of your data pipeline, before the model. This enables asynchronous, parallel preprocessing of the data on CPU before it hits the model. Do this if you’re training on GPU or TPU, or if you want to speed up preprocessing. This is almost always the right thing to do during training.

+
+
+

Synchronous preprocessing in the model +

+

You can make it part of your model. This means that the model will expect dicts (named lists) of raw feature values, and preprocessing will be done synchronously (in a blocking manner) before the rest of the forward pass. Do this if you want to have an end-to-end model that can process raw feature values – but keep in mind that your model will only be able to run on CPU, since most types of feature preprocessing (e.g. string preprocessing) are not GPU or TPU compatible.

+

Do not do this on GPU / TPU or in performance-sensitive settings. In +general, you want to do in-model preprocessing when you do inference on +CPU.

+

In our case, we will apply the FeatureSpace in the +tf.data pipeline during training, but we will do inference with an +end-to-end model that includes the FeatureSpace.

+

Let’s create a training and validation dataset of preprocessed +batches:

+
+preprocessed_train_ds <- train_ds |>
+  dataset_map(\(x, y) list(feature_space(x), y),
+              num_parallel_calls = tf$data$AUTOTUNE) |>
+  dataset_prefetch(tf$data$AUTOTUNE)
+
+preprocessed_val_ds <- val_ds |>
+  dataset_map(\(x, y) list(feature_space(x), y),
+              num_parallel_calls = tf$data$AUTOTUNE) |>
+  dataset_prefetch(tf$data$AUTOTUNE)
+
+
+
+

Build a model +

+

Time to build a model – or rather two models:

+
  • A training model that expects preprocessed features (one sample = one vector)
  • An inference model that expects raw features (one sample = dict of raw feature values)
+
+dict_inputs <- feature_space$get_inputs()
+encoded_features <- feature_space$get_encoded_features()
+
+predictions <- encoded_features |>
+  layer_dense(32, activation="relu") |>
+  layer_dropout(0.5) |>
+  layer_dense(1, activation="sigmoid")
+
+training_model <- keras_model(inputs = encoded_features,
+                              outputs = predictions)
+training_model |> compile(optimizer = "adam",
+                          loss = "binary_crossentropy",
+                          metrics = "accuracy")
+
+inference_model <- keras_model(inputs = dict_inputs,
+                               outputs = predictions)
+
+
+

Train the model +

+

Let’s train our model for 20 epochs. Note that feature preprocessing +is happening as part of the tf.data pipeline, not as part of the +model.

+
+training_model |> fit(
+  preprocessed_train_ds,
+  epochs = 20,
+  validation_data = preprocessed_val_ds,
+  verbose = 2
+)
+
## Epoch 1/20
+## 8/8 - 3s - 320ms/step - accuracy: 0.4564 - loss: 0.7517 - val_accuracy: 0.4833 - val_loss: 0.7192
+## Epoch 2/20
+## 8/8 - 0s - 13ms/step - accuracy: 0.5436 - loss: 0.6978 - val_accuracy: 0.6167 - val_loss: 0.6730
+## Epoch 3/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.6473 - loss: 0.6429 - val_accuracy: 0.6500 - val_loss: 0.6319
+## Epoch 4/20
+## 8/8 - 0s - 13ms/step - accuracy: 0.6639 - loss: 0.6126 - val_accuracy: 0.6833 - val_loss: 0.5965
+## Epoch 5/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.7137 - loss: 0.5854 - val_accuracy: 0.7167 - val_loss: 0.5653
+## Epoch 6/20
+## 8/8 - 0s - 13ms/step - accuracy: 0.7386 - loss: 0.5565 - val_accuracy: 0.7667 - val_loss: 0.5353
+## Epoch 7/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.7718 - loss: 0.5237 - val_accuracy: 0.7667 - val_loss: 0.5086
+## Epoch 8/20
+## 8/8 - 0s - 13ms/step - accuracy: 0.7718 - loss: 0.5098 - val_accuracy: 0.8167 - val_loss: 0.4835
+## Epoch 9/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.8091 - loss: 0.4921 - val_accuracy: 0.8500 - val_loss: 0.4621
+## Epoch 10/20
+## 8/8 - 0s - 13ms/step - accuracy: 0.8050 - loss: 0.4562 - val_accuracy: 0.8333 - val_loss: 0.4435
+## Epoch 11/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.8091 - loss: 0.4398 - val_accuracy: 0.8333 - val_loss: 0.4261
+## Epoch 12/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.8257 - loss: 0.4272 - val_accuracy: 0.8167 - val_loss: 0.4095
+## Epoch 13/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.8050 - loss: 0.4381 - val_accuracy: 0.8333 - val_loss: 0.3949
+## Epoch 14/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.8091 - loss: 0.4118 - val_accuracy: 0.8333 - val_loss: 0.3828
+## Epoch 15/20
+## 8/8 - 0s - 13ms/step - accuracy: 0.8382 - loss: 0.3797 - val_accuracy: 0.8500 - val_loss: 0.3712
+## Epoch 16/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.8423 - loss: 0.3884 - val_accuracy: 0.8500 - val_loss: 0.3610
+## Epoch 17/20
+## 8/8 - 0s - 13ms/step - accuracy: 0.8548 - loss: 0.3654 - val_accuracy: 0.8500 - val_loss: 0.3511
+## Epoch 18/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.8299 - loss: 0.3726 - val_accuracy: 0.8667 - val_loss: 0.3434
+## Epoch 19/20
+## 8/8 - 0s - 13ms/step - accuracy: 0.8672 - loss: 0.3472 - val_accuracy: 0.8667 - val_loss: 0.3365
+## Epoch 20/20
+## 8/8 - 0s - 13ms/step - accuracy: 0.8714 - loss: 0.3380 - val_accuracy: 0.8667 - val_loss: 0.3301
+

We quickly get to over 85% validation accuracy.
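
To confirm, we can evaluate the trained model on the preprocessed validation set (a sketch):

+training_model |> evaluate(preprocessed_val_ds, verbose = 2)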

+
+
+

Inference on new data with the end-to-end model +

+

Now, we can use our inference model (which includes the +FeatureSpace) to make predictions based on dicts of raw +features values, as follows:

+
+sample <- list(
+  age = 60,
+  sex = 1,
+  cp = 1,
+  trestbps = 145,
+  chol = 233,
+  fbs = 1,
+  restecg = 2,
+  thalach = 150,
+  exang = 0,
+  oldpeak = 2.3,
+  slope = 3,
+  ca = 0,
+  thal = "fixed"
+)
+
+input_dict <- lapply(sample, \(x) op_convert_to_tensor(array(x)))
+predictions <- inference_model |> predict(input_dict)
+
## 1/1 - 0s - 257ms/step
+
+glue::glue(r"---(
+  This particular patient had a {(100 * predictions) |> signif(3)}% probability
+  of having a heart disease, as evaluated by our model.
+)---")
+
## This particular patient had a 48.9% probability
+## of having a heart disease, as evaluated by our model.
+
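
To reuse the end-to-end model later, it can be saved and reloaded like any other Keras model (a sketch; the file name is illustrative):

+inference_model |> save_model("heart_disease_inference.keras")
+reloaded_model <- load_model("heart_disease_inference.keras")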
+
+
diff --git a/docs/articles/examples/structured_data_classification_with_feature_space.html b/docs/articles/examples/structured_data_classification_with_feature_space.html
new file mode 100644
index 0000000000..c1cfb913fc
--- /dev/null
+++ b/docs/articles/examples/structured_data_classification_with_feature_space.html
@@ -0,0 +1,677 @@
+Structured data classification with FeatureSpace • keras3

Introduction +

+

This example demonstrates how to do structured data classification (also known as tabular data classification), starting from a raw CSV file. Our data includes numerical features, integer categorical features, and string categorical features. We will use the utility layer_feature_space() to index, preprocess, and encode our features.

+

The code is adapted from the example Structured +data classification from scratch. While the previous example managed +its own low-level feature preprocessing and encoding with Keras +preprocessing layers, in this example we delegate everything to +layer_feature_space(), making the workflow extremely quick +and easy.

+
+

The dataset +

+

Our +dataset is provided by the Cleveland Clinic Foundation for Heart +Disease. It’s a CSV file with 303 rows. Each row contains information +about a patient (a sample), and each column describes +an attribute of the patient (a feature). We use the +features to predict whether a patient has a heart disease +(binary classification).

+

Here’s the description of each feature:

Column   | Description                                            | Feature Type
---------|--------------------------------------------------------|------------------------------
Age      | Age in years                                           | Numerical
Sex      | (1 = male; 0 = female)                                 | Categorical
CP       | Chest pain type (0, 1, 2, 3, 4)                        | Categorical
Trestbpd | Resting blood pressure (in mm Hg on admission)         | Numerical
Chol     | Serum cholesterol in mg/dl                             | Numerical
FBS      | Fasting blood sugar > 120 mg/dl (1 = true; 0 = false)  | Categorical
RestECG  | Resting electrocardiogram results (0, 1, 2)            | Categorical
Thalach  | Maximum heart rate achieved                            | Numerical
Exang    | Exercise induced angina (1 = yes; 0 = no)              | Categorical
Oldpeak  | ST depression induced by exercise relative to rest     | Numerical
Slope    | Slope of the peak exercise ST segment                  | Numerical
CA       | Number of major vessels (0-3) colored by fluoroscopy   | Both numerical & categorical
Thal     | 3 = normal; 6 = fixed defect; 7 = reversible defect    | Categorical
Target   | Diagnosis of heart disease (1 = true; 0 = false)       | Target
+
+
+
+

Setup +

+
+library(readr)
+library(dplyr, warn.conflicts = FALSE)
+library(keras3)
+library(tensorflow, exclude = c("shape", "set_random_seed"))
+library(tfdatasets, exclude = "shape")
+
+conflicted::conflicts_prefer(
+  keras3::shape(),
+  keras3::set_random_seed(),
+  dplyr::filter(),
+  .quiet = TRUE
+)
+
+use_backend("tensorflow")
+
+
+

Preparing the data +

+

Let’s download the data and load it into a data frame:

+
+file_url <-
+  "http://storage.googleapis.com/download.tensorflow.org/data/heart.csv"
+df <- read_csv(file_url, col_types = cols(
+  oldpeak = col_double(),
+  thal = col_character(),
+  .default = col_integer()
+))
+
+# the dataset has two malformed rows, filter them out
+df <- df |> filter(!thal %in% c("1", "2"))
+

After filtering out the two malformed rows, the dataset includes 301 samples with 14 columns per sample (13 features, plus the target label).

+
+glimpse(df)
+
## Rows: 301
+## Columns: 14
+## $ age      <int> 63, 67, 67, 37, 41, 56, 62, 57, 63, 53, 57, 56, 56, 44, 5…
+## $ sex      <int> 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, …
+## $ cp       <int> 1, 4, 4, 3, 2, 2, 4, 4, 4, 4, 4, 2, 3, 2, 3, 3, 2, 4, 3, …
+## $ trestbps <int> 145, 160, 120, 130, 130, 120, 140, 120, 130, 140, 140, 14…
+## $ chol     <int> 233, 286, 229, 250, 204, 236, 268, 354, 254, 203, 192, 29…
+## $ fbs      <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, …
+## $ restecg  <int> 2, 2, 2, 0, 2, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0, 0, 0, 0, 0, …
+## $ thalach  <int> 150, 108, 129, 187, 172, 178, 160, 163, 147, 155, 148, 15…
+## $ exang    <int> 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
+## $ oldpeak  <dbl> 2.3, 1.5, 2.6, 3.5, 1.4, 0.8, 3.6, 0.6, 1.4, 3.1, 0.4, 1.…
+## $ slope    <int> 3, 2, 2, 3, 1, 1, 3, 1, 2, 3, 2, 2, 2, 1, 1, 1, 3, 1, 1, …
+## $ ca       <int> 0, 3, 2, 0, 0, 0, 2, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
+## $ thal     <chr> "fixed", "normal", "reversible", "normal", "normal", "nor…
+## $ target   <int> 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, …
+

Here’s a preview of a few samples:

+
+df
+
## # A tibble: 301 × 14
+##      age   sex    cp trestbps  chol   fbs restecg thalach exang oldpeak
+##    <int> <int> <int>    <int> <int> <int>   <int>   <int> <int>   <dbl>
+##  1    63     1     1      145   233     1       2     150     0     2.3
+##  2    67     1     4      160   286     0       2     108     1     1.5
+##  3    67     1     4      120   229     0       2     129     1     2.6
+##  4    37     1     3      130   250     0       0     187     0     3.5
+##  5    41     0     2      130   204     0       2     172     0     1.4
+##  6    56     1     2      120   236     0       0     178     0     0.8
+##  7    62     0     4      140   268     0       2     160     0     3.6
+##  8    57     0     4      120   354     0       0     163     1     0.6
+##  9    63     1     4      130   254     0       2     147     0     1.4
+## 10    53     1     4      140   203     1       2     155     1     3.1
+## # ℹ 291 more rows
+## # ℹ 4 more variables: slope <int>, ca <int>, thal <chr>, target <int>
+

The last column, “target”, indicates whether the patient has a heart +disease (1) or not (0).

+

Let’s split the data into a training and validation set:

+
+val_idx <- nrow(df) %>% sample.int(., . * 0.2)
+val_df <- df[val_idx, ]
+train_df <- df[-val_idx, ]
+
+cat(sprintf(
+  "Using %d samples for training and %d for validation",
+  nrow(train_df), nrow(val_df)
+))
+
## Using 241 samples for training and 60 for validation
+

Let’s generate tf_dataset objects for each +dataframe:

+
+dataframe_to_dataset <- function(df) {
+  labels <- df |> pull(target) |> as.integer()
+  inputs <- df |> select(-target) |> as.list()
+
+  ds <- tensor_slices_dataset(list(inputs, labels)) |>
+    dataset_shuffle(nrow(df))
+
+  ds
+}
+
+train_ds <- dataframe_to_dataset(train_df)
+val_ds <- dataframe_to_dataset(val_df)
+

Each tf_dataset yields a tuple +(input, target) where input is a dictionary (a +named list) of features and target is the value +0 or 1:

+
+c(x, y) %<-% iter_next(as_iterator(train_ds))
+cat("Input: "); str(x)
+cat("Target: "); str(y)
+
## Input: List of 13
+##  $ age     :<tf.Tensor: shape=(), dtype=int32, numpy=57>
+##  $ sex     :<tf.Tensor: shape=(), dtype=int32, numpy=0>
+##  $ cp      :<tf.Tensor: shape=(), dtype=int32, numpy=4>
+##  $ trestbps:<tf.Tensor: shape=(), dtype=int32, numpy=140>
+##  $ chol    :<tf.Tensor: shape=(), dtype=int32, numpy=241>
+##  $ fbs     :<tf.Tensor: shape=(), dtype=int32, numpy=0>
+##  $ restecg :<tf.Tensor: shape=(), dtype=int32, numpy=0>
+##  $ thalach :<tf.Tensor: shape=(), dtype=int32, numpy=123>
+##  $ exang   :<tf.Tensor: shape=(), dtype=int32, numpy=1>
+##  $ oldpeak :<tf.Tensor: shape=(), dtype=float32, numpy=0.2>
+##  $ slope   :<tf.Tensor: shape=(), dtype=int32, numpy=2>
+##  $ ca      :<tf.Tensor: shape=(), dtype=int32, numpy=0>
+##  $ thal    :<tf.Tensor: shape=(), dtype=string, numpy=b'reversible'>
+## Target: <tf.Tensor: shape=(), dtype=int32, numpy=0>
+

Let’s batch the datasets:

+
+train_ds <- train_ds |> dataset_batch(32)
+val_ds <- val_ds |> dataset_batch(32)
+
+
+

Configuring a FeatureSpace +

+

To configure how each feature should be preprocessed, we instantiate +a layer_feature_space, and we pass to it a dictionary +(named list with unique names) that maps the name of our features to a +string that describes the feature type.

+

We have a few “integer categorical” features such as +"FBS", one “string categorical” feature +("thal"), and a few numerical features, which we’d like to +normalize – except "age", which we’d like to discretize +into a number of bins.

+

We also use the crosses argument to capture feature interactions for some categorical features, that is to say, create additional features that represent value co-occurrences for these categorical features. You can compute feature crosses like this for arbitrary sets of categorical features – not just tuples of two features. Because the resulting co-occurrences are hashed into a fixed-size vector, you don’t need to worry about whether the co-occurrence space is too large.

+
+feature_space <- layer_feature_space(
+  features = list(
+    # Categorical features encoded as integers
+    sex = "integer_categorical",
+    cp = "integer_categorical",
+    fbs = "integer_categorical",
+    restecg = "integer_categorical",
+    exang = "integer_categorical",
+    ca = "integer_categorical",
+    # Categorical feature encoded as string
+    thal = "string_categorical",
+    # Numerical features to discretize
+    age = "float_discretized",
+    # Numerical features to normalize
+    trestbps = "float_normalized",
+    chol = "float_normalized",
+    thalach = "float_normalized",
+    oldpeak = "float_normalized",
+    slope = "float_normalized"
+  ),
+  # We create additional features by hashing
+  # value co-occurrences for the
+  # following groups of categorical features.
+  crosses = list(c("sex", "age"), c("thal", "ca")),
+  # The hashing space for these co-occurrences
+  # will be 32-dimensional.
+  crossing_dim = 32,
+  # Our utility will one-hot encode all categorical
+  # features and concat all features into a single
+  # vector (one vector per sample).
+  output_mode = "concat"
+)
+
+
+

Further customizing a FeatureSpace +

+

Specifying the feature type via a string name is quick and easy, but +sometimes you may want to further configure the preprocessing of each +feature. For instance, in our case, our categorical features don’t have +a large set of possible values – it’s only a handful of values per +feature (e.g. 1 and 0 for the feature +"FBS"), and all possible values are represented in the +training set. As a result, we don’t need to reserve an index to +represent “out of vocabulary” values for these features – which would +have been the default behavior. Below, we just specify +num_oov_indices=0 in each of these features to tell the +feature preprocessor to skip “out of vocabulary” indexing.

+

Other customizations you have access to include specifying the number +of bins for discretizing features of type +"float_discretized", or the dimensionality of the hashing +space for feature crossing.

+
+feature_space <- layer_feature_space(
+  features = list(
+    # Categorical features encoded as integers
+    sex       = feature_integer_categorical(num_oov_indices = 0),
+    cp        = feature_integer_categorical(num_oov_indices = 0),
+    fbs       = feature_integer_categorical(num_oov_indices = 0),
+    restecg   = feature_integer_categorical(num_oov_indices = 0),
+    exang     = feature_integer_categorical(num_oov_indices = 0),
+    ca        = feature_integer_categorical(num_oov_indices = 0),
+    # Categorical feature encoded as string
+    thal      = feature_string_categorical(num_oov_indices = 0),
+    # Numerical features to discretize
+    age       = feature_float_discretized(num_bins = 30),
+    # Numerical features to normalize
+    trestbps  = feature_float_normalized(),
+    chol      = feature_float_normalized(),
+    thalach   = feature_float_normalized(),
+    oldpeak   = feature_float_normalized(),
+    slope     = feature_float_normalized()
+  ),
+  # Specify feature cross with a custom crossing dim.
+  crosses = list(
+    feature_cross(
+      feature_names = c("sex", "age"),
+      crossing_dim = 64
+    ),
+    feature_cross(
+      feature_names = c("thal", "ca"),
+      crossing_dim = 16
+    )
+  ),
+  output_mode = "concat"
+)
+
+
+

Adapt the FeatureSpace to the training data +

+

Before we start using the FeatureSpace to build a model, +we have to adapt it to the training data. During adapt(), +the FeatureSpace will:

+
    +
  • Index the set of possible values for categorical features.
  • Compute the mean and variance for numerical features to normalize.
  • Compute the value boundaries for the different bins for numerical features to discretize.
+

Note that adapt() should be called on a +tf_dataset which yields dicts (named lists) of feature +values – no labels.

+
+train_ds_with_no_labels <- train_ds |> dataset_map(\(x, y) x)
+feature_space |> adapt(train_ds_with_no_labels)
+

At this point, the FeatureSpace can be called on a dict of raw feature values, and will return a single concatenated vector for each sample, combining encoded features and feature crosses.

+
+c(x, y) %<-% iter_next(as_iterator(train_ds))
+preprocessed_x <- feature_space(x)
+preprocessed_x
+
## tf.Tensor(
+## [[0. 0. 0. ... 1. 0. 0.]
+##  [0. 0. 0. ... 0. 0. 0.]
+##  [0. 0. 0. ... 0. 0. 0.]
+##  ...
+##  [0. 0. 0. ... 0. 0. 0.]
+##  [0. 0. 0. ... 0. 0. 0.]
+##  [0. 0. 0. ... 0. 0. 0.]], shape=(32, 136), dtype=float32)
+
+
+

Two ways to manage preprocessing: as part of the +tf.data pipeline, or in the model itself +

+

There are two ways in which you can leverage your +FeatureSpace:

+
+

Asynchronous preprocessing in tf.data +

+

You can make it part of your data pipeline, before the model. This enables asynchronous parallel preprocessing of the data on the CPU before it hits the model. Do this if you’re training on GPU or TPU, or if you want to speed up preprocessing. This is almost always the right thing to do during training.

+
+
+

Synchronous preprocessing in the model +

+

You can make it part of your model. This means that the model will expect dicts of raw feature values, and preprocessing will be done synchronously (in a blocking manner) before the rest of the forward pass. Do this if you want to have an end-to-end model that can process raw feature values – but keep in mind that your model will only be able to run on CPU, since most types of feature preprocessing (e.g. string preprocessing) are not GPU or TPU compatible.

+

Do not do this on GPU / TPU or in performance-sensitive settings. In general, you want to do in-model preprocessing when you do inference on CPU.

+
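For illustration, a minimal sketch of the in-model variant could look like the following; the Build a model section below uses the same get_inputs() and get_encoded_features() accessors of the adapted FeatureSpace.
+
+# Sketch only: a model that accepts dicts of raw feature values and runs
+# preprocessing synchronously as part of the forward pass.
+raw_inputs <- feature_space$get_inputs()
+encoded <- feature_space$get_encoded_features()
+sketch_outputs <- encoded |> layer_dense(1, activation = "sigmoid")
+cpu_model <- keras_model(raw_inputs, sketch_outputs)
+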

In our case, we will apply the FeatureSpace in the +tf.data pipeline during training, but we will do inference with an +end-to-end model that includes the FeatureSpace.

+

Let’s create a training and validation dataset of preprocessed +batches:

+
+preprocessed_train_ds <- train_ds |>
+  dataset_map(\(x, y) list(feature_space(x), y),
+              num_parallel_calls = tf$data$AUTOTUNE) |>
+  dataset_prefetch(tf$data$AUTOTUNE)
+
+preprocessed_val_ds <- val_ds |>
+  dataset_map(\(x, y) list(feature_space(x), y),
+              num_parallel_calls = tf$data$AUTOTUNE) |>
+  dataset_prefetch(tf$data$AUTOTUNE)
+
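If you want to confirm the new element structure (a quick check, not part of the original output), pull one preprocessed batch:
+
+c(px, py) %<-% iter_next(as_iterator(preprocessed_train_ds))
+op_shape(px)  # (32, 136): one concatenated feature vector per sample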
+
+
+

Build a model +

+

Time to build a model – or rather two models:

+
    +
  • A training model that expects preprocessed features (one sample = one vector)
  • An inference model that expects raw features (one sample = a dict of raw feature values)
+
+dict_inputs <- feature_space$get_inputs()
+encoded_features <- feature_space$get_encoded_features()
+
+predictions <- encoded_features |>
+  layer_dense(32, activation="relu") |>
+  layer_dropout(0.5) |>
+  layer_dense(1, activation="sigmoid")
+
+training_model <- keras_model(inputs = encoded_features,
+                              outputs = predictions)
+training_model |> compile(optimizer = "adam",
+                          loss = "binary_crossentropy",
+                          metrics = "accuracy")
+
+inference_model <- keras_model(inputs = dict_inputs,
+                               outputs = predictions)
+
+
+

Train the model +

+

Let’s train our model for 20 epochs. Note that feature preprocessing is happening as part of the tf.data pipeline, not as part of the model.

+
+training_model |> fit(
+  preprocessed_train_ds,
+  epochs = 20,
+  validation_data = preprocessed_val_ds,
+  verbose = 2
+)
+
## Epoch 1/20
+## 8/8 - 3s - 343ms/step - accuracy: 0.5685 - loss: 0.7120 - val_accuracy: 0.6500 - val_loss: 0.6607
+## Epoch 2/20
+## 8/8 - 0s - 13ms/step - accuracy: 0.6722 - loss: 0.6131 - val_accuracy: 0.6500 - val_loss: 0.6212
+## Epoch 3/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.7137 - loss: 0.5685 - val_accuracy: 0.6500 - val_loss: 0.5961
+## Epoch 4/20
+## 8/8 - 0s - 13ms/step - accuracy: 0.7718 - loss: 0.4981 - val_accuracy: 0.6667 - val_loss: 0.5758
+## Epoch 5/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.7635 - loss: 0.4804 - val_accuracy: 0.6500 - val_loss: 0.5575
+## Epoch 6/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.7676 - loss: 0.4739 - val_accuracy: 0.6667 - val_loss: 0.5309
+## Epoch 7/20
+## 8/8 - 0s - 13ms/step - accuracy: 0.7842 - loss: 0.4141 - val_accuracy: 0.6667 - val_loss: 0.5161
+## Epoch 8/20
+## 8/8 - 0s - 13ms/step - accuracy: 0.8133 - loss: 0.3841 - val_accuracy: 0.6667 - val_loss: 0.5080
+## Epoch 9/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.8133 - loss: 0.3997 - val_accuracy: 0.6667 - val_loss: 0.4980
+## Epoch 10/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.8340 - loss: 0.3618 - val_accuracy: 0.6833 - val_loss: 0.4882
+## Epoch 11/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.8548 - loss: 0.3335 - val_accuracy: 0.6833 - val_loss: 0.4814
+## Epoch 12/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.8465 - loss: 0.3535 - val_accuracy: 0.6833 - val_loss: 0.4818
+## Epoch 13/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.8340 - loss: 0.3363 - val_accuracy: 0.7000 - val_loss: 0.4755
+## Epoch 14/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.8548 - loss: 0.3224 - val_accuracy: 0.7333 - val_loss: 0.4745
+## Epoch 15/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.8714 - loss: 0.3083 - val_accuracy: 0.7500 - val_loss: 0.4813
+## Epoch 16/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.8797 - loss: 0.3270 - val_accuracy: 0.7500 - val_loss: 0.4690
+## Epoch 17/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.8631 - loss: 0.2955 - val_accuracy: 0.7500 - val_loss: 0.4782
+## Epoch 18/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.8797 - loss: 0.2831 - val_accuracy: 0.7667 - val_loss: 0.4818
+## Epoch 19/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.8797 - loss: 0.2872 - val_accuracy: 0.7667 - val_loss: 0.4767
+## Epoch 20/20
+## 8/8 - 0s - 12ms/step - accuracy: 0.8631 - loss: 0.2877 - val_accuracy: 0.7833 - val_loss: 0.4781
+

We quickly get to 80% validation accuracy.

+
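If you want to inspect the learning curves, capture the result of fit() in a variable; keras3 provides a plot() method for the returned history object. A sketch of the variation (not run here, to avoid continuing training):
+
+# history <- training_model |> fit(
+#   preprocessed_train_ds,
+#   epochs = 20,
+#   validation_data = preprocessed_val_ds
+# )
+# plot(history)  # loss and accuracy curves per epoch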
+
+

Inference on new data with the end-to-end model +

+

Now, we can use our inference model (which includes the FeatureSpace) to make predictions based on dicts of raw feature values, as follows:

+
+sample <- list(
+  age = 60,
+  sex = 1,
+  cp = 1,
+  trestbps = 145,
+  chol = 233,
+  fbs = 1,
+  restecg = 2,
+  thalach = 150,
+  exang = 0,
+  oldpeak = 2.3,
+  slope = 3,
+  ca = 0,
+  thal = "fixed"
+)
+
+input_dict <- lapply(sample, \(x) op_convert_to_tensor(array(x)))
+predictions <- inference_model |> predict(input_dict)
+
## 1/1 - 1s - 548ms/step
+
+glue::glue(r"---(
+  This particular patient had a {(100 * predictions) |> signif(3)}% probability
+  of having a heart disease, as evaluated by our model.
+)---")
+
## This particular patient had a 55% probability
+## of having a heart disease, as evaluated by our model.
+
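The end-to-end model can also be saved and reloaded like any other Keras model. A short sketch (assuming the standard save_model()/load_model() round trip; the file name is arbitrary):
+
+save_model(inference_model, "inference_model.keras")
+reloaded_model <- load_model("inference_model.keras")
+reloaded_model |> predict(input_dict)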
+
+
diff --git a/docs/articles/examples/text_classification_from_scratch.html b/docs/articles/examples/text_classification_from_scratch.html new file mode 100644 index 0000000000..14444ab6d0 --- /dev/null +++ b/docs/articles/examples/text_classification_from_scratch.html @@ -0,0 +1,476 @@
+Text classification from scratch • keras3

Introduction +

+

This example shows how to do text classification starting from raw +text (as a set of text files on disk). We demonstrate the workflow on +the IMDB sentiment classification dataset (unprocessed version). We use +[layer_text_vectorization()] for word splitting & +indexing.

+
+
+

Setup +

+
+options(conflicts.policy = "strict")
+library(tensorflow, exclude = c("shape", "set_random_seed"))
+library(tfdatasets, exclude = "shape")
+library(keras3)
+use_virtualenv("r-keras")
+
+
+

Load the data: IMDB movie review sentiment classification +

+

Let’s download the data and inspect its structure.

+
+if (!dir.exists("datasets/aclImdb")) {
+  dir.create("datasets")
+  download.file(
+    "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz",
+    "datasets/aclImdb_v1.tar.gz"
+  )
+  untar("datasets/aclImdb_v1.tar.gz", exdir = "datasets")
+  unlink("datasets/aclImdb/train/unsup", recursive = TRUE)
+}
+

The aclImdb folder contains a train and +test subfolder:

+
+head(list.files("datasets/aclImdb/test"))
+
## [1] "labeledBow.feat" "neg"             "pos"             "urls_neg.txt"
+## [5] "urls_pos.txt"
+
+head(list.files("datasets/aclImdb/train"))
+
## [1] "labeledBow.feat" "neg"             "pos"             "unsupBow.feat"
+## [5] "urls_neg.txt"    "urls_pos.txt"
+

The aclImdb/train/pos and aclImdb/train/neg +folders contain text files, each of which represents one review (either +positive or negative):

+
+cat(readLines("datasets/aclImdb/train/pos/6248_7.txt"))
+
## Being an Austrian myself this has been a straight knock in my face. Fortunately I don't live nowhere near the place where this movie takes place but unfortunately it portrays everything that the rest of Austria hates about Viennese people (or people close to that region). And it is very easy to read that this is exactly the directors intention: to let your head sink into your hands and say "Oh my god, how can THAT be possible!". No, not with me, the (in my opinion) totally exaggerated uncensored swinger club scene is not necessary, I watch porn, sure, but in this context I was rather disgusted than put in the right context.<br /><br />This movie tells a story about how misled people who suffer from lack of education or bad company try to survive and live in a world of redundancy and boring horizons. A girl who is treated like a whore by her super-jealous boyfriend (and still keeps coming back), a female teacher who discovers her masochism by putting the life of her super-cruel "lover" on the line, an old couple who has an almost mathematical daily cycle (she is the "official replacement" of his ex wife), a couple that has just divorced and has the ex husband suffer under the acts of his former wife obviously having a relationship with her masseuse and finally a crazy hitchhiker who asks her drivers the most unusual questions and stretches their nerves by just being super-annoying.<br /><br />After having seen it you feel almost nothing. You're not even shocked, sad, depressed or feel like doing anything... Maybe that's why I gave it 7 points, it made me react in a way I never reacted before. If that's good or bad is up to you!
+

We are only interested in the pos and neg +subfolders, so let’s delete the other subfolder that has text files in +it:

+
+unlink("datasets/aclImdb/train/unsup", recursive = TRUE)
+

You can use the utility text_dataset_from_directory() to +generate a labeled tf_dataset object from a set of text +files on disk filed into class-specific folders.

+

Let’s use it to generate the training, validation, and test datasets. +The validation and training datasets are generated from two subsets of +the train directory, with 20% of samples going to the +validation dataset and 80% going to the training dataset.

+

Having a validation dataset in addition to the test dataset is useful +for tuning hyperparameters, such as the model architecture, for which +the test dataset should not be used.

+

Before putting the model out into the real world however, it should +be retrained using all available training data (without creating a +validation dataset), so its performance is maximized.

+

When using the validation_split and subset arguments, make sure to either specify a random seed, or to pass shuffle = FALSE, so that the validation & training splits you get have no overlap.

+
+batch_size <- 32
+
+raw_train_ds <- text_dataset_from_directory(
+  "datasets/aclImdb/train",
+  batch_size = batch_size,
+  validation_split = 0.2,
+  subset = "training",
+  seed = 1337
+)
+
## Found 25000 files belonging to 2 classes.
+## Using 20000 files for training.
+
+raw_val_ds <- text_dataset_from_directory(
+  "datasets/aclImdb/train",
+  batch_size = batch_size,
+  validation_split = 0.2,
+  subset = "validation",
+  seed = 1337
+)
+
## Found 25000 files belonging to 2 classes.
+## Using 5000 files for validation.
+
+raw_test_ds <- text_dataset_from_directory(
+  "datasets/aclImdb/test",
+  batch_size = batch_size
+)
+
## Found 25000 files belonging to 2 classes.
+
+cat("Number of batches in raw_train_ds:", length(raw_train_ds), "\n")
+
## Number of batches in raw_train_ds: 625
+
+cat("Number of batches in raw_val_ds:", length(raw_val_ds), "\n")
+
## Number of batches in raw_val_ds: 157
+
+cat("Number of batches in raw_test_ds:", length(raw_test_ds), "\n")
+
## Number of batches in raw_test_ds: 782
+

Let’s preview a few samples:

+
+# It's important to take a look at your raw data to ensure your normalization
+# and tokenization will work as expected. We can do that by taking a few
+# examples from the training set and looking at them.
+# This is one of the places where eager execution shines:
+# we can just evaluate and print these tensors directly
+# instead of needing to evaluate them in a Session/Graph context.
+batch <- iter_next(as_iterator(raw_train_ds))
+str(batch)
+
## List of 2
+##  $ :<tf.Tensor: shape=(32), dtype=string, numpy=…>
+##  $ :<tf.Tensor: shape=(32), dtype=int32, numpy=…>
+
+c(text_batch, label_batch) %<-% batch
+for (i in 1:3) {
+  print(text_batch[i])
+  print(label_batch[i])
+}
+
## tf.Tensor(b"I have read the novel Reaper of Ben Mezrich a fews years ago and last night I accidentally came to see this adaption.<br /><br />Although it's been years since I read the story the first time, the differences between the novel and the movie are humongous. Very important elements, which made the whole thing plausible are just written out or changed to bad.<br /><br />If the plot sounds interesting to you: go and get the novel. Its much, much, much better.<br /><br />Still 4 out of 10 since it was hard to stop watching because of the great basic plot by Ben Mezrich.", shape=(), dtype=string)
+## tf.Tensor(0, shape=(), dtype=int32)
+## tf.Tensor(b'After seeing all the Jesse James, Quantrill, jayhawkers,etc films in the fifties, it is quite a thrill to see this film with a new perspective by director Ang Lee. The scene of the attack of Lawrence, Kansas is awesome. The romantic relationship between Jewel and Toby Mcguire turns out to be one of the best parts and Jonathan Rhys-Meyers is outstanding as the bad guy. All the time this film makes you feel the horror of war, and the desperate situation of the main characters who do not know if they are going to survive the next hours. Definitely worth seeing.', shape=(), dtype=string)
+## tf.Tensor(1, shape=(), dtype=int32)
+## tf.Tensor(b'AG was an excellent presentation of drama, suspense and thriller that is so rare to American TV. Sheriff Lucas gave many a viewer the willies. We rooted for Caleb as he strove to resist the overtures of Sheriff Lucas. We became engrossed and fearful upon learning of the unthinkable connection between these two characters. The manipulations which weekly gave cause to fear what Lucas would do next were truly surprising. This show lived up to the "Gothic" moniker in ways American entertainment has so seldom attempted, much less mastered. The suits definitely made a big mistake in not supporting this show. This show puts shame to the current glut of "reality" shows- which are so less than satisfying viewing.The call for a DVD box set is well based. This show is quality viewing for a discerning market hungry for quality viewing. A public that is tiring of over-saturation of mind-numbing reality fare will welcome this gem of real storytelling. Bring on the DVD box set!!', shape=(), dtype=string)
+## tf.Tensor(1, shape=(), dtype=int32)
+
+
+

Prepare the data +

+

In particular, we remove <br /> tags.

+
+# Having looked at our data above, we see that the raw text contains HTML break
+# tags of the form '<br />'. These tags will not be removed by the default
+# standardizer (which doesn't strip HTML). Because of this, we will need to
+# create a custom standardization function.
+custom_standardization_fn <- function(string_tensor) {
+  string_tensor |>
+    tf$strings$lower() |> # convert to all lowercase
+    tf$strings$regex_replace("<br />", " ") |> # remove '<br />' HTML tag
+    tf$strings$regex_replace("[[:punct:]]", "") # remove punctuation
+}
+
+
+# Model constants.
+max_features <- 20000
+embedding_dim <- 128
+sequence_length <- 500
+
+# Now that we have our custom standardization, we can instantiate our text
+# vectorization layer. We are using this layer to normalize, split, and map
+# strings to integers, so we set our 'output_mode' to 'int'.
+# Note that we're using the default split function,
+# and the custom standardization defined above.
+# We also set an explicit maximum sequence length, since the CNNs later in our
+# model won't support ragged sequences.
+vectorize_layer <- layer_text_vectorization(
+  standardize = custom_standardization_fn,
+  max_tokens = max_features,
+  output_mode = "int",
+  output_sequence_length = sequence_length
+)
+
+# Now that the vectorize_layer has been created, call `adapt` on a text-only
+# dataset to create the vocabulary. You don't have to batch, but for very large
+# datasets this means you're not keeping spare copies of the dataset in memory.
+
+# Let's make a text-only dataset (no labels):
+text_ds <- raw_train_ds |>
+  dataset_map(\(x, y) x)
+# Let's call `adapt`:
+vectorize_layer |> adapt(text_ds)
+
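As a quick sanity check (not part of the original output), you can apply the adapted layer to a single example string:
+
+# The layer lowercases, strips '<br />' tags and punctuation, then maps each
+# word to an integer index, padding with 0 up to sequence_length.
+example <- op_convert_to_tensor(array("This movie was great!<br />"))
+vectorize_layer(example)  # int tensor of shape (1, 500)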
+
+

Two options to vectorize the data +

+

There are 2 ways we can use our text vectorization layer:

+

Option 1: Make it part of the model, so as to obtain +a model that processes raw strings, like this:

+
+text_input <- keras_input(shape = c(1L), dtype = "string", name = 'text')
+x <- text_input |>
+  vectorize_layer() |>
+  layer_embedding(max_features + 1, embedding_dim)
+

Option 2: Apply it to the text dataset to obtain a +dataset of word indices, then feed it into a model that expects integer +sequences as inputs.

+

An important difference between the two is that option 2 enables you +to do asynchronous CPU processing and buffering of your +data when training on GPU. So if you’re training the model on GPU, you +probably want to go with this option to get the best performance. This +is what we will do below.

+

If we were to export our model to production, we’d ship a model that +accepts raw strings as input, like in the code snippet for option 1 +above. This can be done after training. We do this in the last +section.

+
+vectorize_text <- function(text, label) {
+  text <- text |>
+    op_expand_dims(-1) |>
+    vectorize_layer()
+  list(text, label)
+}
+
+# Vectorize the data.
+train_ds <- raw_train_ds |> dataset_map(vectorize_text)
+val_ds   <- raw_val_ds   |> dataset_map(vectorize_text)
+test_ds  <- raw_test_ds  |> dataset_map(vectorize_text)
+
+# Do async prefetching / buffering of the data for best performance on GPU.
+train_ds <- train_ds |>
+  dataset_cache() |>
+  dataset_prefetch(buffer_size = 10)
+val_ds <- val_ds |>
+  dataset_cache() |>
+  dataset_prefetch(buffer_size = 10)
+test_ds <- test_ds |>
+  dataset_cache() |>
+  dataset_prefetch(buffer_size = 10)
+
+
+

Build a model +

+

We choose a simple 1D convnet starting with an Embedding +layer.

+
+# An integer input for vocab indices.
+inputs <- keras_input(shape = c(NA), dtype = "int64")
+
+predictions <- inputs |>
+  # Next, we add a layer to map those vocab indices into a space of dimensionality
+  # 'embedding_dim'.
+  layer_embedding(max_features, embedding_dim) |>
+  layer_dropout(0.5) |>
+  # Conv1D + global max pooling
+  layer_conv_1d(128, 7, padding = "valid", activation = "relu", strides = 3) |>
+  layer_conv_1d(128, 7, padding = "valid", activation = "relu", strides = 3) |>
+  layer_global_max_pooling_1d() |>
+  # We add a vanilla hidden layer:
+  layer_dense(128, activation = "relu") |>
+  layer_dropout(0.5) |>
+  # We project onto a single unit output layer, and squash it with a sigmoid:
+  layer_dense(1, activation = "sigmoid", name = "predictions")
+
+model <- keras_model(inputs, predictions)
+
+summary(model)
+
## Model: "functional_1"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ input_layer (InputLayer)        │ (None, None)           │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ embedding_1 (Embedding)         │ (None, None, 128)      │     2,560,000
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dropout_1 (Dropout)             │ (None, None, 128)      │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv1d_1 (Conv1D)               │ (None, None, 128)      │       114,816
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv1d (Conv1D)                 │ (None, None, 128)      │       114,816
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ global_max_pooling1d            │ (None, 128)            │             0
+## │ (GlobalMaxPooling1D)            │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dense (Dense)                   │ (None, 128)            │        16,512
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dropout (Dropout)               │ (None, 128)            │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ predictions (Dense)             │ (None, 1)              │           129
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 2,806,273 (10.71 MB)
+##  Trainable params: 2,806,273 (10.71 MB)
+##  Non-trainable params: 0 (0.00 B)
+
+# Compile the model with binary crossentropy loss and an adam optimizer.
+model |> compile(loss = "binary_crossentropy",
+                 optimizer = "adam",
+                 metrics = "accuracy")
+
+
+

Train the model +

+
+epochs <- 3
+
+# Fit the model using the train and test datasets.
+model |> fit(train_ds, validation_data = val_ds, epochs = epochs)
+
## Epoch 1/3
+## 625/625 - 6s - 9ms/step - accuracy: 0.7327 - loss: 0.4894 - val_accuracy: 0.8638 - val_loss: 0.3162
+## Epoch 2/3
+## 625/625 - 2s - 2ms/step - accuracy: 0.9116 - loss: 0.2230 - val_accuracy: 0.8780 - val_loss: 0.3131
+## Epoch 3/3
+## 625/625 - 2s - 2ms/step - accuracy: 0.9603 - loss: 0.1118 - val_accuracy: 0.8646 - val_loss: 0.3735
+
+
+

Evaluate the model on the test set +

+
+model |> evaluate(test_ds)
+
## 782/782 - 1s - 2ms/step - accuracy: 0.8532 - loss: 0.4071
+
## $accuracy
+## [1] 0.8532
+##
+## $loss
+## [1] 0.407085
+
+
+

Make an end-to-end model +

+

If you want to obtain a model capable of processing raw strings, you +can simply create a new model (using the weights we just trained):

+
+# A string input
+inputs <- keras_input(shape = c(1), dtype = "string")
+# Turn strings into vocab indices
+indices <- vectorize_layer(inputs)
+# Turn vocab indices into predictions
+outputs <- model(indices)
+
+# Our end to end model
+end_to_end_model <- keras_model(inputs, outputs)
+end_to_end_model |> compile(
+  loss = "binary_crossentropy",
+  optimizer = "adam",
+  metrics = c("accuracy")
+)
+
+# Test it with `raw_test_ds`, which yields raw strings
+end_to_end_model |> evaluate(raw_test_ds)
+
## 782/782 - 3s - 4ms/step - accuracy: 0.8532 - loss: 0.4063
+
## $accuracy
+## [1] 0.8532
+##
+## $loss
+## [1] 0.4062699
+
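You can now score raw strings directly. A sketch (the review text is made up; predict() converts the character matrix to a string tensor):
+
+new_review <- array("This movie was an absolute delight from start to finish!",
+                    dim = c(1, 1))
+end_to_end_model |> predict(new_review)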
+
+
diff --git a/docs/articles/examples/timeseries/timeseries_anomaly_detection.html b/docs/articles/examples/timeseries/timeseries_anomaly_detection.html new file mode 100644 index 0000000000..17aac851b9 --- /dev/null +++ b/docs/articles/examples/timeseries/timeseries_anomaly_detection.html @@ -0,0 +1,607 @@
+Timeseries anomaly detection using an Autoencoder • keras3

Introduction +

+

This script demonstrates how you can use a reconstruction +convolutional autoencoder model to detect anomalies in timeseries +data.

+
+
+

Setup +

+
+library(dplyr, include.only = c("mutate"))
+library(ggplot2)
+theme_set(theme_minimal())
+
+library(listarrays)
+library(tfdatasets, exclude = c("shape"))
+library(keras3)
+
+
+

Load the data +

+

We will use the Numenta Anomaly Benchmark (NAB) dataset. It provides artificial timeseries data containing labeled anomalous periods of behavior. The data are ordered, timestamped, single-valued metrics.

+

We will use the art_daily_small_noise.csv file for +training and the art_daily_jumpsup.csv file for testing. +The simplicity of this dataset allows us to demonstrate anomaly +detection.

+
+get_data <- function(url_suffix) {
+  url_root <- "https://raw.githubusercontent.com/numenta/NAB/master/data/"
+  url <- paste0(url_root, url_suffix)
+  file <- get_file(origin = url) # cache file locally
+  # parse csv; 2 columns with types: datetime (T), double (d)
+  readr::read_csv(file, col_types = "Td")
+}
+
+df_small_noise   <- get_data("artificialNoAnomaly/art_daily_small_noise.csv")
+df_daily_jumpsup <- get_data("artificialWithAnomaly/art_daily_jumpsup.csv")
+
+
+

Quick look at the data +

+
+df_small_noise
+
## # A tibble: 4,032 × 2
+##    timestamp           value
+##    <dttm>              <dbl>
+##  1 2014-04-01 00:00:00  18.3
+##  2 2014-04-01 00:05:00  22.0
+##  3 2014-04-01 00:10:00  18.6
+##  4 2014-04-01 00:15:00  22.0
+##  5 2014-04-01 00:20:00  21.9
+##  6 2014-04-01 00:25:00  21.2
+##  7 2014-04-01 00:30:00  20.6
+##  8 2014-04-01 00:35:00  20.3
+##  9 2014-04-01 00:40:00  21.5
+## 10 2014-04-01 00:45:00  19.2
+## # ℹ 4,022 more rows
+
+df_daily_jumpsup
+
## # A tibble: 4,032 × 2
+##    timestamp           value
+##    <dttm>              <dbl>
+##  1 2014-04-01 00:00:00  19.8
+##  2 2014-04-01 00:05:00  20.5
+##  3 2014-04-01 00:10:00  20.0
+##  4 2014-04-01 00:15:00  21.5
+##  5 2014-04-01 00:20:00  20.2
+##  6 2014-04-01 00:25:00  19.9
+##  7 2014-04-01 00:30:00  21.7
+##  8 2014-04-01 00:35:00  20.9
+##  9 2014-04-01 00:40:00  18.4
+## 10 2014-04-01 00:45:00  18.7
+## # ℹ 4,022 more rows
+
+
+

Visualize the data +

+
+

Timeseries data without anomalies +

+

We will use the following data for training.

+
+plot_ts <- function(df) {
+  ggplot(df, aes(x = timestamp, y = value)) + geom_line() +
+    scale_x_datetime(date_breaks = "1 day", date_labels = "%b-%d")
+}
+
+plot_ts(df_small_noise) + ggtitle("Without Anomaly")
+
+plot of chunk unnamed-chunk-4
+
+
+
+

Timeseries data with anomalies +

+

We will use the following data for testing and see if the sudden jump +up in the data is detected as an anomaly.

+
+plot_ts(df_daily_jumpsup) + ggtitle("With Anomaly")
+
+plot of chunk unnamed-chunk-5
+
+
+
+
+

Prepare training data +

+

Get the data values from the training timeseries data file and normalize the value data. We have a value every 5 minutes for 14 days.

+
    +
  • 24 * 60 / 5 = 288 timesteps per day
  • 288 * 14 = 4032 data points in total
+
+df_train <- df_small_noise |>
+  mutate(value = (value - mean(value)) / sd(value))
+
+cat("Number of training samples:", nrow(df_train), "\n")
+
## Number of training samples: 4032
+
+

Create sequences +

+

Create sequences combining TIME_STEPS contiguous data +values from the training data.

+
+TIME_STEPS <- 288
+
+as_dataset <- function(df) {
+  x <- as.matrix(df$value)
+  ds <- timeseries_dataset_from_array(x, NULL, sequence_length = TIME_STEPS)
+  # Because the dataset is small, cast TF Dataset to an R array for convenience.
+  ds |> as_array_iterator() |> iterate() |> bind_on_rows()
+}
+
+x_train <- as_dataset(df_train)
+writeLines(sprintf("Training input shape: (%s)", toString(dim(x_train))))
+
## Training input shape: (3745, 288, 1)
+
+
+
+

Build a model +

+

We will build a convolutional reconstruction autoencoder model. The +model will take input of shape +(batch_size, sequence_length, num_features) and return +output of the same shape. In this case, sequence_length is +288 and num_features is 1.

+
+model <- keras_model_sequential(input_shape = c(TIME_STEPS, 1)) |>
+  layer_conv_1d(
+    filters = 32, kernel_size = 7, padding = "same",
+    strides = 2, activation = "relu"
+  ) |>
+  layer_dropout(rate = 0.2) |>
+  layer_conv_1d(
+    filters = 16, kernel_size = 7, padding = "same",
+    strides = 2, activation = "relu"
+  ) |>
+  layer_conv_1d_transpose(
+    filters = 16, kernel_size = 7, padding = "same",
+    strides = 2, activation = "relu"
+  ) |>
+  layer_dropout(rate = 0.2) |>
+  layer_conv_1d_transpose(
+    filters = 32, kernel_size = 7, padding = "same",
+    strides = 2, activation = "relu"
+  ) |>
+  layer_conv_1d_transpose(filters = 1, kernel_size = 7, padding = "same")
+
+model |> compile(optimizer=optimizer_adam(learning_rate=0.001), loss="mse")
+model
+
## Model: "sequential"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ conv1d (Conv1D)                 │ (None, 144, 32)        │           256
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dropout (Dropout)               │ (None, 144, 32)        │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv1d_1 (Conv1D)               │ (None, 72, 16)         │         3,600
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv1d_transpose                │ (None, 144, 16)        │         1,808
+## │ (Conv1DTranspose)               │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dropout_1 (Dropout)             │ (None, 144, 16)        │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv1d_transpose_1              │ (None, 288, 32)        │         3,616
+## │ (Conv1DTranspose)               │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv1d_transpose_2              │ (None, 288, 1)         │           225
+## │ (Conv1DTranspose)               │                        │               │
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 9,505 (37.13 KB)
+##  Trainable params: 9,505 (37.13 KB)
+##  Non-trainable params: 0 (0.00 B)
+
+
+

Train the model +

+

Please note that we are using x_train as both the input +and the target since this is a reconstruction model.

+
+history <- model |> fit(
+  x_train, x_train,
+  epochs = 50,
+  validation_split = 0.1,
+  callbacks = c(
+    callback_early_stopping(
+      monitor = "val_loss", patience = 5, mode = "min"
+    )
+  )
+)
+
## Epoch 1/50
+## 106/106 - 5s - 51ms/step - loss: 0.1863 - val_loss: 0.0316
+## Epoch 2/50
+## 106/106 - 0s - 2ms/step - loss: 0.0333 - val_loss: 0.0233
+## Epoch 3/50
+## 106/106 - 0s - 2ms/step - loss: 0.0248 - val_loss: 0.0201
+## Epoch 4/50
+## 106/106 - 0s - 2ms/step - loss: 0.0209 - val_loss: 0.0187
+## Epoch 5/50
+## 106/106 - 0s - 2ms/step - loss: 0.0179 - val_loss: 0.0146
+## Epoch 6/50
+## 106/106 - 0s - 2ms/step - loss: 0.0150 - val_loss: 0.0113
+## Epoch 7/50
+## 106/106 - 0s - 2ms/step - loss: 0.0127 - val_loss: 0.0094
+## Epoch 8/50
+## 106/106 - 0s - 2ms/step - loss: 0.0109 - val_loss: 0.0088
+## Epoch 9/50
+## 106/106 - 0s - 2ms/step - loss: 0.0096 - val_loss: 0.0084
+## Epoch 10/50
+## 106/106 - 0s - 2ms/step - loss: 0.0086 - val_loss: 0.0069
+## Epoch 11/50
+## 106/106 - 0s - 2ms/step - loss: 0.0078 - val_loss: 0.0061
+## Epoch 12/50
+## 106/106 - 0s - 2ms/step - loss: 0.0073 - val_loss: 0.0057
+## Epoch 13/50
+## 106/106 - 0s - 2ms/step - loss: 0.0068 - val_loss: 0.0053
+## Epoch 14/50
+## 106/106 - 0s - 2ms/step - loss: 0.0064 - val_loss: 0.0050
+## Epoch 15/50
+## 106/106 - 0s - 2ms/step - loss: 0.0061 - val_loss: 0.0045
+## Epoch 16/50
+## 106/106 - 0s - 2ms/step - loss: 0.0058 - val_loss: 0.0039
+## Epoch 17/50
+## 106/106 - 0s - 2ms/step - loss: 0.0056 - val_loss: 0.0040
+## Epoch 18/50
+## 106/106 - 0s - 2ms/step - loss: 0.0053 - val_loss: 0.0035
+## Epoch 19/50
+## 106/106 - 0s - 2ms/step - loss: 0.0050 - val_loss: 0.0037
+## Epoch 20/50
+## 106/106 - 0s - 2ms/step - loss: 0.0048 - val_loss: 0.0032
+## Epoch 21/50
+## 106/106 - 0s - 2ms/step - loss: 0.0046 - val_loss: 0.0031
+## Epoch 22/50
+## 106/106 - 0s - 2ms/step - loss: 0.0044 - val_loss: 0.0032
+## Epoch 23/50
+## 106/106 - 0s - 2ms/step - loss: 0.0042 - val_loss: 0.0033
+## Epoch 24/50
+## 106/106 - 0s - 2ms/step - loss: 0.0041 - val_loss: 0.0031
+## Epoch 25/50
+## 106/106 - 0s - 2ms/step - loss: 0.0039 - val_loss: 0.0033
+## Epoch 26/50
+## 106/106 - 0s - 2ms/step - loss: 0.0037 - val_loss: 0.0031
+## Epoch 27/50
+## 106/106 - 0s - 2ms/step - loss: 0.0035 - val_loss: 0.0032
+## Epoch 28/50
+## 106/106 - 0s - 2ms/step - loss: 0.0034 - val_loss: 0.0025
+## Epoch 29/50
+## 106/106 - 0s - 2ms/step - loss: 0.0033 - val_loss: 0.0026
+## Epoch 30/50
+## 106/106 - 0s - 2ms/step - loss: 0.0032 - val_loss: 0.0028
+## Epoch 31/50
+## 106/106 - 0s - 2ms/step - loss: 0.0030 - val_loss: 0.0024
+## Epoch 32/50
+## 106/106 - 0s - 2ms/step - loss: 0.0029 - val_loss: 0.0026
+## Epoch 33/50
+## 106/106 - 0s - 2ms/step - loss: 0.0028 - val_loss: 0.0026
+## Epoch 34/50
+## 106/106 - 0s - 2ms/step - loss: 0.0027 - val_loss: 0.0022
+## Epoch 35/50
+## 106/106 - 0s - 2ms/step - loss: 0.0027 - val_loss: 0.0024
+## Epoch 36/50
+## 106/106 - 0s - 2ms/step - loss: 0.0026 - val_loss: 0.0026
+## Epoch 37/50
+## 106/106 - 0s - 2ms/step - loss: 0.0025 - val_loss: 0.0026
+## Epoch 38/50
+## 106/106 - 0s - 2ms/step - loss: 0.0025 - val_loss: 0.0023
+## Epoch 39/50
+## 106/106 - 0s - 2ms/step - loss: 0.0024 - val_loss: 0.0023
+

Let’s plot training and validation loss to see how the training +went.

+
+plot(history)
+
+plot of chunk unnamed-chunk-10
+
+
+
+

Detecting anomalies +

+

We will detect anomalies by determining how well our model can +reconstruct the input data.

+
    +
  1. Find MAE loss on training samples.
  2. Find the max MAE loss value. This is the worst our model has performed trying to reconstruct a sample. We will make this the threshold for anomaly detection.
  3. If the reconstruction loss for a sample is greater than this threshold value, we can infer that the model is seeing a pattern that it isn’t familiar with. We will label this sample as an anomaly.
+
+# Get train MAE loss.
+x_train_pred <- model |> predict(x_train)
+
## 118/118 - 0s - 3ms/step
+
+train_mae_loss <- apply(abs(x_train_pred - x_train), 1, mean)
+
+hist(train_mae_loss, breaks = 50)
+
+plot of chunk unnamed-chunk-11
+
+
+# Get reconstruction loss threshold.
+threshold <- max(train_mae_loss)
+cat("Reconstruction error threshold: ", threshold, "\n")
+
## Reconstruction error threshold:  0.039214
+
+

Compare reconstruction +

+

Just for fun, let’s see how our model has reconstructed the first sample. This is the 288 timesteps from day 1 of our training dataset.

+
+# Checking how the first sequence is learnt
+plot(NULL, NULL, ylab = 'Value',
+     xlim = c(0, TIME_STEPS),
+     ylim = range(c(x_train[1,,], x_train_pred[1,,])))
+lines(x_train[1,,])
+lines(x_train_pred[1,,], col = 'red')
+legend("topleft", lty = 1,
+       legend = c("actual", "predicted"),
+       col = c("black", "red"))
+
+plot of chunk unnamed-chunk-12
+
+
+
+

Prepare test data +

+
+df_test <- df_daily_jumpsup |>
+  mutate(value =
+           (value - mean(df_small_noise$value)) /
+             sd(df_small_noise$value))
+
+df_test |> head()
+plot_ts(df_test)
+
+plot of chunk unnamed-chunk-13
+
+
+# Create sequences from test values.
+x_test <- as_dataset(df_test)
+
+# Get test MAE loss.
+x_test_pred <- model |> predict(x_test)
+test_mae_loss <- apply(abs(x_test_pred - x_test), 1, mean)
+
+hist(test_mae_loss, breaks = 50, xlab = "test MAE loss", ylab = "No of samples")
+
+plot of chunk unnamed-chunk-13
+
+
+# Detect all the samples which are anomalies.
+anomalies <- test_mae_loss > threshold
+cat("Number of anomaly samples:", sum(anomalies), "\n")
+cat("Indices of anomaly samples:", which(anomalies), "\n", fill = TRUE)
+
## # A tibble: 6 × 2
+##   timestamp            value
+##   <dttm>               <dbl>
+## 1 2014-04-01 00:00:00 -0.808
+## 2 2014-04-01 00:05:00 -0.781
+## 3 2014-04-01 00:10:00 -0.801
+## 4 2014-04-01 00:15:00 -0.746
+## 5 2014-04-01 00:20:00 -0.792
+## 6 2014-04-01 00:25:00 -0.802
+## 118/118 - 0s - 755us/step
+## Number of anomaly samples: 509
+## Indices of anomaly samples: 216 218 219 220 221 396 398 507 793 794 795 797
+## 798 799 861 974 975 1658 1659 1944 1945 1946 1947 1950 2004 2008 2012 2016
+## 2020 2022 2024 2026 2028 2036 2038 2040 2042 2044 2045 2046 2048 2050 2052
+## 2053 2054 2056 2057 2058 2060 2061 2062 2064 2065 2066 2068 2069 2070 2072
+## 2073 2074 2076 2077 2078 2080 2081 2082 2083 2084 2085 2086 2088 2089 2090
+## 2092 2093 2094 2096 2097 2098 2100 2101 2102 2104 2105 2106 2108 2109 2110
+## 2112 2113 2114 2116 2117 2118 2120 2121 2122 2124 2126 2129 2141 2521 2522
+## 2523 2525 2546 2698 2700 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711
+## 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726
+## 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741
+## 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756
+## 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771
+## 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786
+## 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801
+## 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816
+## 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831
+## 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846
+## 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861
+## 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876
+## 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891
+## 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906
+## 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921
+## 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936
+## 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951
+## 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966
+## 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981
+## 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996
+## 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011
+## 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026
+## 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041
+## 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056
+## 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071
+## 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086
+## 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101
+## 3102
+
+
+
+

Plot anomalies +

+

We now know the samples of the data which are anomalies. With this, +we will find the corresponding timestamps from the original +test data. We will be using the following method to do that:

+

Let’s say time_steps = 3 and we have 10 training values. Our +x_train will look like this:

+
    +
  • 0, 1, 2
  • 1, 2, 3
  • 2, 3, 4
  • 3, 4, 5
  • 4, 5, 6
  • 5, 6, 7
  • 6, 7, 8
  • 7, 8, 9
+

All data values, except for the initial and final time_steps - 1 values, will appear in time_steps samples. So, if we know that the samples [(3, 4, 5), (4, 5, 6), (5, 6, 7)] are anomalies, we can say that the data point 5 is an anomaly.

+
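To make the windowing logic concrete, here is the same idea on a toy vector of window-level anomaly flags, with time_steps = 3 (a small sketch using zoo, as in the code below):
+
+window_flagged <- c(FALSE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE)
+# A data point is kept only when all 3 windows covering it are flagged:
+zoo::rollsum(window_flagged, 3, align = "right", na.pad = TRUE) >= 3
+
## [1]    NA    NA FALSE FALSE  TRUE FALSE FALSE FALSE
+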

Let’s overlay the anomalies on the original test data plot.

+
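+# Flag windows whose reconstruction error exceeds the threshold, then keep
+# only the points covered entirely by flagged windows (a right-aligned
+# rolling sum of flags must reach TIME_STEPS):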
+is_anomaly <- test_mae_loss > threshold
+is_anomaly <- is_anomaly &
+  zoo::rollsum(is_anomaly, TIME_STEPS,
+               align = "right", na.pad = TRUE) >= TIME_STEPS
+
+with(df_test, {
+  plot(value ~ timestamp, type = 'l', xaxt = 'n', las = 2)
+  axis.POSIXct(1, at = seq(timestamp[1], tail(timestamp, 1), by = "days"),
+               format = "%b-%d")
+})
+
+with(df_test[which(is_anomaly),], {
+  points(value ~ timestamp, col = "red")
+})
+
+plot of chunk unnamed-chunk-14
+
+
+
+
diff --git a/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-10-1.png b/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-10-1.png new file mode 100644 index 0000000000..b09a4d2f12 Binary files /dev/null and b/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-10-1.png differ
diff --git a/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-11-1.png b/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-11-1.png new file mode 100644 index 0000000000..a08eb63380 Binary files /dev/null and b/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-11-1.png differ
diff --git a/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-12-1.png b/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-12-1.png new file mode 100644 index 0000000000..70851fd9e2 Binary files /dev/null and b/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-12-1.png differ
diff --git a/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-13-1.png b/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-13-1.png new file mode 100644 index 0000000000..d3ec5b90fe Binary files /dev/null and b/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-13-1.png differ
diff --git a/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-13-2.png b/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-13-2.png new file mode 100644 index 0000000000..040d259754 Binary files /dev/null and b/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-13-2.png differ
diff --git a/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-14-1.png b/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-14-1.png new file mode 100644 index 0000000000..c8572f66c8 Binary files /dev/null and b/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-14-1.png differ
diff --git a/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-4-1.png b/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-4-1.png new file mode 100644 index 0000000000..f788185b05 Binary files /dev/null and b/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-4-1.png differ
diff --git a/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-5-1.png b/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-5-1.png new file mode 100644 index 0000000000..9700adcd2a Binary files /dev/null and b/docs/articles/examples/timeseries/timeseries_anomaly_detection/unnamed-chunk-5-1.png differ
diff --git a/docs/articles/examples/timeseries/timeseries_classification_from_scratch.html b/docs/articles/examples/timeseries/timeseries_classification_from_scratch.html new file mode 100644 index 0000000000..5d5db94e82 --- /dev/null +++ b/docs/articles/examples/timeseries/timeseries_classification_from_scratch.html @@ -0,0 +1,860 @@
+Timeseries classification from scratch • keras3

Introduction +

+

This example shows how to do timeseries classification from scratch, +starting from raw CSV timeseries files on disk. We demonstrate the +workflow on the FordA dataset from the UCR/UEA +archive.

+
+
+

Setup +

+library(keras3)  # assumed setup: the examples below use keras3 and %<-%
+
+

Load the data: the FordA dataset +

+
+

Dataset description +

+

The dataset we are using here is called FordA. The data comes from +the UCR archive. The dataset contains 3601 training instances and +another 1320 testing instances. Each timeseries corresponds to a +measurement of engine noise captured by a motor sensor. For this task, +the goal is to automatically detect the presence of a specific issue +with the engine. The problem is a balanced binary classification task. +The full description of this dataset can be found here.

+
+
+

Read the TSV data +

+

We will use the FordA_TRAIN file for training and the +FordA_TEST file for testing. The simplicity of this dataset +allows us to demonstrate effectively how to use ConvNets for timeseries +classification. In this file, the first column corresponds to the +label.

+
+get_data <- function(path) {
+  if(path |> startsWith("https://"))
+    path <- get_file(origin = path)  # cache file locally
+
+  data <- readr::read_tsv(
+    path, col_names = FALSE,
+    # Each row is: one integer (the label),
+    # followed by 500 doubles (the timeseries)
+    col_types = paste0("i", strrep("d", 500))
+  )
+
+  y <- as.matrix(data[[1]])
+  x <- as.matrix(data[,-1])
+  dimnames(x) <- dimnames(y) <- NULL
+
+  list(x, y)
+}
+
+root_url <- "https://raw.githubusercontent.com/hfawaz/cd-diagram/master/FordA/"
+c(x_train, y_train) %<-% get_data(paste0(root_url, "FordA_TRAIN.tsv"))
+c(x_test, y_test) %<-% get_data(paste0(root_url, "FordA_TEST.tsv"))
+
+str(keras3:::named_list(
+  x_train, y_train,
+  x_test, y_test
+))
+
## List of 4
+##  $ x_train: num [1:3601, 1:500] -0.797 0.805 0.728 -0.234 -0.171 ...
+##  $ y_train: int [1:3601, 1] -1 1 -1 -1 -1 1 1 1 1 1 ...
+##  $ x_test : num [1:1320, 1:500] -0.14 0.334 0.717 1.24 -1.159 ...
+##  $ y_test : int [1:1320, 1] -1 -1 -1 1 -1 1 -1 -1 1 1 ...
+
+
+
+

Visualize the data +

+

Here we visualize one timeseries example for each class in the +dataset.

+
+plot(NULL, main = "Timeseries Data",
+     xlab = "Timepoints",  ylab = "Values",
+     xlim = c(1, ncol(x_test)),
+     ylim = range(x_test))
+grid()
+lines(x_test[match(-1, y_test), ], col = "blue")
+lines(x_test[match( 1, y_test), ], col = "red")
+legend("topright", legend=c("label -1", "label 1"), col=c("blue", "red"), lty=1)
+
+Plot of Example Timeseries Data
+
+
+
+

Standardize the data +

+

Our timeseries are already of a single length (500). However, their values are usually in various ranges. This is not ideal for a neural network; in general we should seek to make the input values normalized. For this specific dataset, the data is already z-normalized: each timeseries sample has a mean equal to zero and a standard deviation equal to one. This type of normalization is very common for timeseries classification problems; see Bagnall et al. (2016).

+

Note that the timeseries data used here are univariate, meaning we only have one channel per timeseries example. We will therefore transform each timeseries into a multivariate one with a single channel, by adding a trailing channel dimension with a simple reshape. This will allow us to construct a model that is easily applicable to multivariate time series.

+
+dim(x_train) <- c(dim(x_train), 1)
+dim(x_test) <- c(dim(x_test), 1)
+

Finally, in order to use +sparse_categorical_crossentropy, we will have to count the +number of classes beforehand.

+
+num_classes <- length(unique(y_train))
+

Now we shuffle the training set because we will be using the +validation_split option later when training.

+
+c(x_train, y_train) %<-% listarrays::shuffle_rows(x_train, y_train)
+# idx <- sample.int(nrow(x_train))
+# x_train %<>% .[idx,, ,drop = FALSE]
+# y_train %<>% .[idx,  ,drop = FALSE]
+

Standardize the labels to positive integers. The expected labels will +then be 0 and 1.

+
+y_train[y_train == -1L] <- 0L
+y_test[y_test == -1L] <- 0L
+
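A quick look at the label distribution (a sanity check, not part of the original output) confirms the classes are roughly balanced:
+
+table(y_train)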
+
+

Build a model +

+

We build a Fully Convolutional Neural Network originally proposed in +this paper. The +implementation is based on the TF 2 version provided here. The following +hyperparameters (kernel_size, filters, the usage of BatchNorm) were +found via random search using KerasTuner.

+
+make_model <- function(input_shape) {
+  inputs <- keras_input(input_shape)
+
+  outputs <- inputs |>
+    # conv1
+    layer_conv_1d(filters = 64, kernel_size = 3, padding = "same") |>
+    layer_batch_normalization() |>
+    layer_activation_relu() |>
+    # conv2
+    layer_conv_1d(filters = 64, kernel_size = 3, padding = "same") |>
+    layer_batch_normalization() |>
+    layer_activation_relu() |>
+    # conv3
+    layer_conv_1d(filters = 64, kernel_size = 3, padding = "same") |>
+    layer_batch_normalization() |>
+    layer_activation_relu() |>
+    # pooling
+    layer_global_average_pooling_1d() |>
+    # final output
+    layer_dense(num_classes, activation = "softmax")
+
+  keras_model(inputs, outputs)
+}
+
+model <- make_model(input_shape = dim(x_train)[-1])
+
+model
+
## Model: "functional_1"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━┓
+## ┃ Layer (type)                 Output Shape              Param #  Trai… 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━┩
+## │ input_layer (InputLayer)    │ (None, 500, 1)        │          0-
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ conv1d (Conv1D)             │ (None, 500, 64)       │        256Y
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ batch_normalization         │ (None, 500, 64)       │        256Y
+## │ (BatchNormalization)        │                       │            │       │
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ re_lu (ReLU)                │ (None, 500, 64)       │          0-
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ conv1d_1 (Conv1D)           │ (None, 500, 64)       │     12,352Y
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ batch_normalization_1       │ (None, 500, 64)       │        256Y
+## │ (BatchNormalization)        │                       │            │       │
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ re_lu_1 (ReLU)              │ (None, 500, 64)       │          0-
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ conv1d_2 (Conv1D)           │ (None, 500, 64)       │     12,352Y
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ batch_normalization_2       │ (None, 500, 64)       │        256Y
+## │ (BatchNormalization)        │                       │            │       │
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ re_lu_2 (ReLU)              │ (None, 500, 64)       │          0-
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ global_average_pooling1d    │ (None, 64)            │          0-
+## │ (GlobalAveragePooling1D)    │                       │            │       │
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ dense (Dense)               │ (None, 2)             │        130Y
+## └─────────────────────────────┴───────────────────────┴────────────┴───────┘
+##  Total params: 25,858 (101.01 KB)
+##  Trainable params: 25,474 (99.51 KB)
+##  Non-trainable params: 384 (1.50 KB)
+
+plot(model, show_shapes = TRUE)
+
+plot of chunk unnamed-chunk-9

+
+
+
+

Train the model +

+
+epochs <- 500
+batch_size <- 32
+
+callbacks <- c(
+  callback_model_checkpoint(
+    "best_model.keras", save_best_only = TRUE,
+    monitor = "val_loss"
+  ),
+  callback_reduce_lr_on_plateau(
+    monitor = "val_loss", factor = 0.5,
+    patience = 20, min_lr = 0.0001
+  ),
+  callback_early_stopping(
+    monitor = "val_loss", patience = 50,
+    verbose = 1
+  )
+)
+
+
+model |> compile(
+  optimizer = "adam",
+  loss = "sparse_categorical_crossentropy",
+  metrics = "sparse_categorical_accuracy"
+)
+
+history <- model |> fit(
+  x_train, y_train,
+  batch_size = batch_size,
+  epochs = epochs,
+  callbacks = callbacks,
+  validation_split = 0.2
+)
+
## Epoch 1/500
+## 90/90 - 2s - 26ms/step - loss: 0.5577 - sparse_categorical_accuracy: 0.7066 - val_loss: 0.8512 - val_sparse_categorical_accuracy: 0.4896 - learning_rate: 0.0010
+## Epoch 2/500
+## 90/90 - 1s - 7ms/step - loss: 0.4871 - sparse_categorical_accuracy: 0.7649 - val_loss: 0.9151 - val_sparse_categorical_accuracy: 0.4896 - learning_rate: 0.0010
+## Epoch 3/500
+## 90/90 - 0s - 2ms/step - loss: 0.4678 - sparse_categorical_accuracy: 0.7663 - val_loss: 0.7265 - val_sparse_categorical_accuracy: 0.4910 - learning_rate: 0.0010
+## Epoch 4/500
+## 90/90 - 0s - 2ms/step - loss: 0.4174 - sparse_categorical_accuracy: 0.7941 - val_loss: 0.6591 - val_sparse_categorical_accuracy: 0.5534 - learning_rate: 0.0010
+## Epoch 5/500
+## 90/90 - 0s - 2ms/step - loss: 0.4289 - sparse_categorical_accuracy: 0.7826 - val_loss: 0.5279 - val_sparse_categorical_accuracy: 0.6741 - learning_rate: 0.0010
+## Epoch 6/500
+## 90/90 - 0s - 2ms/step - loss: 0.4058 - sparse_categorical_accuracy: 0.8035 - val_loss: 0.4469 - val_sparse_categorical_accuracy: 0.8086 - learning_rate: 0.0010
+## Epoch 7/500
+## 90/90 - 0s - 2ms/step - loss: 0.4005 - sparse_categorical_accuracy: 0.8042 - val_loss: 0.4278 - val_sparse_categorical_accuracy: 0.7448 - learning_rate: 0.0010
+## Epoch 8/500
+## 90/90 - 0s - 2ms/step - loss: 0.3904 - sparse_categorical_accuracy: 0.8049 - val_loss: 0.3824 - val_sparse_categorical_accuracy: 0.8128 - learning_rate: 0.0010
+## Epoch 9/500
+## 90/90 - 0s - 1ms/step - loss: 0.3830 - sparse_categorical_accuracy: 0.8188 - val_loss: 0.4006 - val_sparse_categorical_accuracy: 0.8100 - learning_rate: 0.0010
+## Epoch 10/500
+## 90/90 - 0s - 1ms/step - loss: 0.3845 - sparse_categorical_accuracy: 0.8080 - val_loss: 0.4014 - val_sparse_categorical_accuracy: 0.8114 - learning_rate: 0.0010
+## Epoch 11/500
+## 90/90 - 0s - 2ms/step - loss: 0.3804 - sparse_categorical_accuracy: 0.8167 - val_loss: 0.3738 - val_sparse_categorical_accuracy: 0.8017 - learning_rate: 0.0010
+## Epoch 12/500
+## 90/90 - 0s - 1ms/step - loss: 0.3695 - sparse_categorical_accuracy: 0.8226 - val_loss: 0.4010 - val_sparse_categorical_accuracy: 0.7989 - learning_rate: 0.0010
+## Epoch 13/500
+## 90/90 - 0s - 1ms/step - loss: 0.3598 - sparse_categorical_accuracy: 0.8313 - val_loss: 0.3742 - val_sparse_categorical_accuracy: 0.8128 - learning_rate: 0.0010
+## Epoch 14/500
+## 90/90 - 0s - 1ms/step - loss: 0.3604 - sparse_categorical_accuracy: 0.8250 - val_loss: 0.3839 - val_sparse_categorical_accuracy: 0.8225 - learning_rate: 0.0010
+## Epoch 15/500
+## 90/90 - 0s - 1ms/step - loss: 0.3436 - sparse_categorical_accuracy: 0.8417 - val_loss: 0.3815 - val_sparse_categorical_accuracy: 0.8225 - learning_rate: 0.0010
+## Epoch 16/500
+## 90/90 - 0s - 2ms/step - loss: 0.3441 - sparse_categorical_accuracy: 0.8444 - val_loss: 0.3387 - val_sparse_categorical_accuracy: 0.8433 - learning_rate: 0.0010
+## Epoch 17/500
+## 90/90 - 0s - 2ms/step - loss: 0.3327 - sparse_categorical_accuracy: 0.8427 - val_loss: 0.3360 - val_sparse_categorical_accuracy: 0.8433 - learning_rate: 0.0010
+## Epoch 18/500
+## 90/90 - 0s - 1ms/step - loss: 0.3296 - sparse_categorical_accuracy: 0.8493 - val_loss: 0.5619 - val_sparse_categorical_accuracy: 0.7074 - learning_rate: 0.0010
+## Epoch 19/500
+## 90/90 - 0s - 1ms/step - loss: 0.3222 - sparse_categorical_accuracy: 0.8608 - val_loss: 0.3951 - val_sparse_categorical_accuracy: 0.8128 - learning_rate: 0.0010
+## Epoch 20/500
+## 90/90 - 0s - 1ms/step - loss: 0.3195 - sparse_categorical_accuracy: 0.8590 - val_loss: 0.5172 - val_sparse_categorical_accuracy: 0.7226 - learning_rate: 0.0010
+## Epoch 21/500
+## 90/90 - 0s - 2ms/step - loss: 0.3003 - sparse_categorical_accuracy: 0.8698 - val_loss: 0.3359 - val_sparse_categorical_accuracy: 0.8405 - learning_rate: 0.0010
+## Epoch 22/500
+## 90/90 - 0s - 2ms/step - loss: 0.3012 - sparse_categorical_accuracy: 0.8740 - val_loss: 0.3339 - val_sparse_categorical_accuracy: 0.8447 - learning_rate: 0.0010
+## Epoch 23/500
+## 90/90 - 0s - 1ms/step - loss: 0.2901 - sparse_categorical_accuracy: 0.8799 - val_loss: 0.3859 - val_sparse_categorical_accuracy: 0.7920 - learning_rate: 0.0010
+## Epoch 24/500
+## 90/90 - 0s - 2ms/step - loss: 0.2898 - sparse_categorical_accuracy: 0.8788 - val_loss: 0.2806 - val_sparse_categorical_accuracy: 0.8863 - learning_rate: 0.0010
+## Epoch 25/500
+## 90/90 - 0s - 1ms/step - loss: 0.2831 - sparse_categorical_accuracy: 0.8785 - val_loss: 0.3124 - val_sparse_categorical_accuracy: 0.8585 - learning_rate: 0.0010
+## Epoch 26/500
+## 90/90 - 0s - 1ms/step - loss: 0.3130 - sparse_categorical_accuracy: 0.8580 - val_loss: 0.3423 - val_sparse_categorical_accuracy: 0.8377 - learning_rate: 0.0010
+## Epoch 27/500
+## 90/90 - 0s - 1ms/step - loss: 0.2741 - sparse_categorical_accuracy: 0.8910 - val_loss: 0.2908 - val_sparse_categorical_accuracy: 0.8835 - learning_rate: 0.0010
+## Epoch 28/500
+## 90/90 - 0s - 1ms/step - loss: 0.2855 - sparse_categorical_accuracy: 0.8837 - val_loss: 0.5778 - val_sparse_categorical_accuracy: 0.7323 - learning_rate: 0.0010
+## Epoch 29/500
+## 90/90 - 0s - 1ms/step - loss: 0.2671 - sparse_categorical_accuracy: 0.8809 - val_loss: 0.4736 - val_sparse_categorical_accuracy: 0.7628 - learning_rate: 0.0010
+## Epoch 30/500
+## 90/90 - 0s - 1ms/step - loss: 0.2720 - sparse_categorical_accuracy: 0.8833 - val_loss: 0.4564 - val_sparse_categorical_accuracy: 0.7767 - learning_rate: 0.0010
+## Epoch 31/500
+## 90/90 - 0s - 1ms/step - loss: 0.2724 - sparse_categorical_accuracy: 0.8882 - val_loss: 0.2784 - val_sparse_categorical_accuracy: 0.8918 - learning_rate: 0.0010
+## Epoch 32/500
+## 90/90 - 0s - 1ms/step - loss: 0.2709 - sparse_categorical_accuracy: 0.8830 - val_loss: 0.3442 - val_sparse_categorical_accuracy: 0.8280 - learning_rate: 0.0010
+## Epoch 33/500
+## 90/90 - 0s - 1ms/step - loss: 0.2571 - sparse_categorical_accuracy: 0.8986 - val_loss: 0.3730 - val_sparse_categorical_accuracy: 0.7864 - learning_rate: 0.0010
+## Epoch 34/500
+## 90/90 - 0s - 1ms/step - loss: 0.2449 - sparse_categorical_accuracy: 0.9003 - val_loss: 0.2799 - val_sparse_categorical_accuracy: 0.8877 - learning_rate: 0.0010
+## Epoch 35/500
+## 90/90 - 0s - 1ms/step - loss: 0.2698 - sparse_categorical_accuracy: 0.8861 - val_loss: 0.4860 - val_sparse_categorical_accuracy: 0.7420 - learning_rate: 0.0010
+## Epoch 36/500
+## 90/90 - 0s - 1ms/step - loss: 0.2539 - sparse_categorical_accuracy: 0.8944 - val_loss: 0.2572 - val_sparse_categorical_accuracy: 0.8974 - learning_rate: 0.0010
+## Epoch 37/500
+## 90/90 - 0s - 1ms/step - loss: 0.2495 - sparse_categorical_accuracy: 0.8962 - val_loss: 0.2735 - val_sparse_categorical_accuracy: 0.8724 - learning_rate: 0.0010
+## Epoch 38/500
+## 90/90 - 0s - 1ms/step - loss: 0.2474 - sparse_categorical_accuracy: 0.9000 - val_loss: 0.3009 - val_sparse_categorical_accuracy: 0.8641 - learning_rate: 0.0010
+## Epoch 39/500
+## 90/90 - 0s - 1ms/step - loss: 0.2492 - sparse_categorical_accuracy: 0.8976 - val_loss: 1.0276 - val_sparse_categorical_accuracy: 0.6283 - learning_rate: 0.0010
+## Epoch 40/500
+## 90/90 - 0s - 1ms/step - loss: 0.2477 - sparse_categorical_accuracy: 0.8896 - val_loss: 0.4444 - val_sparse_categorical_accuracy: 0.8086 - learning_rate: 0.0010
+## Epoch 41/500
+## 90/90 - 0s - 1ms/step - loss: 0.2315 - sparse_categorical_accuracy: 0.9069 - val_loss: 0.4963 - val_sparse_categorical_accuracy: 0.7573 - learning_rate: 0.0010
+## Epoch 42/500
+## 90/90 - 0s - 1ms/step - loss: 0.2279 - sparse_categorical_accuracy: 0.9101 - val_loss: 0.2943 - val_sparse_categorical_accuracy: 0.8849 - learning_rate: 0.0010
+## Epoch 43/500
+## 90/90 - 0s - 1ms/step - loss: 0.2365 - sparse_categorical_accuracy: 0.8983 - val_loss: 0.6205 - val_sparse_categorical_accuracy: 0.7226 - learning_rate: 0.0010
+## Epoch 44/500
+## 90/90 - 0s - 1ms/step - loss: 0.2314 - sparse_categorical_accuracy: 0.9080 - val_loss: 0.3348 - val_sparse_categorical_accuracy: 0.8419 - learning_rate: 0.0010
+## Epoch 45/500
+## 90/90 - 0s - 1ms/step - loss: 0.2301 - sparse_categorical_accuracy: 0.9094 - val_loss: 0.4539 - val_sparse_categorical_accuracy: 0.7753 - learning_rate: 0.0010
+## Epoch 46/500
+## 90/90 - 0s - 1ms/step - loss: 0.2265 - sparse_categorical_accuracy: 0.9090 - val_loss: 0.2660 - val_sparse_categorical_accuracy: 0.8974 - learning_rate: 0.0010
+## Epoch 47/500
+## 90/90 - 0s - 1ms/step - loss: 0.2065 - sparse_categorical_accuracy: 0.9188 - val_loss: 0.2680 - val_sparse_categorical_accuracy: 0.8932 - learning_rate: 0.0010
+## Epoch 48/500
+## 90/90 - 0s - 2ms/step - loss: 0.2107 - sparse_categorical_accuracy: 0.9139 - val_loss: 0.2204 - val_sparse_categorical_accuracy: 0.9126 - learning_rate: 0.0010
+## Epoch 49/500
+## 90/90 - 0s - 1ms/step - loss: 0.2009 - sparse_categorical_accuracy: 0.9174 - val_loss: 0.3135 - val_sparse_categorical_accuracy: 0.8336 - learning_rate: 0.0010
+## Epoch 50/500
+## 90/90 - 0s - 1ms/step - loss: 0.2138 - sparse_categorical_accuracy: 0.9160 - val_loss: 0.4732 - val_sparse_categorical_accuracy: 0.7587 - learning_rate: 0.0010
+## Epoch 51/500
+## 90/90 - 0s - 1ms/step - loss: 0.1992 - sparse_categorical_accuracy: 0.9201 - val_loss: 0.6327 - val_sparse_categorical_accuracy: 0.7365 - learning_rate: 0.0010
+## Epoch 52/500
+## 90/90 - 0s - 1ms/step - loss: 0.1888 - sparse_categorical_accuracy: 0.9267 - val_loss: 0.2356 - val_sparse_categorical_accuracy: 0.8932 - learning_rate: 0.0010
+## Epoch 53/500
+## 90/90 - 0s - 1ms/step - loss: 0.1837 - sparse_categorical_accuracy: 0.9278 - val_loss: 0.2637 - val_sparse_categorical_accuracy: 0.8877 - learning_rate: 0.0010
+## Epoch 54/500
+## 90/90 - 0s - 2ms/step - loss: 0.1758 - sparse_categorical_accuracy: 0.9354 - val_loss: 0.2062 - val_sparse_categorical_accuracy: 0.9126 - learning_rate: 0.0010
+## Epoch 55/500
+## 90/90 - 0s - 1ms/step - loss: 0.1668 - sparse_categorical_accuracy: 0.9438 - val_loss: 0.6011 - val_sparse_categorical_accuracy: 0.7559 - learning_rate: 0.0010
+## Epoch 56/500
+## 90/90 - 0s - 1ms/step - loss: 0.1580 - sparse_categorical_accuracy: 0.9476 - val_loss: 0.4532 - val_sparse_categorical_accuracy: 0.7587 - learning_rate: 0.0010
+## Epoch 57/500
+## 90/90 - 0s - 1ms/step - loss: 0.1518 - sparse_categorical_accuracy: 0.9521 - val_loss: 1.1827 - val_sparse_categorical_accuracy: 0.5492 - learning_rate: 0.0010
+## Epoch 58/500
+## 90/90 - 0s - 2ms/step - loss: 0.1449 - sparse_categorical_accuracy: 0.9528 - val_loss: 0.1922 - val_sparse_categorical_accuracy: 0.9168 - learning_rate: 0.0010
+## Epoch 59/500
+## 90/90 - 0s - 1ms/step - loss: 0.1520 - sparse_categorical_accuracy: 0.9476 - val_loss: 2.0179 - val_sparse_categorical_accuracy: 0.6089 - learning_rate: 0.0010
+## Epoch 60/500
+## 90/90 - 0s - 1ms/step - loss: 0.1457 - sparse_categorical_accuracy: 0.9503 - val_loss: 0.2761 - val_sparse_categorical_accuracy: 0.8946 - learning_rate: 0.0010
+## Epoch 61/500
+## 90/90 - 0s - 1ms/step - loss: 0.1280 - sparse_categorical_accuracy: 0.9580 - val_loss: 1.6068 - val_sparse_categorical_accuracy: 0.6796 - learning_rate: 0.0010
+## Epoch 62/500
+## 90/90 - 0s - 1ms/step - loss: 0.1408 - sparse_categorical_accuracy: 0.9510 - val_loss: 0.9512 - val_sparse_categorical_accuracy: 0.7129 - learning_rate: 0.0010
+## Epoch 63/500
+## 90/90 - 0s - 1ms/step - loss: 0.1381 - sparse_categorical_accuracy: 0.9514 - val_loss: 1.2871 - val_sparse_categorical_accuracy: 0.6976 - learning_rate: 0.0010
+## Epoch 64/500
+## 90/90 - 0s - 1ms/step - loss: 0.1280 - sparse_categorical_accuracy: 0.9569 - val_loss: 0.3003 - val_sparse_categorical_accuracy: 0.8724 - learning_rate: 0.0010
+## Epoch 65/500
+## 90/90 - 0s - 1ms/step - loss: 0.1206 - sparse_categorical_accuracy: 0.9618 - val_loss: 0.4581 - val_sparse_categorical_accuracy: 0.8086 - learning_rate: 0.0010
+## Epoch 66/500
+## 90/90 - 0s - 1ms/step - loss: 0.1148 - sparse_categorical_accuracy: 0.9628 - val_loss: 0.3863 - val_sparse_categorical_accuracy: 0.8266 - learning_rate: 0.0010
+## Epoch 67/500
+## 90/90 - 0s - 1ms/step - loss: 0.1068 - sparse_categorical_accuracy: 0.9663 - val_loss: 1.2923 - val_sparse_categorical_accuracy: 0.6852 - learning_rate: 0.0010
+## Epoch 68/500
+## 90/90 - 0s - 1ms/step - loss: 0.1159 - sparse_categorical_accuracy: 0.9615 - val_loss: 0.2969 - val_sparse_categorical_accuracy: 0.8558 - learning_rate: 0.0010
+## Epoch 69/500
+## 90/90 - 0s - 1ms/step - loss: 0.1228 - sparse_categorical_accuracy: 0.9580 - val_loss: 0.2251 - val_sparse_categorical_accuracy: 0.9071 - learning_rate: 0.0010
+## Epoch 70/500
+## 90/90 - 0s - 2ms/step - loss: 0.1138 - sparse_categorical_accuracy: 0.9622 - val_loss: 0.1805 - val_sparse_categorical_accuracy: 0.9251 - learning_rate: 0.0010
+## Epoch 71/500
+## 90/90 - 0s - 2ms/step - loss: 0.1101 - sparse_categorical_accuracy: 0.9618 - val_loss: 0.1439 - val_sparse_categorical_accuracy: 0.9487 - learning_rate: 0.0010
+## Epoch 72/500
+## 90/90 - 0s - 2ms/step - loss: 0.1133 - sparse_categorical_accuracy: 0.9628 - val_loss: 0.1390 - val_sparse_categorical_accuracy: 0.9459 - learning_rate: 0.0010
+## Epoch 73/500
+## 90/90 - 0s - 1ms/step - loss: 0.1042 - sparse_categorical_accuracy: 0.9698 - val_loss: 0.1421 - val_sparse_categorical_accuracy: 0.9445 - learning_rate: 0.0010
+## Epoch 74/500
+## 90/90 - 0s - 1ms/step - loss: 0.1098 - sparse_categorical_accuracy: 0.9604 - val_loss: 2.0644 - val_sparse_categorical_accuracy: 0.6685 - learning_rate: 0.0010
+## Epoch 75/500
+## 90/90 - 0s - 1ms/step - loss: 0.1419 - sparse_categorical_accuracy: 0.9486 - val_loss: 0.4101 - val_sparse_categorical_accuracy: 0.8336 - learning_rate: 0.0010
+## Epoch 76/500
+## 90/90 - 0s - 1ms/step - loss: 0.1230 - sparse_categorical_accuracy: 0.9569 - val_loss: 0.3055 - val_sparse_categorical_accuracy: 0.8946 - learning_rate: 0.0010
+## Epoch 77/500
+## 90/90 - 0s - 1ms/step - loss: 0.1028 - sparse_categorical_accuracy: 0.9656 - val_loss: 0.1704 - val_sparse_categorical_accuracy: 0.9265 - learning_rate: 0.0010
+## Epoch 78/500
+## 90/90 - 0s - 1ms/step - loss: 0.1003 - sparse_categorical_accuracy: 0.9674 - val_loss: 0.3304 - val_sparse_categorical_accuracy: 0.8682 - learning_rate: 0.0010
+## Epoch 79/500
+## 90/90 - 0s - 1ms/step - loss: 0.1019 - sparse_categorical_accuracy: 0.9691 - val_loss: 0.5429 - val_sparse_categorical_accuracy: 0.7822 - learning_rate: 0.0010
+## Epoch 80/500
+## 90/90 - 0s - 1ms/step - loss: 0.1037 - sparse_categorical_accuracy: 0.9670 - val_loss: 2.0288 - val_sparse_categorical_accuracy: 0.6976 - learning_rate: 0.0010
+## Epoch 81/500
+## 90/90 - 0s - 1ms/step - loss: 0.1030 - sparse_categorical_accuracy: 0.9667 - val_loss: 3.3225 - val_sparse_categorical_accuracy: 0.5492 - learning_rate: 0.0010
+## Epoch 82/500
+## 90/90 - 0s - 2ms/step - loss: 0.0947 - sparse_categorical_accuracy: 0.9708 - val_loss: 0.1449 - val_sparse_categorical_accuracy: 0.9390 - learning_rate: 0.0010
+## Epoch 83/500
+## 90/90 - 0s - 2ms/step - loss: 0.1060 - sparse_categorical_accuracy: 0.9639 - val_loss: 0.2491 - val_sparse_categorical_accuracy: 0.9015 - learning_rate: 0.0010
+## Epoch 84/500
+## 90/90 - 0s - 2ms/step - loss: 0.0958 - sparse_categorical_accuracy: 0.9656 - val_loss: 0.1281 - val_sparse_categorical_accuracy: 0.9404 - learning_rate: 0.0010
+## Epoch 85/500
+## 90/90 - 0s - 1ms/step - loss: 0.0993 - sparse_categorical_accuracy: 0.9674 - val_loss: 0.1956 - val_sparse_categorical_accuracy: 0.9168 - learning_rate: 0.0010
+## Epoch 86/500
+## 90/90 - 0s - 1ms/step - loss: 0.1003 - sparse_categorical_accuracy: 0.9604 - val_loss: 0.4167 - val_sparse_categorical_accuracy: 0.8239 - learning_rate: 0.0010
+## Epoch 87/500
+## 90/90 - 0s - 1ms/step - loss: 0.1099 - sparse_categorical_accuracy: 0.9604 - val_loss: 0.1557 - val_sparse_categorical_accuracy: 0.9390 - learning_rate: 0.0010
+## Epoch 88/500
+## 90/90 - 0s - 1ms/step - loss: 0.1002 - sparse_categorical_accuracy: 0.9656 - val_loss: 0.1204 - val_sparse_categorical_accuracy: 0.9542 - learning_rate: 0.0010
+## Epoch 89/500
+## 90/90 - 0s - 2ms/step - loss: 0.0969 - sparse_categorical_accuracy: 0.9663 - val_loss: 0.1194 - val_sparse_categorical_accuracy: 0.9542 - learning_rate: 0.0010
+## Epoch 90/500
+## 90/90 - 0s - 1ms/step - loss: 0.0937 - sparse_categorical_accuracy: 0.9691 - val_loss: 0.1397 - val_sparse_categorical_accuracy: 0.9431 - learning_rate: 0.0010
+## Epoch 91/500
+## 90/90 - 0s - 1ms/step - loss: 0.1024 - sparse_categorical_accuracy: 0.9660 - val_loss: 2.1705 - val_sparse_categorical_accuracy: 0.6671 - learning_rate: 0.0010
+## Epoch 92/500
+## 90/90 - 0s - 2ms/step - loss: 0.0928 - sparse_categorical_accuracy: 0.9733 - val_loss: 0.7758 - val_sparse_categorical_accuracy: 0.7559 - learning_rate: 0.0010
+## Epoch 93/500
+## 90/90 - 0s - 1ms/step - loss: 0.0905 - sparse_categorical_accuracy: 0.9708 - val_loss: 0.2143 - val_sparse_categorical_accuracy: 0.9140 - learning_rate: 0.0010
+## Epoch 94/500
+## 90/90 - 0s - 1ms/step - loss: 0.0919 - sparse_categorical_accuracy: 0.9708 - val_loss: 0.1413 - val_sparse_categorical_accuracy: 0.9445 - learning_rate: 0.0010
+## Epoch 95/500
+## 90/90 - 0s - 1ms/step - loss: 0.0884 - sparse_categorical_accuracy: 0.9677 - val_loss: 0.1715 - val_sparse_categorical_accuracy: 0.9362 - learning_rate: 0.0010
+## Epoch 96/500
+## 90/90 - 0s - 1ms/step - loss: 0.1015 - sparse_categorical_accuracy: 0.9653 - val_loss: 0.3827 - val_sparse_categorical_accuracy: 0.8669 - learning_rate: 0.0010
+## Epoch 97/500
+## 90/90 - 0s - 1ms/step - loss: 0.0945 - sparse_categorical_accuracy: 0.9663 - val_loss: 0.1286 - val_sparse_categorical_accuracy: 0.9542 - learning_rate: 0.0010
+## Epoch 98/500
+## 90/90 - 0s - 1ms/step - loss: 0.0924 - sparse_categorical_accuracy: 0.9698 - val_loss: 0.2279 - val_sparse_categorical_accuracy: 0.9126 - learning_rate: 0.0010
+## Epoch 99/500
+## 90/90 - 0s - 1ms/step - loss: 0.0951 - sparse_categorical_accuracy: 0.9656 - val_loss: 0.1800 - val_sparse_categorical_accuracy: 0.9348 - learning_rate: 0.0010
+## Epoch 100/500
+## 90/90 - 0s - 1ms/step - loss: 0.0864 - sparse_categorical_accuracy: 0.9729 - val_loss: 0.2424 - val_sparse_categorical_accuracy: 0.9085 - learning_rate: 0.0010
+## Epoch 101/500
+## 90/90 - 0s - 1ms/step - loss: 0.0830 - sparse_categorical_accuracy: 0.9719 - val_loss: 0.1904 - val_sparse_categorical_accuracy: 0.9251 - learning_rate: 0.0010
+## Epoch 102/500
+## 90/90 - 0s - 1ms/step - loss: 0.0973 - sparse_categorical_accuracy: 0.9670 - val_loss: 0.1916 - val_sparse_categorical_accuracy: 0.9182 - learning_rate: 0.0010
+## Epoch 103/500
+## 90/90 - 0s - 1ms/step - loss: 0.0883 - sparse_categorical_accuracy: 0.9719 - val_loss: 0.1190 - val_sparse_categorical_accuracy: 0.9515 - learning_rate: 0.0010
+## Epoch 104/500
+## 90/90 - 0s - 1ms/step - loss: 0.0936 - sparse_categorical_accuracy: 0.9677 - val_loss: 0.2695 - val_sparse_categorical_accuracy: 0.8932 - learning_rate: 0.0010
+## Epoch 105/500
+## 90/90 - 0s - 1ms/step - loss: 0.0905 - sparse_categorical_accuracy: 0.9684 - val_loss: 0.1941 - val_sparse_categorical_accuracy: 0.9182 - learning_rate: 0.0010
+## Epoch 106/500
+## 90/90 - 0s - 1ms/step - loss: 0.0885 - sparse_categorical_accuracy: 0.9674 - val_loss: 0.5663 - val_sparse_categorical_accuracy: 0.8322 - learning_rate: 0.0010
+## Epoch 107/500
+## 90/90 - 0s - 1ms/step - loss: 0.0891 - sparse_categorical_accuracy: 0.9674 - val_loss: 0.6871 - val_sparse_categorical_accuracy: 0.7490 - learning_rate: 0.0010
+## Epoch 108/500
+## 90/90 - 0s - 1ms/step - loss: 0.0890 - sparse_categorical_accuracy: 0.9701 - val_loss: 0.7344 - val_sparse_categorical_accuracy: 0.8031 - learning_rate: 0.0010
+## Epoch 109/500
+## 90/90 - 0s - 1ms/step - loss: 0.0894 - sparse_categorical_accuracy: 0.9701 - val_loss: 0.3726 - val_sparse_categorical_accuracy: 0.8682 - learning_rate: 0.0010
+## Epoch 110/500
+## 90/90 - 0s - 1ms/step - loss: 0.0963 - sparse_categorical_accuracy: 0.9660 - val_loss: 0.6424 - val_sparse_categorical_accuracy: 0.7961 - learning_rate: 0.0010
+## Epoch 111/500
+## 90/90 - 0s - 1ms/step - loss: 0.0819 - sparse_categorical_accuracy: 0.9733 - val_loss: 0.8653 - val_sparse_categorical_accuracy: 0.7642 - learning_rate: 0.0010
+## Epoch 112/500
+## 90/90 - 0s - 1ms/step - loss: 0.0852 - sparse_categorical_accuracy: 0.9691 - val_loss: 1.0738 - val_sparse_categorical_accuracy: 0.7393 - learning_rate: 0.0010
+## Epoch 113/500
+## 90/90 - 0s - 1ms/step - loss: 0.0828 - sparse_categorical_accuracy: 0.9722 - val_loss: 0.1299 - val_sparse_categorical_accuracy: 0.9473 - learning_rate: 0.0010
+## Epoch 114/500
+## 90/90 - 0s - 1ms/step - loss: 0.1042 - sparse_categorical_accuracy: 0.9649 - val_loss: 0.2819 - val_sparse_categorical_accuracy: 0.8932 - learning_rate: 0.0010
+## Epoch 115/500
+## 90/90 - 0s - 1ms/step - loss: 0.0780 - sparse_categorical_accuracy: 0.9726 - val_loss: 0.2349 - val_sparse_categorical_accuracy: 0.9001 - learning_rate: 0.0010
+## Epoch 116/500
+## 90/90 - 0s - 1ms/step - loss: 0.0850 - sparse_categorical_accuracy: 0.9743 - val_loss: 0.9277 - val_sparse_categorical_accuracy: 0.7406 - learning_rate: 0.0010
+## Epoch 117/500
+## 90/90 - 0s - 1ms/step - loss: 0.0813 - sparse_categorical_accuracy: 0.9708 - val_loss: 0.7052 - val_sparse_categorical_accuracy: 0.7836 - learning_rate: 0.0010
+## Epoch 118/500
+## 90/90 - 0s - 1ms/step - loss: 0.0838 - sparse_categorical_accuracy: 0.9701 - val_loss: 0.1543 - val_sparse_categorical_accuracy: 0.9307 - learning_rate: 0.0010
+## Epoch 119/500
+## 90/90 - 0s - 2ms/step - loss: 0.0817 - sparse_categorical_accuracy: 0.9722 - val_loss: 0.1127 - val_sparse_categorical_accuracy: 0.9542 - learning_rate: 0.0010
+## Epoch 120/500
+## 90/90 - 0s - 1ms/step - loss: 0.0846 - sparse_categorical_accuracy: 0.9715 - val_loss: 0.1209 - val_sparse_categorical_accuracy: 0.9542 - learning_rate: 0.0010
+## Epoch 121/500
+## 90/90 - 0s - 1ms/step - loss: 0.0817 - sparse_categorical_accuracy: 0.9740 - val_loss: 0.1134 - val_sparse_categorical_accuracy: 0.9556 - learning_rate: 0.0010
+## Epoch 122/500
+## 90/90 - 0s - 1ms/step - loss: 0.0872 - sparse_categorical_accuracy: 0.9691 - val_loss: 0.6854 - val_sparse_categorical_accuracy: 0.7892 - learning_rate: 0.0010
+## Epoch 123/500
+## 90/90 - 0s - 1ms/step - loss: 0.0795 - sparse_categorical_accuracy: 0.9743 - val_loss: 0.8494 - val_sparse_categorical_accuracy: 0.7448 - learning_rate: 0.0010
+## Epoch 124/500
+## 90/90 - 0s - 1ms/step - loss: 0.0965 - sparse_categorical_accuracy: 0.9632 - val_loss: 0.2033 - val_sparse_categorical_accuracy: 0.9140 - learning_rate: 0.0010
+## Epoch 125/500
+## 90/90 - 0s - 1ms/step - loss: 0.0866 - sparse_categorical_accuracy: 0.9715 - val_loss: 0.3366 - val_sparse_categorical_accuracy: 0.8724 - learning_rate: 0.0010
+## Epoch 126/500
+## 90/90 - 0s - 1ms/step - loss: 0.0792 - sparse_categorical_accuracy: 0.9747 - val_loss: 1.1318 - val_sparse_categorical_accuracy: 0.7462 - learning_rate: 0.0010
+## Epoch 127/500
+## 90/90 - 0s - 1ms/step - loss: 0.0810 - sparse_categorical_accuracy: 0.9760 - val_loss: 0.1230 - val_sparse_categorical_accuracy: 0.9584 - learning_rate: 0.0010
+## Epoch 128/500
+## 90/90 - 0s - 1ms/step - loss: 0.0839 - sparse_categorical_accuracy: 0.9698 - val_loss: 0.1734 - val_sparse_categorical_accuracy: 0.9279 - learning_rate: 0.0010
+## Epoch 129/500
+## 90/90 - 0s - 1ms/step - loss: 0.0845 - sparse_categorical_accuracy: 0.9719 - val_loss: 0.2233 - val_sparse_categorical_accuracy: 0.9154 - learning_rate: 0.0010
+## Epoch 130/500
+## 90/90 - 0s - 1ms/step - loss: 0.0906 - sparse_categorical_accuracy: 0.9694 - val_loss: 1.4223 - val_sparse_categorical_accuracy: 0.6963 - learning_rate: 0.0010
+## Epoch 131/500
+## 90/90 - 0s - 1ms/step - loss: 0.0774 - sparse_categorical_accuracy: 0.9743 - val_loss: 2.8725 - val_sparse_categorical_accuracy: 0.7018 - learning_rate: 0.0010
+## Epoch 132/500
+## 90/90 - 0s - 1ms/step - loss: 0.0748 - sparse_categorical_accuracy: 0.9757 - val_loss: 0.8038 - val_sparse_categorical_accuracy: 0.7781 - learning_rate: 0.0010
+## Epoch 133/500
+## 90/90 - 0s - 1ms/step - loss: 0.0739 - sparse_categorical_accuracy: 0.9726 - val_loss: 0.5711 - val_sparse_categorical_accuracy: 0.7809 - learning_rate: 0.0010
+## Epoch 134/500
+## 90/90 - 0s - 1ms/step - loss: 0.0780 - sparse_categorical_accuracy: 0.9722 - val_loss: 0.2510 - val_sparse_categorical_accuracy: 0.8918 - learning_rate: 0.0010
+## Epoch 135/500
+## 90/90 - 0s - 1ms/step - loss: 0.0801 - sparse_categorical_accuracy: 0.9722 - val_loss: 0.4907 - val_sparse_categorical_accuracy: 0.8585 - learning_rate: 0.0010
+## Epoch 136/500
+## 90/90 - 0s - 1ms/step - loss: 0.0819 - sparse_categorical_accuracy: 0.9670 - val_loss: 0.1151 - val_sparse_categorical_accuracy: 0.9556 - learning_rate: 0.0010
+## Epoch 137/500
+## 90/90 - 0s - 1ms/step - loss: 0.0840 - sparse_categorical_accuracy: 0.9715 - val_loss: 1.2419 - val_sparse_categorical_accuracy: 0.7240 - learning_rate: 0.0010
+## Epoch 138/500
+## 90/90 - 0s - 1ms/step - loss: 0.0774 - sparse_categorical_accuracy: 0.9743 - val_loss: 0.6905 - val_sparse_categorical_accuracy: 0.7864 - learning_rate: 0.0010
+## Epoch 139/500
+## 90/90 - 0s - 1ms/step - loss: 0.0852 - sparse_categorical_accuracy: 0.9691 - val_loss: 0.1253 - val_sparse_categorical_accuracy: 0.9542 - learning_rate: 0.0010
+## Epoch 140/500
+## 90/90 - 1s - 6ms/step - loss: 0.0666 - sparse_categorical_accuracy: 0.9785 - val_loss: 0.2353 - val_sparse_categorical_accuracy: 0.9140 - learning_rate: 5.0000e-04
+## Epoch 141/500
+## 90/90 - 0s - 1ms/step - loss: 0.0657 - sparse_categorical_accuracy: 0.9771 - val_loss: 0.1275 - val_sparse_categorical_accuracy: 0.9584 - learning_rate: 5.0000e-04
+## Epoch 142/500
+## 90/90 - 0s - 1ms/step - loss: 0.0635 - sparse_categorical_accuracy: 0.9788 - val_loss: 0.2696 - val_sparse_categorical_accuracy: 0.9029 - learning_rate: 5.0000e-04
+## Epoch 143/500
+## 90/90 - 0s - 1ms/step - loss: 0.0688 - sparse_categorical_accuracy: 0.9767 - val_loss: 0.1737 - val_sparse_categorical_accuracy: 0.9293 - learning_rate: 5.0000e-04
+## Epoch 144/500
+## 90/90 - 0s - 1ms/step - loss: 0.0615 - sparse_categorical_accuracy: 0.9826 - val_loss: 0.2506 - val_sparse_categorical_accuracy: 0.8946 - learning_rate: 5.0000e-04
+## Epoch 145/500
+## 90/90 - 0s - 1ms/step - loss: 0.0629 - sparse_categorical_accuracy: 0.9802 - val_loss: 0.1486 - val_sparse_categorical_accuracy: 0.9362 - learning_rate: 5.0000e-04
+## Epoch 146/500
+## 90/90 - 0s - 1ms/step - loss: 0.0656 - sparse_categorical_accuracy: 0.9771 - val_loss: 0.1651 - val_sparse_categorical_accuracy: 0.9307 - learning_rate: 5.0000e-04
+## Epoch 147/500
+## 90/90 - 0s - 1ms/step - loss: 0.0677 - sparse_categorical_accuracy: 0.9774 - val_loss: 0.1034 - val_sparse_categorical_accuracy: 0.9667 - learning_rate: 5.0000e-04
+## Epoch 148/500
+## 90/90 - 0s - 1ms/step - loss: 0.0762 - sparse_categorical_accuracy: 0.9726 - val_loss: 0.1455 - val_sparse_categorical_accuracy: 0.9417 - learning_rate: 5.0000e-04
+## Epoch 149/500
+## 90/90 - 0s - 1ms/step - loss: 0.0657 - sparse_categorical_accuracy: 0.9774 - val_loss: 0.1037 - val_sparse_categorical_accuracy: 0.9542 - learning_rate: 5.0000e-04
+## Epoch 150/500
+## 90/90 - 0s - 1ms/step - loss: 0.0686 - sparse_categorical_accuracy: 0.9753 - val_loss: 0.1306 - val_sparse_categorical_accuracy: 0.9584 - learning_rate: 5.0000e-04
+## Epoch 151/500
+## 90/90 - 0s - 1ms/step - loss: 0.0667 - sparse_categorical_accuracy: 0.9771 - val_loss: 0.1227 - val_sparse_categorical_accuracy: 0.9515 - learning_rate: 5.0000e-04
+## Epoch 152/500
+## 90/90 - 0s - 1ms/step - loss: 0.0677 - sparse_categorical_accuracy: 0.9757 - val_loss: 0.1041 - val_sparse_categorical_accuracy: 0.9626 - learning_rate: 5.0000e-04
+## Epoch 153/500
+## 90/90 - 0s - 1ms/step - loss: 0.0638 - sparse_categorical_accuracy: 0.9802 - val_loss: 0.1837 - val_sparse_categorical_accuracy: 0.9307 - learning_rate: 5.0000e-04
+## Epoch 154/500
+## 90/90 - 0s - 1ms/step - loss: 0.0608 - sparse_categorical_accuracy: 0.9806 - val_loss: 0.1237 - val_sparse_categorical_accuracy: 0.9570 - learning_rate: 5.0000e-04
+## Epoch 155/500
+## 90/90 - 0s - 1ms/step - loss: 0.0662 - sparse_categorical_accuracy: 0.9806 - val_loss: 0.1192 - val_sparse_categorical_accuracy: 0.9515 - learning_rate: 5.0000e-04
+## Epoch 156/500
+## 90/90 - 0s - 1ms/step - loss: 0.0672 - sparse_categorical_accuracy: 0.9781 - val_loss: 0.1262 - val_sparse_categorical_accuracy: 0.9473 - learning_rate: 5.0000e-04
+## Epoch 157/500
+## 90/90 - 0s - 1ms/step - loss: 0.0634 - sparse_categorical_accuracy: 0.9760 - val_loss: 0.1035 - val_sparse_categorical_accuracy: 0.9556 - learning_rate: 5.0000e-04
+## Epoch 158/500
+## 90/90 - 0s - 1ms/step - loss: 0.0657 - sparse_categorical_accuracy: 0.9785 - val_loss: 0.2735 - val_sparse_categorical_accuracy: 0.8988 - learning_rate: 5.0000e-04
+## Epoch 159/500
+## 90/90 - 0s - 1ms/step - loss: 0.0685 - sparse_categorical_accuracy: 0.9750 - val_loss: 0.2094 - val_sparse_categorical_accuracy: 0.9168 - learning_rate: 5.0000e-04
+## Epoch 160/500
+## 90/90 - 0s - 1ms/step - loss: 0.0712 - sparse_categorical_accuracy: 0.9733 - val_loss: 0.1137 - val_sparse_categorical_accuracy: 0.9459 - learning_rate: 5.0000e-04
+## Epoch 161/500
+## 90/90 - 0s - 1ms/step - loss: 0.0675 - sparse_categorical_accuracy: 0.9760 - val_loss: 0.1544 - val_sparse_categorical_accuracy: 0.9320 - learning_rate: 5.0000e-04
+## Epoch 162/500
+## 90/90 - 0s - 1ms/step - loss: 0.0610 - sparse_categorical_accuracy: 0.9785 - val_loss: 0.1070 - val_sparse_categorical_accuracy: 0.9639 - learning_rate: 5.0000e-04
+## Epoch 163/500
+## 90/90 - 0s - 1ms/step - loss: 0.0572 - sparse_categorical_accuracy: 0.9816 - val_loss: 0.1382 - val_sparse_categorical_accuracy: 0.9473 - learning_rate: 5.0000e-04
+## Epoch 164/500
+## 90/90 - 0s - 1ms/step - loss: 0.0617 - sparse_categorical_accuracy: 0.9813 - val_loss: 0.1737 - val_sparse_categorical_accuracy: 0.9265 - learning_rate: 5.0000e-04
+## Epoch 165/500
+## 90/90 - 0s - 1ms/step - loss: 0.0660 - sparse_categorical_accuracy: 0.9750 - val_loss: 0.1737 - val_sparse_categorical_accuracy: 0.9404 - learning_rate: 5.0000e-04
+## Epoch 166/500
+## 90/90 - 0s - 1ms/step - loss: 0.0632 - sparse_categorical_accuracy: 0.9781 - val_loss: 0.1408 - val_sparse_categorical_accuracy: 0.9501 - learning_rate: 5.0000e-04
+## Epoch 167/500
+## 90/90 - 0s - 1ms/step - loss: 0.0620 - sparse_categorical_accuracy: 0.9788 - val_loss: 0.2021 - val_sparse_categorical_accuracy: 0.9223 - learning_rate: 5.0000e-04
+## Epoch 168/500
+## 90/90 - 0s - 1ms/step - loss: 0.0669 - sparse_categorical_accuracy: 0.9771 - val_loss: 0.1003 - val_sparse_categorical_accuracy: 0.9598 - learning_rate: 2.5000e-04
+## Epoch 169/500
+## 90/90 - 0s - 2ms/step - loss: 0.0578 - sparse_categorical_accuracy: 0.9809 - val_loss: 0.0992 - val_sparse_categorical_accuracy: 0.9681 - learning_rate: 2.5000e-04
+## Epoch 170/500
+## 90/90 - 0s - 1ms/step - loss: 0.0562 - sparse_categorical_accuracy: 0.9819 - val_loss: 0.0970 - val_sparse_categorical_accuracy: 0.9695 - learning_rate: 2.5000e-04
+## Epoch 171/500
+## 90/90 - 0s - 1ms/step - loss: 0.0535 - sparse_categorical_accuracy: 0.9819 - val_loss: 0.1347 - val_sparse_categorical_accuracy: 0.9584 - learning_rate: 2.5000e-04
+## Epoch 172/500
+## 90/90 - 0s - 1ms/step - loss: 0.0558 - sparse_categorical_accuracy: 0.9816 - val_loss: 0.0975 - val_sparse_categorical_accuracy: 0.9626 - learning_rate: 2.5000e-04
+## Epoch 173/500
+## 90/90 - 0s - 1ms/step - loss: 0.0536 - sparse_categorical_accuracy: 0.9819 - val_loss: 0.1328 - val_sparse_categorical_accuracy: 0.9584 - learning_rate: 2.5000e-04
+## Epoch 174/500
+## 90/90 - 0s - 1ms/step - loss: 0.0550 - sparse_categorical_accuracy: 0.9819 - val_loss: 0.1243 - val_sparse_categorical_accuracy: 0.9501 - learning_rate: 2.5000e-04
+## Epoch 175/500
+## 90/90 - 0s - 1ms/step - loss: 0.0583 - sparse_categorical_accuracy: 0.9813 - val_loss: 0.1018 - val_sparse_categorical_accuracy: 0.9556 - learning_rate: 2.5000e-04
+## Epoch 176/500
+## 90/90 - 0s - 1ms/step - loss: 0.0548 - sparse_categorical_accuracy: 0.9795 - val_loss: 0.1016 - val_sparse_categorical_accuracy: 0.9584 - learning_rate: 2.5000e-04
+## Epoch 177/500
+## 90/90 - 0s - 1ms/step - loss: 0.0541 - sparse_categorical_accuracy: 0.9837 - val_loss: 0.1646 - val_sparse_categorical_accuracy: 0.9320 - learning_rate: 2.5000e-04
+## Epoch 178/500
+## 90/90 - 0s - 1ms/step - loss: 0.0522 - sparse_categorical_accuracy: 0.9851 - val_loss: 0.1151 - val_sparse_categorical_accuracy: 0.9515 - learning_rate: 2.5000e-04
+## Epoch 179/500
+## 90/90 - 0s - 1ms/step - loss: 0.0538 - sparse_categorical_accuracy: 0.9806 - val_loss: 0.1049 - val_sparse_categorical_accuracy: 0.9639 - learning_rate: 2.5000e-04
+## Epoch 180/500
+## 90/90 - 0s - 1ms/step - loss: 0.0531 - sparse_categorical_accuracy: 0.9830 - val_loss: 0.1178 - val_sparse_categorical_accuracy: 0.9639 - learning_rate: 2.5000e-04
+## Epoch 181/500
+## 90/90 - 0s - 1ms/step - loss: 0.0562 - sparse_categorical_accuracy: 0.9844 - val_loss: 0.1024 - val_sparse_categorical_accuracy: 0.9598 - learning_rate: 2.5000e-04
+## Epoch 182/500
+## 90/90 - 0s - 1ms/step - loss: 0.0525 - sparse_categorical_accuracy: 0.9833 - val_loss: 0.1600 - val_sparse_categorical_accuracy: 0.9376 - learning_rate: 2.5000e-04
+## Epoch 183/500
+## 90/90 - 0s - 1ms/step - loss: 0.0554 - sparse_categorical_accuracy: 0.9816 - val_loss: 0.1395 - val_sparse_categorical_accuracy: 0.9501 - learning_rate: 2.5000e-04
+## Epoch 184/500
+## 90/90 - 0s - 1ms/step - loss: 0.0514 - sparse_categorical_accuracy: 0.9844 - val_loss: 0.1340 - val_sparse_categorical_accuracy: 0.9459 - learning_rate: 2.5000e-04
+## Epoch 185/500
+## 90/90 - 0s - 1ms/step - loss: 0.0531 - sparse_categorical_accuracy: 0.9823 - val_loss: 0.1383 - val_sparse_categorical_accuracy: 0.9473 - learning_rate: 2.5000e-04
+## Epoch 186/500
+## 90/90 - 0s - 1ms/step - loss: 0.0574 - sparse_categorical_accuracy: 0.9823 - val_loss: 0.1325 - val_sparse_categorical_accuracy: 0.9515 - learning_rate: 2.5000e-04
+## Epoch 187/500
+## 90/90 - 0s - 1ms/step - loss: 0.0532 - sparse_categorical_accuracy: 0.9819 - val_loss: 0.1442 - val_sparse_categorical_accuracy: 0.9487 - learning_rate: 2.5000e-04
+## Epoch 188/500
+## 90/90 - 0s - 1ms/step - loss: 0.0533 - sparse_categorical_accuracy: 0.9840 - val_loss: 0.1016 - val_sparse_categorical_accuracy: 0.9681 - learning_rate: 2.5000e-04
+## Epoch 189/500
+## 90/90 - 0s - 1ms/step - loss: 0.0493 - sparse_categorical_accuracy: 0.9861 - val_loss: 0.1639 - val_sparse_categorical_accuracy: 0.9445 - learning_rate: 2.5000e-04
+## Epoch 190/500
+## 90/90 - 0s - 1ms/step - loss: 0.0583 - sparse_categorical_accuracy: 0.9802 - val_loss: 0.3468 - val_sparse_categorical_accuracy: 0.8863 - learning_rate: 2.5000e-04
+## Epoch 191/500
+## 90/90 - 0s - 1ms/step - loss: 0.0496 - sparse_categorical_accuracy: 0.9819 - val_loss: 0.1010 - val_sparse_categorical_accuracy: 0.9709 - learning_rate: 1.2500e-04
+## Epoch 192/500
+## 90/90 - 0s - 1ms/step - loss: 0.0536 - sparse_categorical_accuracy: 0.9819 - val_loss: 0.1100 - val_sparse_categorical_accuracy: 0.9695 - learning_rate: 1.2500e-04
+## Epoch 193/500
+## 90/90 - 0s - 1ms/step - loss: 0.0497 - sparse_categorical_accuracy: 0.9851 - val_loss: 0.1388 - val_sparse_categorical_accuracy: 0.9528 - learning_rate: 1.2500e-04
+## Epoch 194/500
+## 90/90 - 0s - 1ms/step - loss: 0.0490 - sparse_categorical_accuracy: 0.9826 - val_loss: 0.0983 - val_sparse_categorical_accuracy: 0.9695 - learning_rate: 1.2500e-04
+## Epoch 195/500
+## 90/90 - 0s - 1ms/step - loss: 0.0499 - sparse_categorical_accuracy: 0.9858 - val_loss: 0.1001 - val_sparse_categorical_accuracy: 0.9709 - learning_rate: 1.2500e-04
+## Epoch 196/500
+## 90/90 - 0s - 1ms/step - loss: 0.0517 - sparse_categorical_accuracy: 0.9823 - val_loss: 0.1001 - val_sparse_categorical_accuracy: 0.9612 - learning_rate: 1.2500e-04
+## Epoch 197/500
+## 90/90 - 0s - 1ms/step - loss: 0.0489 - sparse_categorical_accuracy: 0.9840 - val_loss: 0.0987 - val_sparse_categorical_accuracy: 0.9695 - learning_rate: 1.2500e-04
+## Epoch 198/500
+## 90/90 - 0s - 1ms/step - loss: 0.0472 - sparse_categorical_accuracy: 0.9889 - val_loss: 0.1672 - val_sparse_categorical_accuracy: 0.9362 - learning_rate: 1.2500e-04
+## Epoch 199/500
+## 90/90 - 0s - 1ms/step - loss: 0.0508 - sparse_categorical_accuracy: 0.9826 - val_loss: 0.0990 - val_sparse_categorical_accuracy: 0.9709 - learning_rate: 1.2500e-04
+## Epoch 200/500
+## 90/90 - 0s - 1ms/step - loss: 0.0497 - sparse_categorical_accuracy: 0.9851 - val_loss: 0.1290 - val_sparse_categorical_accuracy: 0.9584 - learning_rate: 1.2500e-04
+## Epoch 201/500
+## 90/90 - 0s - 1ms/step - loss: 0.0480 - sparse_categorical_accuracy: 0.9844 - val_loss: 0.1390 - val_sparse_categorical_accuracy: 0.9542 - learning_rate: 1.2500e-04
+## Epoch 202/500
+## 90/90 - 0s - 1ms/step - loss: 0.0477 - sparse_categorical_accuracy: 0.9872 - val_loss: 0.1194 - val_sparse_categorical_accuracy: 0.9612 - learning_rate: 1.2500e-04
+## Epoch 203/500
+## 90/90 - 0s - 1ms/step - loss: 0.0508 - sparse_categorical_accuracy: 0.9851 - val_loss: 0.1078 - val_sparse_categorical_accuracy: 0.9653 - learning_rate: 1.2500e-04
+## Epoch 204/500
+## 90/90 - 0s - 1ms/step - loss: 0.0449 - sparse_categorical_accuracy: 0.9875 - val_loss: 0.1227 - val_sparse_categorical_accuracy: 0.9612 - learning_rate: 1.2500e-04
+## Epoch 205/500
+## 90/90 - 0s - 1ms/step - loss: 0.0487 - sparse_categorical_accuracy: 0.9833 - val_loss: 0.1194 - val_sparse_categorical_accuracy: 0.9626 - learning_rate: 1.2500e-04
+## Epoch 206/500
+## 90/90 - 0s - 1ms/step - loss: 0.0491 - sparse_categorical_accuracy: 0.9840 - val_loss: 0.1540 - val_sparse_categorical_accuracy: 0.9431 - learning_rate: 1.2500e-04
+## Epoch 207/500
+## 90/90 - 0s - 1ms/step - loss: 0.0529 - sparse_categorical_accuracy: 0.9823 - val_loss: 0.1023 - val_sparse_categorical_accuracy: 0.9667 - learning_rate: 1.2500e-04
+## Epoch 208/500
+## 90/90 - 0s - 1ms/step - loss: 0.0488 - sparse_categorical_accuracy: 0.9851 - val_loss: 0.1295 - val_sparse_categorical_accuracy: 0.9612 - learning_rate: 1.2500e-04
+## Epoch 209/500
+## 90/90 - 0s - 1ms/step - loss: 0.0455 - sparse_categorical_accuracy: 0.9844 - val_loss: 0.1169 - val_sparse_categorical_accuracy: 0.9667 - learning_rate: 1.2500e-04
+## Epoch 210/500
+## 90/90 - 0s - 1ms/step - loss: 0.0489 - sparse_categorical_accuracy: 0.9851 - val_loss: 0.0976 - val_sparse_categorical_accuracy: 0.9626 - learning_rate: 1.2500e-04
+## Epoch 211/500
+## 90/90 - 0s - 1ms/step - loss: 0.0475 - sparse_categorical_accuracy: 0.9861 - val_loss: 0.1001 - val_sparse_categorical_accuracy: 0.9695 - learning_rate: 1.0000e-04
+## Epoch 212/500
+## 90/90 - 0s - 1ms/step - loss: 0.0474 - sparse_categorical_accuracy: 0.9878 - val_loss: 0.0986 - val_sparse_categorical_accuracy: 0.9695 - learning_rate: 1.0000e-04
+## Epoch 213/500
+## 90/90 - 0s - 1ms/step - loss: 0.0483 - sparse_categorical_accuracy: 0.9851 - val_loss: 0.0997 - val_sparse_categorical_accuracy: 0.9584 - learning_rate: 1.0000e-04
+## Epoch 214/500
+## 90/90 - 0s - 1ms/step - loss: 0.0490 - sparse_categorical_accuracy: 0.9851 - val_loss: 0.1029 - val_sparse_categorical_accuracy: 0.9695 - learning_rate: 1.0000e-04
+## Epoch 215/500
+## 90/90 - 0s - 1ms/step - loss: 0.0464 - sparse_categorical_accuracy: 0.9858 - val_loss: 0.1445 - val_sparse_categorical_accuracy: 0.9473 - learning_rate: 1.0000e-04
+## Epoch 216/500
+## 90/90 - 0s - 1ms/step - loss: 0.0456 - sparse_categorical_accuracy: 0.9858 - val_loss: 0.1137 - val_sparse_categorical_accuracy: 0.9681 - learning_rate: 1.0000e-04
+## Epoch 217/500
+## 90/90 - 0s - 1ms/step - loss: 0.0463 - sparse_categorical_accuracy: 0.9844 - val_loss: 0.1345 - val_sparse_categorical_accuracy: 0.9612 - learning_rate: 1.0000e-04
+## Epoch 218/500
+## 90/90 - 0s - 1ms/step - loss: 0.0501 - sparse_categorical_accuracy: 0.9830 - val_loss: 0.0979 - val_sparse_categorical_accuracy: 0.9653 - learning_rate: 1.0000e-04
+## Epoch 219/500
+## 90/90 - 0s - 1ms/step - loss: 0.0445 - sparse_categorical_accuracy: 0.9858 - val_loss: 0.1149 - val_sparse_categorical_accuracy: 0.9667 - learning_rate: 1.0000e-04
+## Epoch 220/500
+## 90/90 - 0s - 1ms/step - loss: 0.0463 - sparse_categorical_accuracy: 0.9872 - val_loss: 0.1003 - val_sparse_categorical_accuracy: 0.9612 - learning_rate: 1.0000e-04
+## Epoch 220: early stopping
+
+
+

Evaluate model on test data

+
+model <- load_model("best_model.keras")
+
+results <- model |> evaluate(x_test, y_test)
+
+## 42/42 - 0s - 10ms/step - loss: 0.0917 - sparse_categorical_accuracy: 0.9712
+
+str(results)
+
+## List of 2
+##  $ loss                       : num 0.0917
+##  $ sparse_categorical_accuracy: num 0.971
+
+cat(
+  "Test accuracy: ", results$sparse_categorical_accuracy, "\n",
+  "Test loss: ", results$loss, "\n",
+  sep = ""
+)
+
+## Test accuracy: 0.9712121
+## Test loss: 0.09173314
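
Beyond the aggregate metrics, it can help to look at per-class errors. A minimal sketch (assuming x_test and the 0/1 label vector y_test defined earlier in this example; predict() returns one column of class probabilities per class):

+
+probs <- model |> predict(x_test)
+pred <- max.col(probs) - 1L  # column 1 = class 0, column 2 = class 1
+table(predicted = pred, actual = y_test)
+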
+
+
+

Plot the model’s training history

+
+plot(history)
+
+Plot of Training History Metrics
+
+

Plot just the training and validation accuracy:

+
+plot(history, metric = "sparse_categorical_accuracy") +
+  # scale x axis to actual number of epochs run before early stopping
+  ggplot2::xlim(0, length(history$metrics$loss))
+
+Plot of Accuracy During Training
+
+

We can see how the training accuracy reaches almost 0.95 after 100 epochs. However, by observing the validation accuracy we can see how the network still needs training until it reaches almost 0.97 for both the validation and the training accuracy after 200 epochs. Beyond the 200th epoch, if we continue training, the validation accuracy will start decreasing while the training accuracy keeps increasing: the model starts overfitting.

+
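
One way to make the early-stopping behaviour concrete is to read the best epoch back out of the history object; the checkpoint callback saved exactly those weights (a minimal sketch using only objects defined above):

+
+best_epoch <- which.min(history$metrics$val_loss)
+best_epoch
+history$metrics$val_sparse_categorical_accuracy[best_epoch]
+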
+
+
diff --git a/docs/articles/examples/timeseries/timeseries_classification_from_scratch/unnamed-chunk-12-1.png b/docs/articles/examples/timeseries/timeseries_classification_from_scratch/unnamed-chunk-12-1.png
new file mode 100644
index 0000000000..42a808b283
Binary files /dev/null and b/docs/articles/examples/timeseries/timeseries_classification_from_scratch/unnamed-chunk-12-1.png differ
diff --git a/docs/articles/examples/timeseries/timeseries_classification_from_scratch/unnamed-chunk-13-1.png b/docs/articles/examples/timeseries/timeseries_classification_from_scratch/unnamed-chunk-13-1.png
new file mode 100644
index 0000000000..954bd4c108
Binary files /dev/null and b/docs/articles/examples/timeseries/timeseries_classification_from_scratch/unnamed-chunk-13-1.png differ
diff --git a/docs/articles/examples/timeseries/timeseries_classification_from_scratch/unnamed-chunk-3-1.png b/docs/articles/examples/timeseries/timeseries_classification_from_scratch/unnamed-chunk-3-1.png
new file mode 100644
index 0000000000..a2016e7967
Binary files /dev/null and b/docs/articles/examples/timeseries/timeseries_classification_from_scratch/unnamed-chunk-3-1.png differ
diff --git a/docs/articles/examples/timeseries/timeseries_classification_from_scratch/unnamed-chunk-9-1.png b/docs/articles/examples/timeseries/timeseries_classification_from_scratch/unnamed-chunk-9-1.png
new file mode 100644
index 0000000000..45d0436e03
Binary files /dev/null and b/docs/articles/examples/timeseries/timeseries_classification_from_scratch/unnamed-chunk-9-1.png differ
diff --git a/docs/articles/examples/vision/autoencoder.html b/docs/articles/examples/vision/autoencoder.html
new file mode 100644
index 0000000000..2713fc0d21
--- /dev/null
+++ b/docs/articles/examples/vision/autoencoder.html
@@ -0,0 +1,597 @@
+Convolutional autoencoder for image denoising • keras3

Introduction

+

This example demonstrates how to implement a deep convolutional autoencoder for image denoising, mapping noisy digit images from the MNIST dataset to clean digit images. This implementation is based on the original blog post Building Autoencoders in Keras by François Chollet.

+
+
+

Setup

+
+library(keras3)
+
+# Normalizes the supplied array and reshapes it.
+preprocess <- function(array) {
+  array_reshape(array/255, c(dim(array)[1], 28, 28, 1))
+}
+
+# Adds random noise to each image in the supplied array.
+noise <- function(array) {
+  noise_factor <- 0.4
+  noisy_array <- array + noise_factor * random_normal(dim(array))
+  op_clip(noisy_array, 0.0, 1.0)
+}
+
+display <- function(array1, array2) {
+  n <- 2
+  indices <- sample.int(dim(array1)[1], n)
+  images1 <- as.array(array1)[indices, , , ]
+  images2 <- as.array(array2)[indices, , , ]
+
+  par(mfrow = c(2, n), mar = c(0, 0, 0, 0))
+  for (i in seq_len(n)) {
+    plot(as.raster(images1[i, , ]))
+    plot(as.raster(images2[i, , ]))
+  }
+}
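
As a quick sanity check of these helpers (using a hypothetical dummy batch, not the real dataset): preprocess() should map integer pixel values into [0, 1] and add a trailing channel axis.

+
+dummy <- array(sample(0:255, 2 * 28 * 28, replace = TRUE), dim = c(2, 28, 28))
+dim(preprocess(dummy))    # 2 28 28 1
+range(preprocess(dummy))  # stays within [0, 1]
+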
+
+
+

Prepare the data

+
+# Since we only need images from the dataset to encode and decode, we
+# won't use the labels.
+c(c(train_data, .), c(test_data, .)) %<-% dataset_mnist()
+
+# Normalize and reshape the data
+train_data <- preprocess(train_data)
+test_data <- preprocess(test_data)
+
+# Create a copy of the data with added noise
+noisy_train_data <- noise(train_data)
+noisy_test_data <- noise(test_data)
+
+# Display the train data and a version of it with added noise
+display(train_data, noisy_train_data)
+
+plot of chunk unnamed-chunk-2
+
+
+
+

Build the autoencoder

+

We are going to use the Functional API to build our convolutional autoencoder.

+
+input <- keras_input(shape = c(28, 28, 1))
+
+# Encoder
+enc <- input |>
+  layer_conv_2d(filters = 32, kernel_size = c(3, 3),
+                activation = "relu", padding = "same") |>
+  layer_max_pooling_2d(pool_size = c(2, 2), padding = "same") |>
+  layer_conv_2d(filters = 32, kernel_size = c(3, 3),
+                activation = "relu", padding = "same") |>
+  layer_max_pooling_2d(pool_size = c(2, 2), padding = "same")
+
+# Decoder
+dec <- enc |>
+  layer_conv_2d_transpose(filters = 32, kernel_size = c(3, 3), strides = 2,
+                          activation = "relu", padding = "same") |>
+  layer_conv_2d_transpose(filters = 32, kernel_size = c(3, 3), strides = 2,
+                          activation = "relu", padding = "same") |>
+  layer_conv_2d(filters = 1, kernel_size = c(3, 3),
+                activation = "sigmoid", padding = "same")
+
+# Autoencoder
+autoencoder <- keras_model(input, dec)
+autoencoder |> compile(optimizer = "adam", loss = "binary_crossentropy")
+autoencoder |> summary()
+
+## Model: "functional_1"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ input_layer (InputLayer)        │ (None, 28, 28, 1)      │             0 │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d (Conv2D)                 │ (None, 28, 28, 32)     │           320 │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ max_pooling2d (MaxPooling2D)    │ (None, 14, 14, 32)     │             0 │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_1 (Conv2D)               │ (None, 14, 14, 32)     │         9,248 │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ max_pooling2d_1 (MaxPooling2D)  │ (None, 7, 7, 32)       │             0 │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_transpose                │ (None, 14, 14, 32)     │         9,248 │
+## │ (Conv2DTranspose)               │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_transpose_1              │ (None, 28, 28, 32)     │         9,248 │
+## │ (Conv2DTranspose)               │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_2 (Conv2D)               │ (None, 28, 28, 1)      │           289 │
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 28,353 (110.75 KB)
+##  Trainable params: 28,353 (110.75 KB)
+##  Non-trainable params: 0 (0.00 B)
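
The parameter counts follow directly from the layer shapes; a quick hand check of the summary above (not part of the original example):

+
+(3 * 3 * 1 + 1) * 32   # conv2d: 3x3 kernel, 1 channel in, 32 filters + biases -> 320
+(3 * 3 * 32 + 1) * 32  # conv2d_1 and each conv2d_transpose layer -> 9,248
+(3 * 3 * 32 + 1) * 1   # conv2d_2: 32 channels in, 1 filter out -> 289
+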
+

Now we can train our autoencoder using train_data as both our input data and target. Notice we are setting up the validation data using the same format.

+
+autoencoder |> fit(
+  x = train_data,
+  y = train_data,
+  epochs = 50,
+  batch_size = 128,
+  shuffle = TRUE,
+  validation_data = list(test_data, test_data)
+)
+
+## Epoch 1/50
+## 469/469 - 4s - 9ms/step - loss: 0.1371 - val_loss: 0.0727
+## Epoch 2/50
+## 469/469 - 1s - 3ms/step - loss: 0.0712 - val_loss: 0.0692
+## Epoch 3/50
+## 469/469 - 1s - 3ms/step - loss: 0.0690 - val_loss: 0.0679
+## Epoch 4/50
+## 469/469 - 1s - 3ms/step - loss: 0.0679 - val_loss: 0.0671
+## Epoch 5/50
+## 469/469 - 1s - 3ms/step - loss: 0.0672 - val_loss: 0.0665
+## Epoch 6/50
+## 469/469 - 1s - 3ms/step - loss: 0.0667 - val_loss: 0.0660
+## Epoch 7/50
+## 469/469 - 1s - 3ms/step - loss: 0.0662 - val_loss: 0.0656
+## Epoch 8/50
+## 469/469 - 1s - 3ms/step - loss: 0.0659 - val_loss: 0.0653
+## Epoch 9/50
+## 469/469 - 1s - 3ms/step - loss: 0.0656 - val_loss: 0.0650
+## Epoch 10/50
+## 469/469 - 1s - 3ms/step - loss: 0.0653 - val_loss: 0.0648
+## Epoch 11/50
+## 469/469 - 1s - 3ms/step - loss: 0.0651 - val_loss: 0.0646
+## Epoch 12/50
+## 469/469 - 1s - 3ms/step - loss: 0.0649 - val_loss: 0.0644
+## Epoch 13/50
+## 469/469 - 1s - 3ms/step - loss: 0.0648 - val_loss: 0.0643
+## Epoch 14/50
+## 469/469 - 1s - 3ms/step - loss: 0.0646 - val_loss: 0.0641
+## Epoch 15/50
+## 469/469 - 1s - 3ms/step - loss: 0.0645 - val_loss: 0.0640
+## Epoch 16/50
+## 469/469 - 1s - 3ms/step - loss: 0.0644 - val_loss: 0.0639
+## Epoch 17/50
+## 469/469 - 1s - 3ms/step - loss: 0.0643 - val_loss: 0.0638
+## Epoch 18/50
+## 469/469 - 1s - 3ms/step - loss: 0.0642 - val_loss: 0.0637
+## Epoch 19/50
+## 469/469 - 1s - 3ms/step - loss: 0.0641 - val_loss: 0.0636
+## Epoch 20/50
+## 469/469 - 1s - 3ms/step - loss: 0.0640 - val_loss: 0.0636
+## Epoch 21/50
+## 469/469 - 1s - 3ms/step - loss: 0.0639 - val_loss: 0.0635
+## Epoch 22/50
+## 469/469 - 1s - 3ms/step - loss: 0.0639 - val_loss: 0.0635
+## Epoch 23/50
+## 469/469 - 1s - 3ms/step - loss: 0.0638 - val_loss: 0.0634
+## Epoch 24/50
+## 469/469 - 1s - 3ms/step - loss: 0.0637 - val_loss: 0.0634
+## Epoch 25/50
+## 469/469 - 1s - 3ms/step - loss: 0.0637 - val_loss: 0.0633
+## Epoch 26/50
+## 469/469 - 1s - 3ms/step - loss: 0.0636 - val_loss: 0.0633
+## Epoch 27/50
+## 469/469 - 1s - 3ms/step - loss: 0.0636 - val_loss: 0.0632
+## Epoch 28/50
+## 469/469 - 1s - 3ms/step - loss: 0.0635 - val_loss: 0.0632
+## Epoch 29/50
+## 469/469 - 1s - 3ms/step - loss: 0.0635 - val_loss: 0.0631
+## Epoch 30/50
+## 469/469 - 1s - 3ms/step - loss: 0.0634 - val_loss: 0.0631
+## Epoch 31/50
+## 469/469 - 1s - 3ms/step - loss: 0.0634 - val_loss: 0.0631
+## Epoch 32/50
+## 469/469 - 1s - 3ms/step - loss: 0.0634 - val_loss: 0.0630
+## Epoch 33/50
+## 469/469 - 1s - 3ms/step - loss: 0.0633 - val_loss: 0.0630
+## Epoch 34/50
+## 469/469 - 1s - 3ms/step - loss: 0.0633 - val_loss: 0.0629
+## Epoch 35/50
+## 469/469 - 1s - 3ms/step - loss: 0.0633 - val_loss: 0.0629
+## Epoch 36/50
+## 469/469 - 1s - 3ms/step - loss: 0.0632 - val_loss: 0.0629
+## Epoch 37/50
+## 469/469 - 1s - 3ms/step - loss: 0.0632 - val_loss: 0.0628
+## Epoch 38/50
+## 469/469 - 1s - 3ms/step - loss: 0.0632 - val_loss: 0.0628
+## Epoch 39/50
+## 469/469 - 1s - 3ms/step - loss: 0.0631 - val_loss: 0.0628
+## Epoch 40/50
+## 469/469 - 1s - 3ms/step - loss: 0.0631 - val_loss: 0.0628
+## Epoch 41/50
+## 469/469 - 1s - 3ms/step - loss: 0.0631 - val_loss: 0.0628
+## Epoch 42/50
+## 469/469 - 1s - 3ms/step - loss: 0.0630 - val_loss: 0.0627
+## Epoch 43/50
+## 469/469 - 1s - 3ms/step - loss: 0.0630 - val_loss: 0.0627
+## Epoch 44/50
+## 469/469 - 1s - 3ms/step - loss: 0.0630 - val_loss: 0.0627
+## Epoch 45/50
+## 469/469 - 1s - 3ms/step - loss: 0.0630 - val_loss: 0.0627
+## Epoch 46/50
+## 469/469 - 1s - 3ms/step - loss: 0.0629 - val_loss: 0.0627
+## Epoch 47/50
+## 469/469 - 1s - 3ms/step - loss: 0.0629 - val_loss: 0.0627
+## Epoch 48/50
+## 469/469 - 1s - 3ms/step - loss: 0.0629 - val_loss: 0.0626
+## Epoch 49/50
+## 469/469 - 1s - 3ms/step - loss: 0.0629 - val_loss: 0.0626
+## Epoch 50/50
+## 469/469 - 1s - 3ms/step - loss: 0.0629 - val_loss: 0.0626
+

Let’s predict on our test dataset and display the original image together with the prediction from our autoencoder.

+

Notice how the predictions are pretty close to the original images, although not quite the same.

+
+predictions <- autoencoder |> predict(test_data)
+
+## 313/313 - 0s - 2ms/step
+
+display(test_data, predictions)
+
+plot of chunk unnamed-chunk-5
+
+

Now that we know that our autoencoder works, let’s retrain it using the noisy data as our input and the clean data as our target. We want our autoencoder to learn how to denoise the images.

+
+autoencoder |> fit(
+  x = noisy_train_data,
+  y = train_data,
+  epochs = 100,
+  batch_size = 128,
+  shuffle = TRUE,
+  validation_data = list(noisy_test_data, test_data)
+)
+
+## Epoch 1/100
+## 469/469 - 1s - 3ms/step - loss: 0.1004 - val_loss: 0.0939
+## Epoch 2/100
+## 469/469 - 1s - 3ms/step - loss: 0.0936 - val_loss: 0.0919
+## Epoch 3/100
+## 469/469 - 1s - 3ms/step - loss: 0.0921 - val_loss: 0.0908
+## Epoch 4/100
+## 469/469 - 1s - 3ms/step - loss: 0.0911 - val_loss: 0.0901
+## Epoch 5/100
+## 469/469 - 1s - 3ms/step - loss: 0.0904 - val_loss: 0.0896
+## Epoch 6/100
+## 469/469 - 1s - 3ms/step - loss: 0.0899 - val_loss: 0.0891
+## Epoch 7/100
+## 469/469 - 1s - 3ms/step - loss: 0.0894 - val_loss: 0.0887
+## Epoch 8/100
+## 469/469 - 1s - 3ms/step - loss: 0.0891 - val_loss: 0.0883
+## Epoch 9/100
+## 469/469 - 1s - 3ms/step - loss: 0.0887 - val_loss: 0.0880
+## Epoch 10/100
+## 469/469 - 1s - 3ms/step - loss: 0.0884 - val_loss: 0.0877
+## Epoch 11/100
+## 469/469 - 1s - 3ms/step - loss: 0.0881 - val_loss: 0.0875
+## Epoch 12/100
+## 469/469 - 1s - 3ms/step - loss: 0.0879 - val_loss: 0.0873
+## Epoch 13/100
+## 469/469 - 1s - 3ms/step - loss: 0.0877 - val_loss: 0.0871
+## Epoch 14/100
+## 469/469 - 1s - 3ms/step - loss: 0.0875 - val_loss: 0.0869
+## Epoch 15/100
+## 469/469 - 1s - 3ms/step - loss: 0.0873 - val_loss: 0.0867
+## Epoch 16/100
+## 469/469 - 1s - 3ms/step - loss: 0.0871 - val_loss: 0.0866
+## Epoch 17/100
+## 469/469 - 1s - 3ms/step - loss: 0.0870 - val_loss: 0.0865
+## Epoch 18/100
+## 469/469 - 1s - 3ms/step - loss: 0.0869 - val_loss: 0.0864
+## Epoch 19/100
+## 469/469 - 1s - 3ms/step - loss: 0.0867 - val_loss: 0.0863
+## Epoch 20/100
+## 469/469 - 1s - 3ms/step - loss: 0.0866 - val_loss: 0.0862
+## Epoch 21/100
+## 469/469 - 1s - 3ms/step - loss: 0.0865 - val_loss: 0.0861
+## Epoch 22/100
+## 469/469 - 1s - 3ms/step - loss: 0.0864 - val_loss: 0.0860
+## Epoch 23/100
+## 469/469 - 1s - 3ms/step - loss: 0.0864 - val_loss: 0.0859
+## Epoch 24/100
+## 469/469 - 1s - 3ms/step - loss: 0.0863 - val_loss: 0.0858
+## Epoch 25/100
+## 469/469 - 1s - 3ms/step - loss: 0.0862 - val_loss: 0.0858
+## Epoch 26/100
+## 469/469 - 1s - 3ms/step - loss: 0.0862 - val_loss: 0.0857
+## Epoch 27/100
+## 469/469 - 1s - 3ms/step - loss: 0.0861 - val_loss: 0.0856
+## Epoch 28/100
+## 469/469 - 1s - 3ms/step - loss: 0.0860 - val_loss: 0.0856
+## Epoch 29/100
+## 469/469 - 1s - 3ms/step - loss: 0.0860 - val_loss: 0.0855
+## Epoch 30/100
+## 469/469 - 1s - 3ms/step - loss: 0.0859 - val_loss: 0.0855
+## Epoch 31/100
+## 469/469 - 1s - 3ms/step - loss: 0.0859 - val_loss: 0.0854
+## Epoch 32/100
+## 469/469 - 1s - 3ms/step - loss: 0.0858 - val_loss: 0.0854
+## Epoch 33/100
+## 469/469 - 1s - 3ms/step - loss: 0.0858 - val_loss: 0.0854
+## Epoch 34/100
+## 469/469 - 1s - 3ms/step - loss: 0.0858 - val_loss: 0.0853
+## Epoch 35/100
+## 469/469 - 1s - 3ms/step - loss: 0.0857 - val_loss: 0.0853
+## Epoch 36/100
+## 469/469 - 1s - 3ms/step - loss: 0.0857 - val_loss: 0.0853
+## Epoch 37/100
+## 469/469 - 1s - 3ms/step - loss: 0.0857 - val_loss: 0.0852
+## Epoch 38/100
+## 469/469 - 1s - 3ms/step - loss: 0.0856 - val_loss: 0.0852
+## Epoch 39/100
+## 469/469 - 1s - 3ms/step - loss: 0.0856 - val_loss: 0.0852
+## Epoch 40/100
+## 469/469 - 1s - 3ms/step - loss: 0.0856 - val_loss: 0.0852
+## Epoch 41/100
+## 469/469 - 1s - 3ms/step - loss: 0.0855 - val_loss: 0.0851
+## Epoch 42/100
+## 469/469 - 1s - 3ms/step - loss: 0.0855 - val_loss: 0.0851
+## Epoch 43/100
+## 469/469 - 1s - 3ms/step - loss: 0.0855 - val_loss: 0.0851
+## Epoch 44/100
+## 469/469 - 1s - 3ms/step - loss: 0.0855 - val_loss: 0.0851
+## Epoch 45/100
+## 469/469 - 1s - 3ms/step - loss: 0.0854 - val_loss: 0.0850
+## Epoch 46/100
+## 469/469 - 1s - 3ms/step - loss: 0.0854 - val_loss: 0.0850
+## Epoch 47/100
+## 469/469 - 1s - 3ms/step - loss: 0.0854 - val_loss: 0.0850
+## Epoch 48/100
+## 469/469 - 1s - 3ms/step - loss: 0.0854 - val_loss: 0.0850
+## Epoch 49/100
+## 469/469 - 1s - 3ms/step - loss: 0.0853 - val_loss: 0.0850
+## Epoch 50/100
+## 469/469 - 1s - 3ms/step - loss: 0.0853 - val_loss: 0.0849
+## Epoch 51/100
+## 469/469 - 1s - 3ms/step - loss: 0.0853 - val_loss: 0.0849
+## Epoch 52/100
+## 469/469 - 1s - 3ms/step - loss: 0.0853 - val_loss: 0.0849
+## Epoch 53/100
+## 469/469 - 1s - 3ms/step - loss: 0.0853 - val_loss: 0.0849
+## Epoch 54/100
+## 469/469 - 1s - 3ms/step - loss: 0.0852 - val_loss: 0.0849
+## Epoch 55/100
+## 469/469 - 1s - 3ms/step - loss: 0.0852 - val_loss: 0.0849
+## Epoch 56/100
+## 469/469 - 1s - 3ms/step - loss: 0.0852 - val_loss: 0.0848
+## Epoch 57/100
+## 469/469 - 1s - 3ms/step - loss: 0.0852 - val_loss: 0.0848
+## Epoch 58/100
+## 469/469 - 1s - 3ms/step - loss: 0.0852 - val_loss: 0.0848
+## Epoch 59/100
+## 469/469 - 1s - 3ms/step - loss: 0.0852 - val_loss: 0.0848
+## Epoch 60/100
+## 469/469 - 1s - 3ms/step - loss: 0.0851 - val_loss: 0.0848
+## Epoch 61/100
+## 469/469 - 1s - 3ms/step - loss: 0.0851 - val_loss: 0.0848
+## Epoch 62/100
+## 469/469 - 1s - 3ms/step - loss: 0.0851 - val_loss: 0.0848
+## Epoch 63/100
+## 469/469 - 1s - 3ms/step - loss: 0.0851 - val_loss: 0.0848
+## Epoch 64/100
+## 469/469 - 1s - 3ms/step - loss: 0.0851 - val_loss: 0.0847
+## Epoch 65/100
+## 469/469 - 1s - 3ms/step - loss: 0.0851 - val_loss: 0.0847
+## Epoch 66/100
+## 469/469 - 1s - 3ms/step - loss: 0.0850 - val_loss: 0.0847
+## Epoch 67/100
+## 469/469 - 1s - 3ms/step - loss: 0.0850 - val_loss: 0.0847
+## Epoch 68/100
+## 469/469 - 1s - 3ms/step - loss: 0.0850 - val_loss: 0.0847
+## Epoch 69/100
+## 469/469 - 1s - 3ms/step - loss: 0.0850 - val_loss: 0.0847
+## Epoch 70/100
+## 469/469 - 1s - 3ms/step - loss: 0.0850 - val_loss: 0.0847
+## Epoch 71/100
+## 469/469 - 1s - 3ms/step - loss: 0.0850 - val_loss: 0.0846
+## Epoch 72/100
+## 469/469 - 1s - 3ms/step - loss: 0.0850 - val_loss: 0.0846
+## Epoch 73/100
+## 469/469 - 1s - 3ms/step - loss: 0.0850 - val_loss: 0.0846
+## Epoch 74/100
+## 469/469 - 1s - 3ms/step - loss: 0.0849 - val_loss: 0.0846
+## Epoch 75/100
+## 469/469 - 1s - 3ms/step - loss: 0.0849 - val_loss: 0.0846
+## Epoch 76/100
+## 469/469 - 1s - 3ms/step - loss: 0.0849 - val_loss: 0.0846
+## Epoch 77/100
+## 469/469 - 1s - 3ms/step - loss: 0.0849 - val_loss: 0.0846
+## Epoch 78/100
+## 469/469 - 1s - 3ms/step - loss: 0.0849 - val_loss: 0.0846
+## Epoch 79/100
+## 469/469 - 1s - 3ms/step - loss: 0.0849 - val_loss: 0.0846
+## Epoch 80/100
+## 469/469 - 1s - 3ms/step - loss: 0.0849 - val_loss: 0.0846
+## Epoch 81/100
+## 469/469 - 1s - 3ms/step - loss: 0.0849 - val_loss: 0.0846
+## Epoch 82/100
+## 469/469 - 1s - 3ms/step - loss: 0.0849 - val_loss: 0.0846
+## Epoch 83/100
+## 469/469 - 1s - 3ms/step - loss: 0.0849 - val_loss: 0.0846
+## Epoch 84/100
+## 469/469 - 1s - 3ms/step - loss: 0.0848 - val_loss: 0.0846
+## Epoch 85/100
+## 469/469 - 1s - 3ms/step - loss: 0.0848 - val_loss: 0.0845
+## Epoch 86/100
+## 469/469 - 1s - 3ms/step - loss: 0.0848 - val_loss: 0.0845
+## Epoch 87/100
+## 469/469 - 1s - 3ms/step - loss: 0.0848 - val_loss: 0.0845
+## Epoch 88/100
+## 469/469 - 1s - 3ms/step - loss: 0.0848 - val_loss: 0.0845
+## Epoch 89/100
+## 469/469 - 1s - 3ms/step - loss: 0.0848 - val_loss: 0.0845
+## Epoch 90/100
+## 469/469 - 1s - 3ms/step - loss: 0.0848 - val_loss: 0.0845
+## Epoch 91/100
+## 469/469 - 1s - 3ms/step - loss: 0.0848 - val_loss: 0.0845
+## Epoch 92/100
+## 469/469 - 1s - 3ms/step - loss: 0.0848 - val_loss: 0.0845
+## Epoch 93/100
+## 469/469 - 1s - 3ms/step - loss: 0.0848 - val_loss: 0.0845
+## Epoch 94/100
+## 469/469 - 1s - 3ms/step - loss: 0.0848 - val_loss: 0.0845
+## Epoch 95/100
+## 469/469 - 1s - 3ms/step - loss: 0.0848 - val_loss: 0.0845
+## Epoch 96/100
+## 469/469 - 1s - 3ms/step - loss: 0.0847 - val_loss: 0.0845
+## Epoch 97/100
+## 469/469 - 1s - 3ms/step - loss: 0.0847 - val_loss: 0.0845
+## Epoch 98/100
+## 469/469 - 1s - 3ms/step - loss: 0.0847 - val_loss: 0.0845
+## Epoch 99/100
+## 469/469 - 1s - 3ms/step - loss: 0.0847 - val_loss: 0.0845
+## Epoch 100/100
+## 469/469 - 1s - 3ms/step - loss: 0.0847 - val_loss: 0.0845
+

Let’s now predict on the noisy data and display the results of our autoencoder.

+predictions <- autoencoder |> predict(noisy_test_data)

## 313/313 - 0s - 689us/step

+display(noisy_test_data, predictions)

plot of chunk unnamed-chunk-7

Notice how the autoencoder does an amazing job at removing the noise from the input images.
+
+
+
+
+
diff --git a/docs/articles/examples/vision/autoencoder/unnamed-chunk-2-1.png b/docs/articles/examples/vision/autoencoder/unnamed-chunk-2-1.png new file mode 100644 index 0000000000..e23f5e78c7 Binary files /dev/null and b/docs/articles/examples/vision/autoencoder/unnamed-chunk-2-1.png differ diff --git a/docs/articles/examples/vision/autoencoder/unnamed-chunk-5-1.png b/docs/articles/examples/vision/autoencoder/unnamed-chunk-5-1.png new file mode 100644 index 0000000000..ef0ecb7d5f Binary files /dev/null and b/docs/articles/examples/vision/autoencoder/unnamed-chunk-5-1.png differ diff --git a/docs/articles/examples/vision/autoencoder/unnamed-chunk-7-1.png b/docs/articles/examples/vision/autoencoder/unnamed-chunk-7-1.png new file mode 100644 index 0000000000..76fc3242c9 Binary files /dev/null and b/docs/articles/examples/vision/autoencoder/unnamed-chunk-7-1.png differ diff --git a/docs/articles/examples/vision/mnist_convnet.html b/docs/articles/examples/vision/mnist_convnet.html new file mode 100644 index 0000000000..ece6a7dfbd --- /dev/null +++ b/docs/articles/examples/vision/mnist_convnet.html @@ -0,0 +1,265 @@
+Simple MNIST convnet • keras3

Setup

+library(keras3)
+
+

Prepare the data

+
+# Model / data parameters
+num_classes <- 10
+input_shape <- c(28, 28, 1)
+
+# Load the data and split it between train and test sets
+c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist()
+
+# Scale images to the [0, 1] range
+x_train <- x_train / 255
+x_test <- x_test / 255
+# Make sure images have shape (28, 28, 1)
+x_train <- op_expand_dims(x_train, -1)
+x_test <- op_expand_dims(x_test, -1)
+
+
+dim(x_train)
+
## [1] 60000    28    28     1
+
+dim(x_test)
+
## [1] 10000    28    28     1
+
+# convert class vectors to binary class matrices
+y_train <- to_categorical(y_train, num_classes)
+y_test <- to_categorical(y_test, num_classes)
+
+
+

Build the model

+
+model <- keras_model_sequential(input_shape = input_shape)
+model |>
+  layer_conv_2d(filters = 32, kernel_size = c(3, 3), activation = "relu") |>
+  layer_max_pooling_2d(pool_size = c(2, 2)) |>
+  layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") |>
+  layer_max_pooling_2d(pool_size = c(2, 2)) |>
+  layer_flatten() |>
+  layer_dropout(rate = 0.5) |>
+  layer_dense(units = num_classes, activation = "softmax")
+
+summary(model)
+
## Model: "sequential"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ conv2d (Conv2D)                 │ (None, 26, 26, 32)     │           320
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ max_pooling2d (MaxPooling2D)    │ (None, 13, 13, 32)     │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_1 (Conv2D)               │ (None, 11, 11, 64)     │        18,496
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ max_pooling2d_1 (MaxPooling2D)  │ (None, 5, 5, 64)       │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ flatten (Flatten)               │ (None, 1600)           │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dropout (Dropout)               │ (None, 1600)           │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dense (Dense)                   │ (None, 10)             │        16,010
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 34,826 (136.04 KB)
+##  Trainable params: 34,826 (136.04 KB)
+##  Non-trainable params: 0 (0.00 B)
+
+
+

Train the model

+
+batch_size <- 128
+epochs <- 15
+
+model |> compile(
+  loss = "categorical_crossentropy",
+  optimizer = "adam",
+  metrics = "accuracy"
+)
+
+model |> fit(
+  x_train, y_train,
+  batch_size = batch_size,
+  epochs = epochs,
+  validation_split = 0.1
+)
+
## Epoch 1/15
+## 422/422 - 4s - 10ms/step - accuracy: 0.8895 - loss: 0.3636 - val_accuracy: 0.9787 - val_loss: 0.0793
+## Epoch 2/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9666 - loss: 0.1112 - val_accuracy: 0.9852 - val_loss: 0.0550
+## Epoch 3/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9743 - loss: 0.0824 - val_accuracy: 0.9882 - val_loss: 0.0439
+## Epoch 4/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9786 - loss: 0.0695 - val_accuracy: 0.9897 - val_loss: 0.0399
+## Epoch 5/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9803 - loss: 0.0626 - val_accuracy: 0.9900 - val_loss: 0.0354
+## Epoch 6/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9823 - loss: 0.0557 - val_accuracy: 0.9912 - val_loss: 0.0332
+## Epoch 7/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9837 - loss: 0.0499 - val_accuracy: 0.9918 - val_loss: 0.0310
+## Epoch 8/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9849 - loss: 0.0481 - val_accuracy: 0.9920 - val_loss: 0.0310
+## Epoch 9/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9861 - loss: 0.0444 - val_accuracy: 0.9917 - val_loss: 0.0302
+## Epoch 10/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9863 - loss: 0.0439 - val_accuracy: 0.9913 - val_loss: 0.0297
+## Epoch 11/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9872 - loss: 0.0393 - val_accuracy: 0.9917 - val_loss: 0.0306
+## Epoch 12/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9874 - loss: 0.0372 - val_accuracy: 0.9922 - val_loss: 0.0289
+## Epoch 13/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9888 - loss: 0.0344 - val_accuracy: 0.9918 - val_loss: 0.0293
+## Epoch 14/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9890 - loss: 0.0343 - val_accuracy: 0.9918 - val_loss: 0.0282
+## Epoch 15/15
+## 422/422 - 1s - 2ms/step - accuracy: 0.9894 - loss: 0.0322 - val_accuracy: 0.9915 - val_loss: 0.0284
+
+
+

Evaluate the trained model

+
+score <- model |> evaluate(x_test, y_test, verbose = 0)
+score
+
## $accuracy
+## [1] 0.9912
+##
+## $loss
+## [1] 0.02448307
+
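If you want hard class labels rather than an aggregate score, predict() returns a matrix of per-class probabilities. A minimal sketch (not part of the original example; max.col() is base R):

+probs <- model |> predict(x_test)
+# Each row of `probs` is a probability distribution over the 10 digits;
+# max.col() picks the most probable column, minus 1 for the 0-9 labels.
+head(max.col(probs) - 1)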
+
+
+
diff --git a/docs/articles/examples/vision/oxford_pets_image_segmentation.html b/docs/articles/examples/vision/oxford_pets_image_segmentation.html new file mode 100644 index 0000000000..54f6581786 --- /dev/null +++ b/docs/articles/examples/vision/oxford_pets_image_segmentation.html @@ -0,0 +1,741 @@
+Image segmentation with a U-Net-like architecture • keras3

Download the data

+
+options(timeout = 5000)
+download.file(
+  "https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz",
+  "datasets/images.tar.gz"
+)
+download.file(
+  "https://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz",
+  "datasets/annotations.tar.gz"
+)
+
+untar("datasets/images.tar.gz", exdir = "datasets")
+untar("datasets/annotations.tar.gz", exdir = "datasets")
+
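One caveat (an addition, not part of the original example): download.file() will not create the target directory, so make sure datasets/ exists before running the chunk above.

+dir.create("datasets", showWarnings = FALSE)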
+
+

Prepare paths of input images and target segmentation masks

+
+library(keras3)
+input_dir <- "datasets/images/"
+target_dir <- "datasets/annotations/trimaps/"
+img_size <- c(160, 160)
+num_classes <- 3
+batch_size <- 32
+
+input_img_paths <- fs::dir_ls(input_dir, glob = "*.jpg") |> sort()
+target_img_paths <- fs::dir_ls(target_dir, glob = "*.png") |> sort()
+
+cat("Number of samples:", length(input_img_paths), "\n")
+
## Number of samples: 7390
+
+for (i in 1:10) {
+  cat(input_img_paths[i], "|", target_img_paths[i], "\n")
+}
+
## datasets/images/Abyssinian_1.jpg | datasets/annotations/trimaps/Abyssinian_1.png
+## datasets/images/Abyssinian_10.jpg | datasets/annotations/trimaps/Abyssinian_10.png
+## datasets/images/Abyssinian_100.jpg | datasets/annotations/trimaps/Abyssinian_100.png
+## datasets/images/Abyssinian_101.jpg | datasets/annotations/trimaps/Abyssinian_101.png
+## datasets/images/Abyssinian_102.jpg | datasets/annotations/trimaps/Abyssinian_102.png
+## datasets/images/Abyssinian_103.jpg | datasets/annotations/trimaps/Abyssinian_103.png
+## datasets/images/Abyssinian_104.jpg | datasets/annotations/trimaps/Abyssinian_104.png
+## datasets/images/Abyssinian_105.jpg | datasets/annotations/trimaps/Abyssinian_105.png
+## datasets/images/Abyssinian_106.jpg | datasets/annotations/trimaps/Abyssinian_106.png
+## datasets/images/Abyssinian_107.jpg | datasets/annotations/trimaps/Abyssinian_107.png
+
+
+

What does one input image and corresponding segmentation mask look like?

+
+# Display input image #10
+input_img_paths[10] |>
+  jpeg::readJPEG() |>
+  as.raster() |>
+  plot()
+
plot of chunk unnamed-chunk-4
+
+
+target_img_paths[10] |>
+  png::readPNG() |>
+  magrittr::multiply_by(255) |>
+  as.raster(max = 3) |>
+  plot()
+
plot of chunk unnamed-chunk-4
+
+
+
+

Prepare dataset to load & vectorize batches of data

+
+library(tensorflow, exclude = c("shape", "set_random_seed"))
+library(tfdatasets, exclude = "shape")
+
+
+# Returns a tf_dataset
+get_dataset <- function(batch_size, img_size, input_img_paths, target_img_paths,
+                        max_dataset_len = NULL) {
+
+  img_size <- as.integer(img_size)
+
+  load_img_masks <- function(input_img_path, target_img_path) {
+    input_img <- input_img_path |>
+      tf$io$read_file() |>
+      tf$io$decode_jpeg(channels = 3) |>
+      tf$image$resize(img_size) |>
+      tf$image$convert_image_dtype("float32")
+
+    target_img <- target_img_path |>
+      tf$io$read_file() |>
+      tf$io$decode_png(channels = 1) |>
+      tf$image$resize(img_size, method = "nearest") |>
+      tf$image$convert_image_dtype("uint8")
+
+    # Ground truth labels are 1, 2, 3. Subtract one to make them 0, 1, 2:
+    target_img <- target_img - 1L
+
+    list(input_img, target_img)
+  }
+
+  if (!is.null(max_dataset_len)) {
+    input_img_paths <- input_img_paths[1:max_dataset_len]
+    target_img_paths <- target_img_paths[1:max_dataset_len]
+  }
+
+  list(input_img_paths, target_img_paths) |>
+    tensor_slices_dataset() |>
+    dataset_map(load_img_masks, num_parallel_calls = tf$data$AUTOTUNE) |>
+    dataset_batch(batch_size)
+}
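As a quick sanity check (an addition, not in the original example), you can pull a single batch and inspect its shapes; as_iterator() and iter_next() come from reticulate:

+sanity_ds <- get_dataset(batch_size, img_size, input_img_paths,
+                         target_img_paths, max_dataset_len = 64)
+batch <- sanity_ds |> dataset_take(1) |>
+  reticulate::as_iterator() |> reticulate::iter_next()
+str(batch)  # expect (32, 160, 160, 3) float images and (32, 160, 160, 1) integer masks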
+
+
+

Prepare U-Net Xception-style model

+
+get_model <- function(img_size, num_classes) {
+
+  inputs <- keras_input(shape = c(img_size, 3))
+
+  ### [First half of the network: downsampling inputs] ###
+
+  # Entry block
+  x <- inputs |>
+    layer_conv_2d(filters = 32, kernel_size = 3, strides = 2, padding = "same") |>
+    layer_batch_normalization() |>
+    layer_activation("relu")
+
+  previous_block_activation <- x  # Set aside residual
+
+  for (filters in c(64, 128, 256)) {
+    x <- x |>
+      layer_activation("relu") |>
+      layer_separable_conv_2d(filters = filters, kernel_size = 3, padding = "same") |>
+      layer_batch_normalization() |>
+
+      layer_activation("relu") |>
+      layer_separable_conv_2d(filters = filters, kernel_size = 3, padding = "same") |>
+      layer_batch_normalization() |>
+
+      layer_max_pooling_2d(pool_size = 3, strides = 2, padding = "same")
+
+    residual <- previous_block_activation |>
+      layer_conv_2d(filters = filters, kernel_size = 1, strides = 2, padding = "same")
+
+    x <- layer_add(x, residual)  # Add back residual
+    previous_block_activation <- x  # Set aside next residual
+  }
+
+  ### [Second half of the network: upsampling inputs] ###
+
+  for (filters in c(256, 128, 64, 32)) {
+    x <- x |>
+      layer_activation("relu") |>
+      layer_conv_2d_transpose(filters = filters, kernel_size = 3, padding = "same") |>
+      layer_batch_normalization() |>
+
+      layer_activation("relu") |>
+      layer_conv_2d_transpose(filters = filters, kernel_size = 3, padding = "same") |>
+      layer_batch_normalization() |>
+
+      layer_upsampling_2d(size = 2)
+
+    # Project residual
+    residual <- previous_block_activation |>
+      layer_upsampling_2d(size = 2) |>
+      layer_conv_2d(filters = filters, kernel_size = 1, padding = "same")
+
+    x <- layer_add(x, residual)     # Add back residual
+    previous_block_activation <- x  # Set aside next residual
+  }
+
+  # Add a per-pixel classification layer
+  outputs <- x |>
+    layer_conv_2d(num_classes, 3, activation = "softmax", padding = "same")
+
+  # Define the model
+  keras_model(inputs, outputs)
+}
+
+# Build model
+model <- get_model(img_size, num_classes)
+summary(model)
+
## Model: "functional_1"
+## ┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━┓
+## ┃ Layer (type)       Output Shape       Param #  Connected to    Trai… 
+## ┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━┩
+## │ input_layer       │ (None, 160,     │         0 │ -              │   -
+## │ (InputLayer)      │ 160, 3)         │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d (Conv2D)   │ (None, 80, 80,  │       896 │ input_layer[0… │   Y
+## │                   │ 32)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 80, 80,  │       128 │ conv2d[0][0]   │   Y
+## │ (BatchNormalizat…32)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation        │ (None, 80, 80,  │         0 │ batch_normali… │   -
+## │ (Activation)      │ 32)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_1      │ (None, 80, 80,  │         0 │ activation[0]… │   -
+## │ (Activation)      │ 32)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ separable_conv2d  │ (None, 80, 80,  │     2,400 │ activation_1[Y
+## │ (SeparableConv2D) │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 80, 80,  │       256 │ separable_con… │   Y
+## │ (BatchNormalizat…64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_2      │ (None, 80, 80,  │         0 │ batch_normali… │   -
+## │ (Activation)      │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ separable_conv2d… │ (None, 80, 80,  │     4,736 │ activation_2[Y
+## │ (SeparableConv2D) │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 80, 80,  │       256 │ separable_con… │   Y
+## │ (BatchNormalizat…64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ max_pooling2d     │ (None, 40, 40,  │         0 │ batch_normali… │   -
+## │ (MaxPooling2D)    │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_1 (Conv2D) │ (None, 40, 40,  │     2,112 │ activation[0]… │   Y
+## │                   │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ add (Add)         │ (None, 40, 40,  │         0 │ max_pooling2d… │   -
+## │                   │ 64)             │           │ conv2d_1[0][0] │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_3      │ (None, 40, 40,  │         0 │ add[0][0]      │   -
+## │ (Activation)      │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ separable_conv2d… │ (None, 40, 40,  │     8,896 │ activation_3[Y
+## │ (SeparableConv2D) │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 40, 40,  │       512 │ separable_con… │   Y
+## │ (BatchNormalizat…128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_4      │ (None, 40, 40,  │         0 │ batch_normali… │   -
+## │ (Activation)      │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ separable_conv2d… │ (None, 40, 40,  │    17,664 │ activation_4[Y
+## │ (SeparableConv2D) │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 40, 40,  │       512 │ separable_con… │   Y
+## │ (BatchNormalizat…128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ max_pooling2d_1   │ (None, 20, 20,  │         0 │ batch_normali… │   -
+## │ (MaxPooling2D)    │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_2 (Conv2D) │ (None, 20, 20,  │     8,320 │ add[0][0]      │   Y
+## │                   │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ add_1 (Add)       │ (None, 20, 20,  │         0 │ max_pooling2d… │   -
+## │                   │ 128)            │           │ conv2d_2[0][0] │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_5      │ (None, 20, 20,  │         0 │ add_1[0][0]    │   -
+## │ (Activation)      │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ separable_conv2d… │ (None, 20, 20,  │    34,176 │ activation_5[Y
+## │ (SeparableConv2D) │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 20, 20,  │     1,024 │ separable_con… │   Y
+## │ (BatchNormalizat…256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_6      │ (None, 20, 20,  │         0 │ batch_normali… │   -
+## │ (Activation)      │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ separable_conv2d… │ (None, 20, 20,  │    68,096 │ activation_6[Y
+## │ (SeparableConv2D) │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 20, 20,  │     1,024 │ separable_con… │   Y
+## │ (BatchNormalizat…256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ max_pooling2d_2   │ (None, 10, 10,  │         0 │ batch_normali… │   -
+## │ (MaxPooling2D)    │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_3 (Conv2D) │ (None, 10, 10,  │    33,024 │ add_1[0][0]    │   Y
+## │                   │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ add_2 (Add)       │ (None, 10, 10,  │         0 │ max_pooling2d… │   -
+## │                   │ 256)            │           │ conv2d_3[0][0] │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_7      │ (None, 10, 10,  │         0 │ add_2[0][0]    │   -
+## │ (Activation)      │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_transpose  │ (None, 10, 10,  │   590,080 │ activation_7[Y
+## │ (Conv2DTranspose) │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 10, 10,  │     1,024 │ conv2d_transp… │   Y
+## │ (BatchNormalizat…256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_8      │ (None, 10, 10,  │         0 │ batch_normali… │   -
+## │ (Activation)      │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_transpose… │ (None, 10, 10,  │   590,080 │ activation_8[Y
+## │ (Conv2DTranspose) │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 10, 10,  │     1,024 │ conv2d_transp… │   Y
+## │ (BatchNormalizat…256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ up_sampling2d_1   │ (None, 20, 20,  │         0 │ add_2[0][0]    │   -
+## │ (UpSampling2D)    │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ up_sampling2d     │ (None, 20, 20,  │         0 │ batch_normali… │   -
+## │ (UpSampling2D)    │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_4 (Conv2D) │ (None, 20, 20,  │    65,792 │ up_sampling2d… │   Y
+## │                   │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ add_3 (Add)       │ (None, 20, 20,  │         0 │ up_sampling2d… │   -
+## │                   │ 256)            │           │ conv2d_4[0][0] │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_9      │ (None, 20, 20,  │         0 │ add_3[0][0]    │   -
+## │ (Activation)      │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_transpose… │ (None, 20, 20,  │   295,040 │ activation_9[Y
+## │ (Conv2DTranspose) │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 20, 20,  │       512 │ conv2d_transp… │   Y
+## │ (BatchNormalizat…128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_10     │ (None, 20, 20,  │         0 │ batch_normali… │   -
+## │ (Activation)      │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_transpose… │ (None, 20, 20,  │   147,584 │ activation_10… │   Y
+## │ (Conv2DTranspose) │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 20, 20,  │       512 │ conv2d_transp… │   Y
+## │ (BatchNormalizat…128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ up_sampling2d_3   │ (None, 40, 40,  │         0 │ add_3[0][0]    │   -
+## │ (UpSampling2D)    │ 256)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ up_sampling2d_2   │ (None, 40, 40,  │         0 │ batch_normali… │   -
+## │ (UpSampling2D)    │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_5 (Conv2D) │ (None, 40, 40,  │    32,896 │ up_sampling2d… │   Y
+## │                   │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ add_4 (Add)       │ (None, 40, 40,  │         0 │ up_sampling2d… │   -
+## │                   │ 128)            │           │ conv2d_5[0][0] │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_11     │ (None, 40, 40,  │         0 │ add_4[0][0]    │   -
+## │ (Activation)      │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_transpose… │ (None, 40, 40,  │    73,792 │ activation_11… │   Y
+## │ (Conv2DTranspose) │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 40, 40,  │       256 │ conv2d_transp… │   Y
+## │ (BatchNormalizat…64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_12     │ (None, 40, 40,  │         0 │ batch_normali… │   -
+## │ (Activation)      │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_transpose… │ (None, 40, 40,  │    36,928 │ activation_12… │   Y
+## │ (Conv2DTranspose) │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 40, 40,  │       256 │ conv2d_transp… │   Y
+## │ (BatchNormalizat…64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ up_sampling2d_5   │ (None, 80, 80,  │         0 │ add_4[0][0]    │   -
+## │ (UpSampling2D)    │ 128)            │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ up_sampling2d_4   │ (None, 80, 80,  │         0 │ batch_normali… │   -
+## │ (UpSampling2D)    │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_6 (Conv2D) │ (None, 80, 80,  │     8,256 │ up_sampling2d… │   Y
+## │                   │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ add_5 (Add)       │ (None, 80, 80,  │         0 │ up_sampling2d… │   -
+## │                   │ 64)             │           │ conv2d_6[0][0] │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_13     │ (None, 80, 80,  │         0 │ add_5[0][0]    │   -
+## │ (Activation)      │ 64)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_transpose… │ (None, 80, 80,  │    18,464 │ activation_13… │   Y
+## │ (Conv2DTranspose) │ 32)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 80, 80,  │       128 │ conv2d_transp… │   Y
+## │ (BatchNormalizat…32)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ activation_14     │ (None, 80, 80,  │         0 │ batch_normali… │   -
+## │ (Activation)      │ 32)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_transpose… │ (None, 80, 80,  │     9,248 │ activation_14… │   Y
+## │ (Conv2DTranspose) │ 32)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ batch_normalizat… │ (None, 80, 80,  │       128 │ conv2d_transp… │   Y
+## │ (BatchNormalizat…32)             │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ up_sampling2d_7   │ (None, 160,     │         0 │ add_5[0][0]    │   -
+## │ (UpSampling2D)    │ 160, 64)        │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ up_sampling2d_6   │ (None, 160,     │         0 │ batch_normali… │   -
+## │ (UpSampling2D)    │ 160, 32)        │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_7 (Conv2D) │ (None, 160,     │     2,080 │ up_sampling2d… │   Y
+## │                   │ 160, 32)        │           │                │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ add_6 (Add)       │ (None, 160,     │         0 │ up_sampling2d… │   -
+## │                   │ 160, 32)        │           │ conv2d_7[0][0] │       │
+## ├───────────────────┼─────────────────┼───────────┼────────────────┼───────┤
+## │ conv2d_8 (Conv2D) │ (None, 160,     │       867 │ add_6[0][0]    │   Y
+## │                   │ 160, 3)         │           │                │       │
+## └───────────────────┴─────────────────┴───────────┴────────────────┴───────┘
+##  Total params: 2,058,979 (7.85 MB)
+##  Trainable params: 2,055,203 (7.84 MB)
+##  Non-trainable params: 3,776 (14.75 KB)
+
+
+

Set aside a validation split

+
+# Split our img paths into a training and a validation set
+val_samples <- 1000
+val_samples <- sample.int(length(input_img_paths), val_samples)
+
+train_input_img_paths <- input_img_paths[-val_samples]
+train_target_img_paths <- target_img_paths[-val_samples]
+
+val_input_img_paths <- input_img_paths[val_samples]
+val_target_img_paths <- target_img_paths[val_samples]
+
+# Instantiate dataset for each split
+# Limit the number of input files via `max_dataset_len` for faster epoch training time.
+# Remove the `max_dataset_len` argument when running with the full dataset.
+train_dataset <- get_dataset(
+  batch_size,
+  img_size,
+  train_input_img_paths,
+  train_target_img_paths,
+  max_dataset_len = 1000
+)
+valid_dataset <- get_dataset(
+  batch_size, img_size, val_input_img_paths, val_target_img_paths
+)
+
+
+

Train the model

+
+# Configure the model for training.
+# We use the "sparse" version of categorical_crossentropy
+# because our target data is integers.
+model |> compile(
+  optimizer = optimizer_adam(1e-4),
+  loss = "sparse_categorical_crossentropy"
+)
+
+callbacks <- list(
+  callback_model_checkpoint(
+    "models/oxford_segmentation.keras", save_best_only = TRUE
+  )
+)
+
+# Train the model, doing validation at the end of each epoch.
+epochs <- 50
+model |> fit(
+  train_dataset,
+  epochs = epochs,
+  validation_data = valid_dataset,
+  callbacks = callbacks,
+  verbose = 2
+)
+
## Epoch 1/50
+## 32/32 - 27s - 844ms/step - loss: 1.4284 - val_loss: 1.5502
+## Epoch 2/50
+## 32/32 - 2s - 60ms/step - loss: 0.9221 - val_loss: 1.9881
+## Epoch 3/50
+## 32/32 - 2s - 60ms/step - loss: 0.7764 - val_loss: 2.5123
+## Epoch 4/50
+## 32/32 - 2s - 60ms/step - loss: 0.7200 - val_loss: 3.0148
+## Epoch 5/50
+## 32/32 - 2s - 60ms/step - loss: 0.6848 - val_loss: 3.2898
+## Epoch 6/50
+## 32/32 - 2s - 60ms/step - loss: 0.6556 - val_loss: 3.4525
+## Epoch 7/50
+## 32/32 - 2s - 60ms/step - loss: 0.6302 - val_loss: 3.5625
+## Epoch 8/50
+## 32/32 - 2s - 60ms/step - loss: 0.6082 - val_loss: 3.6537
+## Epoch 9/50
+## 32/32 - 2s - 60ms/step - loss: 0.5894 - val_loss: 3.7334
+## Epoch 10/50
+## 32/32 - 2s - 60ms/step - loss: 0.5726 - val_loss: 3.7954
+## Epoch 11/50
+## 32/32 - 2s - 60ms/step - loss: 0.5566 - val_loss: 3.8266
+## Epoch 12/50
+## 32/32 - 2s - 60ms/step - loss: 0.5407 - val_loss: 3.8028
+## Epoch 13/50
+## 32/32 - 2s - 60ms/step - loss: 0.5241 - val_loss: 3.7225
+## Epoch 14/50
+## 32/32 - 2s - 60ms/step - loss: 0.5063 - val_loss: 3.5891
+## Epoch 15/50
+## 32/32 - 2s - 60ms/step - loss: 0.4862 - val_loss: 3.4399
+## Epoch 16/50
+## 32/32 - 2s - 60ms/step - loss: 0.4640 - val_loss: 3.2488
+## Epoch 17/50
+## 32/32 - 2s - 60ms/step - loss: 0.4398 - val_loss: 2.9895
+## Epoch 18/50
+## 32/32 - 2s - 60ms/step - loss: 0.4141 - val_loss: 2.7040
+## Epoch 19/50
+## 32/32 - 2s - 60ms/step - loss: 0.3877 - val_loss: 2.3552
+## Epoch 20/50
+## 32/32 - 2s - 60ms/step - loss: 0.3622 - val_loss: 1.9751
+## Epoch 21/50
+## 32/32 - 2s - 60ms/step - loss: 0.3391 - val_loss: 1.6415
+## Epoch 22/50
+## 32/32 - 2s - 65ms/step - loss: 0.3201 - val_loss: 1.3455
+## Epoch 23/50
+## 32/32 - 2s - 65ms/step - loss: 0.3076 - val_loss: 1.1034
+## Epoch 24/50
+## 32/32 - 2s - 65ms/step - loss: 0.3076 - val_loss: 1.0204
+## Epoch 25/50
+## 32/32 - 2s - 65ms/step - loss: 0.3429 - val_loss: 0.9294
+## Epoch 26/50
+## 32/32 - 2s - 60ms/step - loss: 0.3633 - val_loss: 1.0385
+## Epoch 27/50
+## 32/32 - 2s - 65ms/step - loss: 0.3294 - val_loss: 0.8552
+## Epoch 28/50
+## 32/32 - 2s - 60ms/step - loss: 0.2888 - val_loss: 0.9904
+## Epoch 29/50
+## 32/32 - 2s - 60ms/step - loss: 0.2722 - val_loss: 1.1511
+## Epoch 30/50
+## 32/32 - 2s - 60ms/step - loss: 0.2675 - val_loss: 1.1777
+## Epoch 31/50
+## 32/32 - 2s - 60ms/step - loss: 0.2710 - val_loss: 1.1230
+## Epoch 32/50
+## 32/32 - 2s - 60ms/step - loss: 0.2818 - val_loss: 1.1107
+## Epoch 33/50
+## 32/32 - 2s - 60ms/step - loss: 0.3111 - val_loss: 1.2046
+## Epoch 34/50
+## 32/32 - 2s - 60ms/step - loss: 0.3025 - val_loss: 1.2817
+## Epoch 35/50
+## 32/32 - 2s - 60ms/step - loss: 0.2902 - val_loss: 1.0870
+## Epoch 36/50
+## 32/32 - 2s - 60ms/step - loss: 0.2812 - val_loss: 0.9639
+## Epoch 37/50
+## 32/32 - 2s - 60ms/step - loss: 0.2769 - val_loss: 1.3830
+## Epoch 38/50
+## 32/32 - 2s - 60ms/step - loss: 0.2629 - val_loss: 1.0656
+## Epoch 39/50
+## 32/32 - 2s - 60ms/step - loss: 0.2439 - val_loss: 1.2142
+## Epoch 40/50
+## 32/32 - 2s - 60ms/step - loss: 0.2417 - val_loss: 1.1791
+## Epoch 41/50
+## 32/32 - 2s - 60ms/step - loss: 0.2437 - val_loss: 1.3024
+## Epoch 42/50
+## 32/32 - 2s - 60ms/step - loss: 0.2340 - val_loss: 1.5025
+## Epoch 43/50
+## 32/32 - 2s - 60ms/step - loss: 0.2330 - val_loss: 1.2029
+## Epoch 44/50
+## 32/32 - 2s - 60ms/step - loss: 0.2228 - val_loss: 1.1434
+## Epoch 45/50
+## 32/32 - 2s - 60ms/step - loss: 0.2169 - val_loss: 1.1099
+## Epoch 46/50
+## 32/32 - 2s - 60ms/step - loss: 0.2116 - val_loss: 1.1009
+## Epoch 47/50
+## 32/32 - 2s - 60ms/step - loss: 0.2059 - val_loss: 1.0853
+## Epoch 48/50
+## 32/32 - 2s - 60ms/step - loss: 0.2004 - val_loss: 1.1272
+## Epoch 49/50
+## 32/32 - 2s - 60ms/step - loss: 0.1924 - val_loss: 1.0651
+## Epoch 50/50
+## 32/32 - 2s - 60ms/step - loss: 0.1844 - val_loss: 1.1039
+
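Given how much val_loss oscillates above, an early-stopping callback is a natural extension (a sketch, not part of the original example):

+callbacks <- list(
+  callback_model_checkpoint(
+    "models/oxford_segmentation.keras", save_best_only = TRUE
+  ),
+  callback_early_stopping(monitor = "val_loss", patience = 10,
+                          restore_best_weights = TRUE)
+)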
+
+

Visualize predictions

+
+model <- load_model("models/oxford_segmentation.keras")
+# Generate predictions for all images in the validation set
+val_dataset <- get_dataset(
+  batch_size, img_size, val_input_img_paths, val_target_img_paths
+)
+val_preds <- predict(model, val_dataset)
+
## 32/32 - 3s - 83ms/step
+
+display_mask <- function(i) {
+  # Quick utility to display a model's prediction.
+  mask <- val_preds[i, , , ] |>
+    apply(c(1, 2), which.max) |>
+    array_reshape(dim = c(img_size, 1))
+  mask <- abind::abind(mask, mask, mask, along = 3)
+  plot(as.raster(mask, max = 3))
+}
+
+# Display results for validation image #10
+i <- 10
+
+par(mfrow = c(1, 3))
+# Display input image
+val_input_img_paths[i] |>
+  jpeg::readJPEG() |>
+  as.raster() |>
+  plot()
+
+# Display ground-truth target mask
+val_target_img_paths[i] |>
+  png::readPNG() |>
+  magrittr::multiply_by(255) |>
+  as.raster(max = 3) |>
+  plot()
+
+# Display mask predicted by our model
+display_mask(i)  # Note that the model only sees inputs at 160x160.
+
plot of chunk unnamed-chunk-9
+
+
+
+
+
diff --git a/docs/articles/examples/vision/oxford_pets_image_segmentation/unnamed-chunk-4-1.png b/docs/articles/examples/vision/oxford_pets_image_segmentation/unnamed-chunk-4-1.png new file mode 100644 index 0000000000..75e535ac6b Binary files /dev/null and b/docs/articles/examples/vision/oxford_pets_image_segmentation/unnamed-chunk-4-1.png differ diff --git a/docs/articles/examples/vision/oxford_pets_image_segmentation/unnamed-chunk-4-2.png b/docs/articles/examples/vision/oxford_pets_image_segmentation/unnamed-chunk-4-2.png new file mode 100644 index 0000000000..816d0d588d Binary files /dev/null and b/docs/articles/examples/vision/oxford_pets_image_segmentation/unnamed-chunk-4-2.png differ diff --git a/docs/articles/examples/vision/oxford_pets_image_segmentation/unnamed-chunk-9-1.png b/docs/articles/examples/vision/oxford_pets_image_segmentation/unnamed-chunk-9-1.png new file mode 100644 index 0000000000..e108292793 Binary files /dev/null and b/docs/articles/examples/vision/oxford_pets_image_segmentation/unnamed-chunk-9-1.png differ diff --git a/docs/articles/faq.html b/docs/articles/faq.html deleted file mode 100644 index 272dffe039..0000000000 --- a/docs/articles/faq.html +++ /dev/null @@ -1,10 +0,0 @@ diff --git a/docs/articles/functional_api.html b/docs/articles/functional_api.html index ea632c9293..58f9f26e71 100644 --- a/docs/articles/functional_api.html +++ b/docs/articles/functional_api.html @@ -1,10 +1,1143 @@
+The Functional API • keras3

Setup

+library(keras3)
+
+

Introduction

+

The Keras functional API is a way to create models that are more flexible than the sequential API. The functional API can handle models with non-linear topology, shared layers, and even multiple inputs or outputs.

+

The main idea is that a deep learning model is usually a directed acyclic graph (DAG) of layers. So the functional API is a way to build graphs of layers.

+

Consider the following model:

+
+
(input: 784-dimensional vectors)
+       ↧
+[Dense (64 units, relu activation)]
+       ↧
+[Dense (64 units, relu activation)]
+       ↧
+[Dense (10 units, softmax activation)]
+       ↧
+(output: logits of a probability distribution over 10 classes)
+
+

This is a basic graph with three layers. To build this model using the functional API, start by creating an input node:

+
+inputs <- keras_input(shape = c(784))
+

The shape of the data is set as a 784-dimensional vector. The batch size is always omitted, since only the shape of each sample is specified.

+

If, for example, you have an image input with a shape of (32, 32, 3), you would use:

+
+# Just for demonstration purposes.
+img_inputs <- keras_input(shape = c(32, 32, 3))
+

The inputs object that is returned contains information about the shape and dtype of the input data that you feed to your model. Here’s the shape:

+
+shape(inputs)
+
## shape(NA, 784)
+

Here’s the dtype:

+
+inputs$dtype
+
## [1] "float32"
+

You create a new node in the graph of layers by calling a layer on this inputs object:

+
+dense <- layer_dense(units = 64, activation="relu")
+x <- dense(inputs)
+

The “layer call” action is like drawing an arrow from “inputs” to this layer you created. You’re “passing” the inputs to the dense layer, and you get x as the output.

+

Let’s add a few more layers to the graph of layers:

+
+outputs <- x |>
+  layer_dense(units = 64, activation = "relu") |>
+  layer_dense(units = 10)
+

At this point, you can create a Model by specifying its inputs and outputs in the graph of layers:

+
+model <- keras_model(inputs = inputs, outputs = outputs, name = "mnist_model")
+

Let’s check out what the model summary looks like:

+
+summary(model)
+
## Model: "mnist_model"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ input_layer (InputLayer)        │ (None, 784)            │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dense (Dense)                   │ (None, 64)             │        50,240
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dense_1 (Dense)                 │ (None, 64)             │         4,160
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dense_2 (Dense)                 │ (None, 10)             │           650
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 55,050 (215.04 KB)
+##  Trainable params: 55,050 (215.04 KB)
+##  Non-trainable params: 0 (0.00 B)
+

You can also plot the model as a graph:

+
+plot(model)
+

+

And, optionally, display the input and output shapes of each layer in the plotted graph:

+
+plot(model, show_shapes = TRUE)
+

+

This figure and the code are almost identical. In the code version, the connection arrows are replaced by the call operation.

+

A “graph of layers” is an intuitive mental image for a deep learning model, and the functional API is a way to create models that closely mirrors this.

+
+
+

Training, evaluation, and inference

+

Training, evaluation, and inference work in exactly the same way for models built using the functional API as for Sequential models.

+

The Model class offers a built-in training loop (the fit() method) and a built-in evaluation loop (the evaluate() method). Note that you can easily customize these loops to implement training routines beyond supervised learning (e.g. GANs), as sketched below.


Here, load the MNIST image data, reshape it into vectors, fit the model on the data (while monitoring performance on a validation split), then evaluate the model on the test data:

+
+c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist()
+
+x_train <- array_reshape(x_train, c(60000, 784)) / 255
+x_test <- array_reshape(x_test, c(10000, 784)) / 255
+
+model |> compile(
+  loss = loss_sparse_categorical_crossentropy(from_logits = TRUE),
+  optimizer = optimizer_rmsprop(),
+  metrics = "accuracy"
+)
+
+history <- model |> fit(
+    x_train, y_train, batch_size = 64, epochs = 2, validation_split = 0.2
+)
+
## Epoch 1/2
+## 750/750 - 2s - 3ms/step - accuracy: 0.8979 - loss: 0.3540 - val_accuracy: 0.9448 - val_loss: 0.1903
+## Epoch 2/2
+## 750/750 - 1s - 920us/step - accuracy: 0.9509 - loss: 0.1635 - val_accuracy: 0.9597 - val_loss: 0.1397
+
+test_scores <- model |> evaluate(x_test, y_test, verbose=2)
+
## 313/313 - 1s - 2ms/step - accuracy: 0.9595 - loss: 0.1328
+
+cat("Test loss:", test_scores$loss, "\n")
+cat("Test accuracy:", test_scores$accuracy, "\n")
+
## Test loss: 0.132778
+## Test accuracy: 0.9595
+

For further reading, see the training and evaluation guide.

+
+
+

Save and serialize

+

Saving the model and serialization work the same way for models built using the functional API as they do for Sequential models. The standard way to save a functional model is to call save_model() to save the entire model as a single file. You can later recreate the same model from this file, even if the code that built the model is no longer available.

+

This saved file includes:

  • the model architecture,
  • the model weight values (that were learned during training),
  • the model training config, if any (as passed to compile()), and
  • the optimizer and its state, if any (to restart training where you left off)

+
+model |> save_model("my_model.keras")
+rm(model)
+# Recreate the exact same model purely from the file:
+model <- load_model("my_model.keras")
+

For details, read the model serialization & saving guide.

+
+
+

Use the same graph of layers to define multiple models

+

In the functional API, models are created by specifying their inputs and outputs in a graph of layers. That means that a single graph of layers can be used to generate multiple models.

+

In the example below, you use the same stack of layers to instantiate two models: an encoder model that turns image inputs into 16-dimensional vectors, and an end-to-end autoencoder model for training.

+
+encoder_input <- keras_input(shape = c(28, 28, 1), name="img")
+encoder_output <- encoder_input |>
+  layer_conv_2d(16, 3, activation = "relu") |>
+  layer_conv_2d(32, 3, activation = "relu") |>
+  layer_max_pooling_2d(3) |>
+  layer_conv_2d(32, 3, activation = "relu") |>
+  layer_conv_2d(16, 3, activation = "relu") |>
+  layer_global_max_pooling_2d()
+
+encoder <- keras_model(encoder_input, encoder_output, name="encoder")
+summary(encoder)
+
## Model: "encoder"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ img (InputLayer)                │ (None, 28, 28, 1)      │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d (Conv2D)                 │ (None, 26, 26, 16)     │           160
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_1 (Conv2D)               │ (None, 24, 24, 32)     │         4,640
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ max_pooling2d (MaxPooling2D)    │ (None, 8, 8, 32)       │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_2 (Conv2D)               │ (None, 6, 6, 32)       │         9,248
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_3 (Conv2D)               │ (None, 4, 4, 16)       │         4,624
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ global_max_pooling2d            │ (None, 16)             │             0
+## │ (GlobalMaxPooling2D)            │                        │               │
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 18,672 (72.94 KB)
+##  Trainable params: 18,672 (72.94 KB)
+##  Non-trainable params: 0 (0.00 B)
+
+decoder_output <- encoder_output |>
+  layer_reshape(c(4, 4, 1)) |>
+  layer_conv_2d_transpose(16, 3, activation = "relu") |>
+  layer_conv_2d_transpose(32, 3, activation = "relu") |>
+  layer_upsampling_2d(3) |>
+  layer_conv_2d_transpose(16, 3, activation = "relu") |>
+  layer_conv_2d_transpose(1, 3, activation = "relu")
+
+autoencoder <- keras_model(encoder_input, decoder_output, name="autoencoder")
+summary(autoencoder)
+
## Model: "autoencoder"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ img (InputLayer)                │ (None, 28, 28, 1)      │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d (Conv2D)                 │ (None, 26, 26, 16)     │           160
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_1 (Conv2D)               │ (None, 24, 24, 32)     │         4,640
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ max_pooling2d (MaxPooling2D)    │ (None, 8, 8, 32)       │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_2 (Conv2D)               │ (None, 6, 6, 32)       │         9,248
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_3 (Conv2D)               │ (None, 4, 4, 16)       │         4,624
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ global_max_pooling2d            │ (None, 16)             │             0
+## │ (GlobalMaxPooling2D)            │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ reshape (Reshape)               │ (None, 4, 4, 1)        │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_transpose                │ (None, 6, 6, 16)       │           160
+## │ (Conv2DTranspose)               │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_transpose_1              │ (None, 8, 8, 32)       │         4,640
+## │ (Conv2DTranspose)               │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ up_sampling2d (UpSampling2D)    │ (None, 24, 24, 32)     │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_transpose_2              │ (None, 26, 26, 16)     │         4,624
+## │ (Conv2DTranspose)               │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_transpose_3              │ (None, 28, 28, 1)      │           145
+## │ (Conv2DTranspose)               │                        │               │
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 28,241 (110.32 KB)
+##  Trainable params: 28,241 (110.32 KB)
+##  Non-trainable params: 0 (0.00 B)
+

Here, the decoding architecture is strictly symmetrical to the encoding architecture, so the output shape is the same as the input shape (28, 28, 1).

+

The reverse of a conv_2d layer is a conv_2d_transpose layer, and the reverse of a max_pooling_2d layer is an upsampling_2d layer.

+
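To make that symmetry concrete, a small sketch (not part of the original guide): a 3x3 convolution trims 2 pixels from each spatial dimension, and its transpose adds them back.

+x <- keras_input(shape = c(28, 28, 1))
+y <- x |> layer_conv_2d(8, 3)            # shape: (None, 26, 26, 8)
+z <- y |> layer_conv_2d_transpose(1, 3)  # shape: (None, 28, 28, 1)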
+
+

All models are callable, just like layers

+

You can treat any model as if it were a layer by invoking it on an Input or on the output of another layer. By calling a model you aren’t just reusing the architecture of the model, you’re also reusing its weights.

+

To see this in action, here’s a different take on the autoencoder example that creates an encoder model, a decoder model, and chains them in two calls to obtain the autoencoder model:

+
+encoder_input <- keras_input(shape = c(28, 28, 1), name="img")
+encoder_output <- encoder_input |>
+  layer_conv_2d(16, 3, activation = "relu") |>
+  layer_conv_2d(32, 3, activation = "relu") |>
+  layer_max_pooling_2d(3) |>
+  layer_conv_2d(32, 3, activation = "relu") |>
+  layer_conv_2d(16, 3, activation = "relu") |>
+  layer_global_max_pooling_2d()
+
+encoder <- keras_model(encoder_input, encoder_output, name="encoder")
+summary(encoder)
+
## Model: "encoder"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ img (InputLayer)                │ (None, 28, 28, 1)      │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_4 (Conv2D)               │ (None, 26, 26, 16)     │           160
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_5 (Conv2D)               │ (None, 24, 24, 32)     │         4,640
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ max_pooling2d_1 (MaxPooling2D)  │ (None, 8, 8, 32)       │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_6 (Conv2D)               │ (None, 6, 6, 32)       │         9,248
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_7 (Conv2D)               │ (None, 4, 4, 16)       │         4,624
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ global_max_pooling2d_1          │ (None, 16)             │             0
+## │ (GlobalMaxPooling2D)            │                        │               │
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 18,672 (72.94 KB)
+##  Trainable params: 18,672 (72.94 KB)
+##  Non-trainable params: 0 (0.00 B)
+
+decoder_input <- keras_input(shape = c(16), name = "encoded_img")
+decoder_output <- decoder_input |>
+  layer_reshape(c(4, 4, 1)) |>
+  layer_conv_2d_transpose(16, 3, activation = "relu") |>
+  layer_conv_2d_transpose(32, 3, activation = "relu") |>
+  layer_upsampling_2d(3) |>
+  layer_conv_2d_transpose(16, 3, activation = "relu") |>
+  layer_conv_2d_transpose(1, 3, activation = "relu")
+
+decoder <- keras_model(decoder_input, decoder_output, name = "decoder")
+summary(decoder)
+
## Model: "decoder"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ encoded_img (InputLayer)        │ (None, 16)             │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ reshape_1 (Reshape)             │ (None, 4, 4, 1)        │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_transpose_4              │ (None, 6, 6, 16)       │           160
+## │ (Conv2DTranspose)               │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_transpose_5              │ (None, 8, 8, 32)       │         4,640
+## │ (Conv2DTranspose)               │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ up_sampling2d_1 (UpSampling2D)  │ (None, 24, 24, 32)     │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_transpose_6              │ (None, 26, 26, 16)     │         4,624
+## │ (Conv2DTranspose)               │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_transpose_7              │ (None, 28, 28, 1)      │           145
+## │ (Conv2DTranspose)               │                        │               │
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 9,569 (37.38 KB)
+##  Trainable params: 9,569 (37.38 KB)
+##  Non-trainable params: 0 (0.00 B)
+
+autoencoder_input <- keras_input(shape = c(28, 28, 1), name = "img")
+encoded_img <- encoder(autoencoder_input)
+decoded_img <- decoder(encoded_img)
+autoencoder <- keras_model(autoencoder_input, decoded_img,
+                           name = "autoencoder")
+summary(autoencoder)
+
## Model: "autoencoder"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ img (InputLayer)                │ (None, 28, 28, 1)      │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ encoder (Functional)            │ (None, 16)             │        18,672
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ decoder (Functional)            │ (None, 28, 28, 1)      │         9,569
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 28,241 (110.32 KB)
+##  Trainable params: 28,241 (110.32 KB)
+##  Non-trainable params: 0 (0.00 B)
+

As you can see, models can be nested: a model can contain sub-models (since a model is just like a layer). A common use case for model nesting is ensembling. For example, here’s how to ensemble a set of models into a single model that averages their predictions:

+
+get_model <- function() {
+  inputs <- keras_input(shape = 128)
+  outputs <- inputs |> layer_dense(1)
+  keras_model(inputs, outputs)
+}
+
+model1 <- get_model()
+model2 <- get_model()
+model3 <- get_model()
+
+inputs <- keras_input(shape = 128)
+y1 <- model1(inputs)
+y2 <- model2(inputs)
+y3 <- model3(inputs)
+outputs <- layer_average(list(y1, y2, y3))
+ensemble_model <- keras_model(inputs = inputs, outputs = outputs)
+
+
+

Manipulate complex graph topologies +

+
+

Models with multiple inputs and outputs +

+

The functional API makes it easy to manipulate multiple inputs and +outputs. This cannot be handled with the Sequential +API.

+

For example, if you’re building a system for ranking customer issue +tickets by priority and routing them to the correct department, then the +model will have three inputs:

+
    +
  • the title of the ticket (text input),
  • +
  • the text body of the ticket (text input), and
  • +
  • any tags added by the user (categorical input)
  • +
+

This model will have two outputs:

+
    +
  • the priority score between 0 and 1 (scalar sigmoid output), and
  • +
  • the department that should handle the ticket (softmax output over +the set of departments).
  • +
+

You can build this model in a few lines with the functional API:

+
+num_tags <- 12  # Number of unique issue tags
+num_words <- 10000  # Size of vocabulary obtained when preprocessing text data
+num_departments <- 4  # Number of departments for predictions
+
+title_input <- # Variable-length sequence of ints
+  keras_input(shape(NA), name = "title")
+body_input <-  # Variable-length sequence of ints
+  keras_input(shape(NA), name = "body")
+tags_input <-  # Binary vectors of size `num_tags`
+  keras_input(shape = num_tags, name = "tags")
+
+# Embed each word in the title into a 64-dimensional vector
+title_features <- layer_embedding(title_input, num_words, 64)
+# Embed each word in the text into a 64-dimensional vector
+body_features <- layer_embedding(body_input, num_words, 64)
+
+# Reduce sequence of embedded words in the title
+# into a single 128-dimensional vector
+title_features <- layer_lstm(title_features, 128)
+# Reduce sequence of embedded words in the body
+# into a single 32-dimensional vector
+body_features <- layer_lstm(body_features, 32)
+
+# Merge all available features into a single large vector via concatenation
+x <- layer_concatenate(title_features, body_features, tags_input)
+
+# Stick a logistic regression for priority prediction on top of the features
+priority_pred <- layer_dense(x, 1, name = "priority")
+
+# Stick a department classifier on top of the features
+department_pred <- layer_dense(x, num_departments, name = "department")
+
+# Instantiate an end-to-end model predicting both priority and department
+model <- keras_model(
+  inputs = list(title_input, body_input, tags_input),
+  outputs = list(priority = priority_pred, department = department_pred)
+)
+

Now plot the model:

+
+plot(model, show_shapes = TRUE)
+

+

When compiling this model, you can assign different losses to each +output. You can even assign different weights to each loss – to modulate +their contribution to the total training loss.

+
+model |> compile(
+  optimizer = optimizer_rmsprop(1e-3),
+  loss = list(
+    loss_binary_crossentropy(from_logits = TRUE),
+    loss_categorical_crossentropy(from_logits = TRUE)
+  ),
+  loss_weights = c(1.0, 0.2)
+)
+

Since the output layers have different names, you could also specify +the losses and loss weights with the corresponding layer names:

+
+model |> compile(
+  optimizer = optimizer_rmsprop(1e-3),
+  loss = list(
+    priority = loss_binary_crossentropy(from_logits = TRUE),
+    department = loss_categorical_crossentropy(from_logits = TRUE)
+  ),
+  loss_weights = list(priority = 1.0, department = 0.2)
+)
+

Train the model by passing lists of arrays of inputs and targets:

+
+# Dummy input data
+title_data <- random_integer(c(1280, 10), 0, num_words)
+body_data <- random_integer(c(1280, 100), 0, num_words)
+tags_data <- random_integer(c(1280, num_tags), 0, 2)
+
+# Dummy target data
+priority_targets <- random_normal(c(1280, 1))
+dept_targets <- random_integer(c(1280, num_departments), 0, 2)
+
+model |> fit(
+  list(title = title_data, body = body_data, tags = tags_data),
+  list(priority = priority_targets, department = dept_targets),
+  epochs = 2,
+  batch_size = 32
+)
+
## Epoch 1/2
+## 40/40 - 3s - 69ms/step - loss: 0.3948
+## Epoch 2/2
+## 40/40 - 0s - 6ms/step - loss: 0.1971
+

When calling fit() with a Dataset object, the dataset should yield either a list of lists like list(list(title_data, body_data, tags_data), list(priority_targets, dept_targets)) or a list of named lists like list(list(title = title_data, body = body_data, tags = tags_data), list(priority = priority_targets, department = dept_targets)).

+
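As a minimal sketch (assuming the tfdatasets package is installed; the dataset name is illustrative), such a dataset could be built like this:

+
+library(tfdatasets, exclude = "shape")
+
+train_ds <- list(
+  list(title = title_data, body = body_data, tags = tags_data),
+  list(priority = priority_targets, department = dept_targets)
+) |>
+  tensor_slices_dataset() |>
+  dataset_batch(32)
+
+model |> fit(train_ds, epochs = 2)
+
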

For a more detailed explanation, refer to the training and evaluation guide.

+
+
+

A toy ResNet model +

+

In addition to models with multiple inputs and outputs, the +functional API makes it easy to manipulate non-linear connectivity +topologies – these are models with layers that are not connected +sequentially, which the Sequential API cannot handle.

+

A common use case for this is residual connections. Let’s build a toy +ResNet model for CIFAR10 to demonstrate this:

+
+inputs <- keras_input(shape = c(32, 32, 3), name = "img")
+block_1_output <- inputs |>
+  layer_conv_2d(32, kernel_size = 3, activation = "relu") |>
+  layer_conv_2d(64, kernel_size = 3, activation = "relu") |>
+  layer_max_pooling_2d(pool_size = 3)
+
+block_2_output <- block_1_output |>
+  layer_conv_2d(32, kernel_size = 3, activation = "relu", padding = "same") |>
+  layer_conv_2d(64, kernel_size = 3, activation = "relu", padding = "same") |>
+  layer_add(block_1_output)
+
+block_3_output <- block_2_output |>
+  layer_conv_2d(64, kernel_size = 3, activation = "relu", padding = "same") |>
+  layer_conv_2d(64, kernel_size = 3, activation = "relu", padding = "same") |>
+  layer_add(block_2_output)
+
+outputs <- block_3_output |>
+  layer_conv_2d(64, 3, activation = "relu") |>
+  layer_global_average_pooling_2d() |>
+  layer_dense(256, activation = "relu") |>
+  layer_dropout(0.5) |>
+  layer_dense(10)
+
+model <- keras_model(inputs, outputs, name = "toy_resnet")
+summary(model)
+
## Model: "toy_resnet"
+## ┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)         Output Shape          Param #  Connected to      
+## ┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
+## │ img (InputLayer)    │ (None, 32, 32, 3) │          0 │ -                 │
+## ├─────────────────────┼───────────────────┼────────────┼───────────────────┤
+## │ conv2d_8 (Conv2D)   │ (None, 30, 30,    │        896 │ img[0][0]         │
+## │                     │ 32)               │            │                   │
+## ├─────────────────────┼───────────────────┼────────────┼───────────────────┤
+## │ conv2d_9 (Conv2D)   │ (None, 28, 28,    │     18,496 │ conv2d_8[0][0]    │
+## │                     │ 64)               │            │                   │
+## ├─────────────────────┼───────────────────┼────────────┼───────────────────┤
+## │ max_pooling2d_2     │ (None, 9, 9, 64)  │          0 │ conv2d_9[0][0]    │
+## │ (MaxPooling2D)      │                   │            │                   │
+## ├─────────────────────┼───────────────────┼────────────┼───────────────────┤
+## │ conv2d_10 (Conv2D)  │ (None, 9, 9, 32)  │     18,464 │ max_pooling2d_2[
+## ├─────────────────────┼───────────────────┼────────────┼───────────────────┤
+## │ conv2d_11 (Conv2D)  │ (None, 9, 9, 64)  │     18,496 │ conv2d_10[0][0]   │
+## ├─────────────────────┼───────────────────┼────────────┼───────────────────┤
+## │ add (Add)           │ (None, 9, 9, 64)  │          0 │ conv2d_11[0][0],  │
+## │                     │                   │            │ max_pooling2d_2[
+## ├─────────────────────┼───────────────────┼────────────┼───────────────────┤
+## │ conv2d_12 (Conv2D)  │ (None, 9, 9, 64)  │     36,928 │ add[0][0]         │
+## ├─────────────────────┼───────────────────┼────────────┼───────────────────┤
+## │ conv2d_13 (Conv2D)  │ (None, 9, 9, 64)  │     36,928 │ conv2d_12[0][0]   │
+## ├─────────────────────┼───────────────────┼────────────┼───────────────────┤
+## │ add_1 (Add)         │ (None, 9, 9, 64)  │          0 │ conv2d_13[0][0],  │
+## │                     │                   │            │ add[0][0]         │
+## ├─────────────────────┼───────────────────┼────────────┼───────────────────┤
+## │ conv2d_14 (Conv2D)  │ (None, 7, 7, 64)  │     36,928 │ add_1[0][0]       │
+## ├─────────────────────┼───────────────────┼────────────┼───────────────────┤
+## │ global_average_poo… │ (None, 64)        │          0 │ conv2d_14[0][0]   │
+## │ (GlobalAveragePool… │                   │            │                   │
+## ├─────────────────────┼───────────────────┼────────────┼───────────────────┤
+## │ dense_6 (Dense)     │ (None, 256)       │     16,640 │ global_average_p… │
+## ├─────────────────────┼───────────────────┼────────────┼───────────────────┤
+## │ dropout (Dropout)   │ (None, 256)       │          0 │ dense_6[0][0]     │
+## ├─────────────────────┼───────────────────┼────────────┼───────────────────┤
+## │ dense_7 (Dense)     │ (None, 10)        │      2,570 │ dropout[0][0]     │
+## └─────────────────────┴───────────────────┴────────────┴───────────────────┘
+##  Total params: 186,346 (727.91 KB)
+##  Trainable params: 186,346 (727.91 KB)
+##  Non-trainable params: 0 (0.00 B)
+

Plot the model:

+
+plot(model, show_shapes = TRUE)
+

+

Now train the model:

+
+c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_cifar10()
+
+x_train <- x_train / 255.0
+x_test <- x_test / 255.0
+
+model |> compile(
+  optimizer = optimizer_rmsprop(1e-3),
+  loss = loss_sparse_categorical_crossentropy(from_logits = TRUE),
+  metrics = "acc"
+)
+# We restrict the data to the first 1000 samples so as to limit the
+# guide render time.
+# Try to train on the entire dataset until convergence!
+model |> fit(
+  x_train[1:1000, , , ],
+  y_train[1:1000, ],
+  batch_size = 64,
+  epochs = 1,
+  validation_split = 0.2
+)
+
## 13/13 - 6s - 478ms/step - acc: 0.1250 - loss: 2.2998 - val_acc: 0.1250 - val_loss: 2.2939
+
+
+
+

Shared layers +

+

Another good use for the functional API is models that use shared layers. Shared layers are layer instances that are reused multiple times in the same model – they learn features that correspond to multiple paths in the graph-of-layers.

+

Shared layers are often used to encode inputs from similar spaces +(say, two different pieces of text that feature similar vocabulary). +They enable sharing of information across these different inputs, and +they make it possible to train such a model on less data. If a given +word is seen in one of the inputs, that will benefit the processing of +all inputs that pass through the shared layer.

+

To share a layer in the functional API, call the same layer instance +multiple times. For instance, here’s an Embedding layer +shared across two different text inputs:

+
+# Embedding for 1000 unique words mapped to 128-dimensional vectors
+shared_embedding <- layer_embedding(input_dim = 1000, output_dim = 128)
+
+# Variable-length sequence of integers
+text_input_a <- keras_input(shape = shape(NA), dtype="int32")
+
+# Variable-length sequence of integers
+text_input_b <- keras_input(shape = shape(NA), dtype="int32")
+
+# Reuse the same layer to encode both inputs
+encoded_input_a <- shared_embedding(text_input_a)
+encoded_input_b <- shared_embedding(text_input_b)
+
+
+

Extract and reuse nodes in the graph of layers +

+

Because the graph of layers you are manipulating is a static data +structure, it can be accessed and inspected. And this is how you are +able to plot functional models as images.

+

This also means that you can access the activations of intermediate +layers (“nodes” in the graph) and reuse them elsewhere – which is very +useful for something like feature extraction.

+

Let’s look at an example. This is a VGG19 model with weights +pretrained on ImageNet:

+
+vgg19 <- application_vgg19()
+

And these are the intermediate activations of the model, obtained by +querying the graph data structure:

+
+features_list <- lapply(vgg19$layers, function(x) x$output)
+

Use these features to create a new feature-extraction model that +returns the values of the intermediate layer activations:

+
+feat_extraction_model <- keras_model(inputs = vgg19$input,
+                                     outputs = features_list)
+
+img <- random_normal(c(1, 224, 224, 3))
+extracted_features <- feat_extraction_model(img)
+

This comes in handy for tasks like neural +style transfer, among other things.

+
+
+

Extend the API using custom layers +

+

keras includes a wide range of built-in layers, for +example:

+
    +
  • Convolutional layers: conv_1d, conv_2d, +conv_3d, conv_2d_transpose +
  • +
  • Pooling layers: max_pooling_1d, +max_pooling_2d, max_pooling_3d, +average_pooling_3d +
  • +
  • RNN layers: gru, lstm, +conv_lstm_2d +
  • +
  • +batch_normalization, dropout, +embedding, etc.
  • +
+

But if you don’t find what you need, it’s easy to extend the API by +creating your own layers. All layers subclass the Layer +class and implement:

+
    +
  • a call() method that specifies the computation done by the layer.
  • +
  • a build() method that creates the weights of the layer (this is just a style convention, since you can also create weights in initialize()).
  • +
+

To learn more about creating layers from scratch, read the custom layers and models guide.

+

The following is a basic implementation of +layer_dense():

+
+custom_dense <- Layer(
+  classname = "CustomDense",
+  initialize = function(units = 32) {
+    super$initialize()
+    self$units <- as.integer(units)
+  },
+  build = function(input_shape) {
+    self$w <- self$add_weight(
+      shape = shape(input_shape[[2]], self$units),
+      initializer = "random_normal",
+      trainable = TRUE
+    )
+    self$b <- self$add_weight(
+      shape = shape(self$units),
+      initializer = "random_normal",
+      trainable = TRUE
+    )
+  },
+  call = function(inputs) {
+    op_matmul(inputs, self$w) + self$b
+  }
+)
+
+inputs <- keras_input(c(4))
+outputs <- custom_dense(inputs, 10)
+
+model <- keras_model(inputs, outputs)
+

For serialization support in your custom layer, define a +get_config() method that returns the constructor arguments +of the layer instance:

+
+custom_dense <- Layer(
+  classname = "CustomDense",
+
+  initialize = function(units = 32, ...) {
+    super$initialize()
+    self$units <- as.integer(units)
+  },
+
+  build = function(input_shape) {
+    self$w <- self$add_weight(
+      shape = shape(input_shape[[2]], self$units),
+      initializer = "random_normal",
+      trainable = TRUE
+    )
+    self$b <- self$add_weight(
+      shape = shape(self$units),
+      initializer = "random_normal",
+      trainable = TRUE
+    )
+  },
+
+  call = function(inputs) {
+    op_matmul(inputs, self$w) + self$b
+  },
+
+  get_config = function() {
+    list(units = self$units)
+  }
+)
+
+inputs <- keras_input(c(4))
+outputs <- custom_dense(inputs, 10)
+
+model <- keras_model(inputs, outputs)
+config <- get_config(model)
+
+new_model <-
+  from_config(config, custom_objects = list(CustomDense = custom_dense))
+

Optionally, implement the class method +from_config(cls, config) which is used when recreating a +layer instance given its config dictionary. The default implementation +of from_config is:

+
+from_config <- function(cls, config) {
+  do.call(cls, config)
+}
+
+
+

When to use the functional API +

+

Should you use the Keras functional API to create a new model, or +just subclass the Model class directly? In general, the +functional API is higher-level, easier and safer, and has a number of +features that subclassed models do not support.

+

However, model subclassing provides greater flexibility when building +models that are not easily expressible as directed acyclic graphs of +layers. For example, you could not implement a Tree-RNN with the +functional API and would have to subclass Model +directly.

+

For an in-depth look at the differences between the functional API +and model subclassing, read What +are Symbolic and Imperative APIs in TensorFlow 2.0?.

+
+

Functional API strengths: +

+

The following properties are also true for Sequential models (which +are also data structures), but are not true for subclassed models (which +are R and Python (byte)code, not data structures).

+
+

Less verbose +

+

There is no super$initialize(...), no +call = function(...), no self$..., etc.

+

Compare:

+
+inputs <- keras_input(shape = shape(32))
+outputs <- inputs |>
+  layer_dense(64, activation = "relu") |>
+  layer_dense(10)
+mlp <- keras_model(inputs, outputs)
+

With the subclassed version:

+
+MLP <- Model(
+  classname = "MLP",
+  initialize = function(...) {
+    super$initialize(...)
+    self$dense_1 <- layer_dense(units = 64, activation = "relu")
+    self$dense_2 <- layer_dense(units = 10)
+  },
+  call = function(inputs) {
+    inputs |>
+      self$dense_1() |>
+      self$dense_2()
+  }
+)
+
+# Instantiate the model.
+mlp <- MLP()
+# Necessary to create the model's state.
+# The model doesn't have a state until it's called at least once.
+out <- mlp(op_zeros(c(1, 32)))
+
+
+

Model validation while defining its connectivity graph +

+

In the functional API, the input specification (shape and dtype) is created in advance (using keras_input()). Every time you call a layer, the layer checks that the specification passed to it matches its assumptions, and it will raise a helpful error message if not.

+

This guarantees that any model you can build with the functional API +will run. All debugging – other than convergence-related debugging – +happens statically during the model construction and not at execution +time. This is similar to type checking in a compiler.

+
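For example, here is a minimal sketch (the variable names are illustrative): calling a convolutional layer on a rank-1 input fails during model construction, before any data is involved:

+
+bad_inputs <- keras_input(shape = 64)
+# A conv layer expects image-like (rank 4) inputs, so this raises an
+# informative error while the graph is being defined:
+try(layer_conv_2d(bad_inputs, filters = 32, kernel_size = 3))
+
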
+
+

A functional model is plottable and inspectable +

+

You can plot the model as a graph, and you can easily access +intermediate nodes in this graph. For example, to extract and reuse the +activations of intermediate layers (as seen in a previous example):

+
+features_list <- lapply(vgg19$layers, function(x) x$output)
+feat_extraction_model <- keras_model(inputs = vgg19$input,
+                                     outputs = features_list)
+
+
+

A functional model can be serialized or cloned +

+

Because a functional model is a data structure rather than a piece of +code, it is safely serializable and can be saved as a single file that +allows you to recreate the exact same model without having access to any +of the original code. See the serialization & saving +guide.

+
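For instance, a functional model round-trips through a single file (the file name here is illustrative):

+
+save_model(model, "my_functional_model.keras", overwrite = TRUE)
+restored_model <- load_model("my_functional_model.keras")
+
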

To serialize a subclassed model, it is necessary for the implementer +to specify a get_config() and from_config() +method at the model level.

+
+
+
+

Functional API weakness: +

+
+

It does not support dynamic architectures +

+

The functional API treats models as DAGs of layers. This is true for +most deep learning architectures, but not all – for example, recursive +networks or Tree RNNs do not follow this assumption and cannot be +implemented in the functional API.

+
+
+
+
+

Mix-and-match API styles +

+

Choosing between the functional API or Model subclassing isn’t a binary decision that restricts you to one category of models. All models in the keras API can interact with each other, whether they’re Sequential models, functional models, or subclassed models that are written from scratch.

+

You can always use a functional model or Sequential +model as part of a subclassed model or layer:

+
+units <- 32
+timesteps <- 10
+input_dim <- 5
+
+# Define a Functional model
+inputs <- keras_input(shape(NA, units))
+outputs <- inputs |>
+  layer_global_average_pooling_1d() |>
+  layer_dense(units = 1)
+
+model <- keras_model(inputs, outputs)
+
+layer_custom_rnn <- Layer(
+  classname = "CustomRNN",
+  initialize = function(...) {
+    super$initialize(...)
+    self$units <- units
+    self$projection_1 <- layer_dense(units = units, activation = "tanh")
+    self$projection_2 <- layer_dense(units = units, activation = "tanh")
+    self$classifier <- model
+  },
+  call = function(inputs, ...) {
+    outputs <- list()
+    state <- op_zeros(c(shape(inputs)[[1]], self$units))
+    for (t in 1:(shape(inputs)[[2]])) {
+      x <- inputs[, t, ]
+      h <- self$projection_1(x)
+      y <- h + self$projection_2(state)
+      state <- y
+      outputs[[t]] <- y
+    }
+    features <- op_stack(outputs, axis = 2)
+    self$classifier(features)
+  }
+)
+
+rnn <- layer_custom_rnn()
+out <- rnn(op_zeros(c(1, timesteps, input_dim)))
+

You can use any subclassed layer or model in the functional API as +long as it implements a call method that follows one of the +following patterns:

+
    +
  • +call(inputs, ...) – Where inputs is a +tensor or a nested structure of tensors (e.g. a list of tensors), and +where ... are non-tensor arguments (non-inputs).
  • +
  • call(inputs, training = NULL, ...) – Where training is a boolean indicating whether the layer should behave in training mode or in inference mode.
  • +
  • +call(inputs, mask = NULL, ...) – Where +mask is a boolean mask tensor (useful for RNNs, for +instance).
  • +
  • +call(inputs, training = NULL, mask = NULL, ...) – Of +course, you can have both masking and training-specific behavior at the +same time.
  • +
+

Additionally, if you implement the get_config() method +on your custom Layer or model, the functional models you create will +still be serializable and cloneable.

+
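For example, a one-line sketch (assuming the model was built from config-aware layers as above):

+
+model_copy <- clone_model(model)
+
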

Here’s a quick example of a custom RNN, written from scratch, being +used in a functional model:

+
+units <- 32
+timesteps <- 10
+input_dim <- 5
+batch_size <- 16
+
+layer_custom_rnn <- Layer(
+  "custom_rnn",
+  initialize = function(...) {
+    super$initialize(...)
+    self$units <- units
+    self$projection_1 <- layer_dense(units = units, activation = "tanh")
+    self$projection_2 <- layer_dense(units = units, activation = "tanh")
+    self$classifier <- layer_dense(units = 1)
+  },
+  call = function(inputs, ...) {
+    outputs <- list()
+    state <- op_zeros(c(shape(inputs)[[1]], self$units))
+    for (t in 1:(shape(inputs)[[2]])) {
+      x <- inputs[, t, ]
+      h <- self$projection_1(x)
+      y <- h + self$projection_2(state)
+      state <- y
+      outputs[[t]] <- y
+    }
+    features <- op_stack(outputs, axis = 2)
+    self$classifier(features)
+  }
+)
+
+# Note that you specify a static batch size for the inputs with the `batch_shape`
+# arg, because the inner computation of `layer_custom_rnn()` requires a static batch size
+# (when you create the `state` zeros tensor).
+inputs <- keras_input(batch_shape = shape(batch_size, timesteps, input_dim))
+outputs <- inputs |>
+  layer_conv_1d(filters = 32, kernel_size = 3) |>
+  layer_custom_rnn()
+
+model <- keras_model(inputs, outputs)
+out <- model(op_zeros(c(batch_size, timesteps, input_dim)))
+
+
+
+ + + +
+ + + +
+
+ + + + + + diff --git a/docs/articles/functional_api/unnamed-chunk-10-1.png b/docs/articles/functional_api/unnamed-chunk-10-1.png new file mode 100644 index 0000000000..94a54466e9 Binary files /dev/null and b/docs/articles/functional_api/unnamed-chunk-10-1.png differ diff --git a/docs/articles/functional_api/unnamed-chunk-11-1.png b/docs/articles/functional_api/unnamed-chunk-11-1.png new file mode 100644 index 0000000000..2e9ecf2cfe Binary files /dev/null and b/docs/articles/functional_api/unnamed-chunk-11-1.png differ diff --git a/docs/articles/functional_api/unnamed-chunk-20-1.png b/docs/articles/functional_api/unnamed-chunk-20-1.png new file mode 100644 index 0000000000..9b1a6efeb0 Binary files /dev/null and b/docs/articles/functional_api/unnamed-chunk-20-1.png differ diff --git a/docs/articles/functional_api/unnamed-chunk-25-1.png b/docs/articles/functional_api/unnamed-chunk-25-1.png new file mode 100644 index 0000000000..2b7ae2386d Binary files /dev/null and b/docs/articles/functional_api/unnamed-chunk-25-1.png differ diff --git a/docs/articles/getting_started.html b/docs/articles/getting_started.html index eef5584e9b..3f6149b747 100644 --- a/docs/articles/getting_started.html +++ b/docs/articles/getting_started.html @@ -1,11 +1,365 @@ - - + + + - - + + + +Getting Started with Keras • keras3 + + + + + + + + + + + + + + - - + Skip to contents + + +
+ + + + +
+
+ + +
+

Overview +

+

Keras is a high-level neural +networks API developed with a focus on enabling fast experimentation. +Being able to go from idea to result with the least possible delay +is key to doing good research. Keras has the following key +features:

+
    +
  • Allows the same code to run on CPU or on GPU, +seamlessly.

  • +
  • User-friendly API which makes it easy to quickly prototype deep +learning models.

  • +
  • Built-in support for convolutional networks (for computer +vision), recurrent networks (for sequence processing), and any +combination of both.

  • +
  • Supports arbitrary network architectures: multi-input or +multi-output models, layer sharing, model sharing, etc. This means that +Keras is appropriate for building essentially any deep learning model, +from a memory network to a neural Turing machine.

  • +
+

This website provides documentation for the R interface to Keras. See +the main Keras website at https://keras.io for additional information on the +project.

+
+
+

Installation +

+

First, install the keras3 R package from CRAN:

+
+install.packages("keras3")
+

or install the development version with:

+
+remotes::install_github("rstudio/keras")
+

The Keras R interface requires that a backend engine be installed. +This is TensorFlow by +default.

+
+keras3::install_keras(backend = "tensorflow")
+

This will provide you with a default installation of Keras and TensorFlow that is GPU capable, if a GPU is available. If you want a more customized installation, see the documentation for install_keras() and the installation section.

+
+
+

MNIST Example +

+

We can learn the basics of Keras by walking through a simple example: +recognizing handwritten digits from the MNIST dataset. +MNIST consists of 28 x 28 grayscale images of handwritten digits like +these:

+

+

The dataset also includes labels for each image, telling us which +digit it is. For example, the labels for the above images are 5, 0, 4, +and 1.

+
+

Preparing the Data +

+

The MNIST dataset is included with Keras and can be accessed using +the dataset_mnist() function. Here we load the dataset then +create variables for our test and training data:

+
+library(keras3)
+mnist <- dataset_mnist()
+x_train <- mnist$train$x
+y_train <- mnist$train$y
+x_test <- mnist$test$x
+y_test <- mnist$test$y
+

The x data is a 3-d array (images, width, height) of grayscale values. To prepare the data for training we convert the 3-d arrays into matrices by reshaping width and height into a single dimension (28x28 images are flattened into length 784 vectors). Then, we convert the grayscale values from integers ranging from 0 to 255 into floating-point values ranging between 0 and 1:

+
+# reshape
+x_train <- array_reshape(x_train, c(nrow(x_train), 784))
+x_test <- array_reshape(x_test, c(nrow(x_test), 784))
+# rescale
+x_train <- x_train / 255
+x_test <- x_test / 255
+

Note that we use the array_reshape() function rather +than the dim<-() function to reshape the array. This is +so that the data is re-interpreted using row-major semantics (as opposed +to R’s default column-major semantics), which is in turn compatible with +the way that the numerical libraries called by Keras interpret array +dimensions.

+
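A small illustration of the difference, using a toy matrix:

+
+m <- matrix(1:6, nrow = 2, byrow = TRUE)
+array_reshape(m, c(3, 2))  # row-major refill: rows are 1 2 / 3 4 / 5 6
+`dim<-`(m, c(3, 2))        # column-major refill: rows are 1 5 / 4 3 / 2 6
+
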

The y data is an integer vector with values ranging from +0 to 9. To prepare this data for training we one-hot encode the vectors +into binary class matrices using the Keras to_categorical() +function:

+
+y_train <- to_categorical(y_train, 10)
+y_test <- to_categorical(y_test, 10)
+
+
+

Defining the Model +

+

The core data structure of Keras is a model, a way to organize +layers. The simplest type of model is the Sequential +model, a linear stack of layers.

+

We begin by creating a sequential model and then adding layers using +the pipe (|>) operator:

+
+model <- keras_model_sequential(input_shape = c(784))
+model |>
+  layer_dense(units = 256, activation = 'relu') |>
+  layer_dropout(rate = 0.4) |>
+  layer_dense(units = 128, activation = 'relu') |>
+  layer_dropout(rate = 0.3) |>
+  layer_dense(units = 10, activation = 'softmax')
+

The input_shape argument to the first layer specifies +the shape of the input data (a length 784 numeric vector representing a +grayscale image). The final layer outputs a length 10 numeric vector +(probabilities for each digit) using a softmax activation +function.

+

Use the summary() function to print the details of the +model:

+
+summary(model)
+
## Model: "sequential"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ dense (Dense)                   │ (None, 256)            │       200,960
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dropout (Dropout)               │ (None, 256)            │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dense_1 (Dense)                 │ (None, 128)            │        32,896
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dropout_1 (Dropout)             │ (None, 128)            │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dense_2 (Dense)                 │ (None, 10)             │         1,290
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 235,146 (918.54 KB)
+##  Trainable params: 235,146 (918.54 KB)
+##  Non-trainable params: 0 (0.00 B)
+
+plot(model)
+

+Next, compile the model with appropriate loss function, optimizer, and +metrics:

+
+model |> compile(
+  loss = 'categorical_crossentropy',
+  optimizer = optimizer_rmsprop(),
+  metrics = c('accuracy')
+)
+
+
+

Training and Evaluation +

+

Use the fit() function to train the model for 30 epochs +using batches of 128 images:

+
+history <- model |> fit(
+  x_train, y_train,
+  epochs = 30, batch_size = 128,
+  validation_split = 0.2
+)
+

The history object returned by fit() +includes loss and accuracy metrics which we can plot:

+
+plot(history)
+
+plot of chunk unnamed-chunk-12
+
+

Evaluate the model’s performance on the test data:

+
+model |> evaluate(x_test, y_test)
+
## 313/313 - 1s - 2ms/step - accuracy: 0.9808 - loss: 0.0869
+
## $accuracy
+## [1] 0.9808
+##
+## $loss
+## [1] 0.08689082
+

Generate predictions on new data:

+
+probs <- model |> predict(x_test)
+
## 313/313 - 0s - 1ms/step
+
+max.col(probs) - 1L
+
##   [1] 7 2 1 0 4 1 4 9 6 9 0 6 9 0 1 5 9 7 3 4 9 6 6 5 4 0 7 4 0 1 3 1 3 4 7
+##  [36] 2 7 1 2 1 1 7 4 2 3 5 1 2 4 4 6 3 5 5 6 0 4 1 9 5 7 8 9 3 7 4 6 4 3 0
+##  [71] 7 0 2 9 1 7 3 2 9 7 7 6 2 7 8 4 7 3 6 1 3 6 9 3 1 4 1 7 6 9
+##  [ reached getOption("max.print") -- omitted 9900 entries ]
+

Keras provides a vocabulary for building deep learning models that is +simple, elegant, and intuitive. Building a question answering system, an +image classification model, a neural Turing machine, or any other model +is just as straightforward.

+
+
+

Deep Learning with R Book +

+

If you want a more comprehensive introduction to both Keras and the +concepts and practice of deep learning, we recommend the Deep Learning with R, 2nd Edition +book from Manning. This book is a collaboration between François +Chollet, the creator of (Python) Keras, J.J. Allaire, who wrote the +original R interface to Keras, and Tomasz Kalinowski, the maintainer of +the R interface to Keras.

+

The book presumes no significant knowledge of machine learning and +deep learning, and goes all the way from basic theory to advanced +practical applications, all using the R interface to Keras.

+
+ +
+
+
+
+

Why this name, Keras? +

+

Keras (κέρας) means horn in Greek. It is a reference to a literary +image from ancient Greek and Latin literature, first found in the +Odyssey, where dream spirits (Oneiroi, singular Oneiros) are divided +between those who deceive men with false visions, who arrive to Earth +through a gate of ivory, and those who announce a future that will come +to pass, who arrive through a gate of horn. It’s a play on the words +κέρας (horn) / κραίνω (fulfill), and ἐλέφας (ivory) / ἐλεφαίρομαι +(deceive).

+

Keras was initially developed as part of the research effort of +project ONEIROS (Open-ended Neuro-Electronic Intelligent Robot Operating +System).

+
+

“Oneiroi are beyond our unravelling –who can be sure what tale they +tell? Not all that men look for comes to pass. Two gates there are that +give passage to fleeting Oneiroi; one is made of horn, one of ivory. The +Oneiroi that pass through sawn ivory are deceitful, bearing a message +that will not be fulfilled; those that come out through polished horn +have truth behind them, to be accomplished for men who see them.” Homer, +Odyssey 19. 562 ff (Shewring translation).

+
+
+
+
+ + + +
+ + + +
+
+ + + + + + + diff --git a/docs/articles/getting_started/unnamed-chunk-12-1.png b/docs/articles/getting_started/unnamed-chunk-12-1.png new file mode 100644 index 0000000000..b070284812 Binary files /dev/null and b/docs/articles/getting_started/unnamed-chunk-12-1.png differ diff --git a/docs/articles/getting_started/unnamed-chunk-9-1.png b/docs/articles/getting_started/unnamed-chunk-9-1.png new file mode 100644 index 0000000000..d31eac9bcf Binary files /dev/null and b/docs/articles/getting_started/unnamed-chunk-9-1.png differ diff --git a/website/articles/images/MNIST.png b/docs/articles/images/MNIST.png similarity index 100% rename from website/articles/images/MNIST.png rename to docs/articles/images/MNIST.png diff --git a/docs/articles/index.html b/docs/articles/index.html index cb35db6ea4..5b7e63de80 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -1,11 +1,142 @@ - - - - - - - - - - + +Articles • keras3 + Skip to contents + + +
+
+
+ +
+

All vignettes

+
+ +
Convolutional autoencoder for image denoising
+

How to train a deep convolutional autoencoder for image denoising.

+
Customizing what happens in `fit()` with TensorFlow
+

Overriding the training step of the Model class with TensorFlow.

+
Multi-GPU distributed training with TensorFlow
+

Guide to multi-GPU training for Keras models with TensorFlow.

+
Distributed training with Keras 3
+

Complete guide to the distribution API for multi-backend Keras.

+
The Functional API
+

Complete guide to the functional API.

+
Getting Started with Keras
+
+
Imbalanced classification: credit card fraud detection
+

Demonstration of how to handle highly imbalanced classification problems.

+
Keras examples
+
+
Introduction to Keras for engineers
+

First contact with Keras 3.

+
Introduction to Keras for Researchers
+

Everything you need to know to use Keras & TensorFlow for deep learning research.

+
Making new layers and models via subclassing
+

Complete guide to writing Layer and Model objects from scratch.

+
Simple MNIST convnet
+

A simple convnet that achieves ~99% test accuracy on MNIST.

+
Image segmentation with a U-Net-like architecture
+

Image segmentation model trained from scratch on the Oxford Pets dataset.

+
The Sequential model
+

Complete guide to the Sequential model.

+
Save, serialize, and export models
+

Complete guide to saving, serializing, and exporting models.

+
Structured data classification with FeatureSpace
+

Classify tabular data in a few lines of code.

+
Text classification from scratch
+

Text sentiment classification starting from raw text files.

+
Timeseries anomaly detection using an Autoencoder
+

Detect anomalies in a timeseries using an Autoencoder.

+
Timeseries classification from scratch
+

Training a timeseries classifier from scratch on the FordA dataset from the UCR/UEA archive.

+
Training & evaluation with the built-in methods
+

Complete guide to training & evaluation with fit() and evaluate().

+
Transfer learning & fine-tuning
+

Complete guide to transfer learning & fine-tuning in Keras.

+
Understanding masking & padding
+

Complete guide to using mask-aware sequence layers in Keras.

+
Writing a training loop from scratch in TensorFlow
+

Complete guide to writing low-level training & evaluation loops in TensorFlow.

+
Writing your own callbacks
+

Complete guide to writing new Keras callbacks.

+
+
+ + +
+ + + +
+ + + + + + diff --git a/docs/articles/intro_to_keras_for_engineers.html b/docs/articles/intro_to_keras_for_engineers.html new file mode 100644 index 0000000000..5d76f3fb7b --- /dev/null +++ b/docs/articles/intro_to_keras_for_engineers.html @@ -0,0 +1,528 @@ + + + + + + + + +Introduction to Keras for engineers • keras3 + + + + + + + + + + + + + + + + + Skip to contents + + +
+ + + + +
+
+ + + +
+

Introduction +

+

Keras 3 is a deep learning framework that works with TensorFlow, JAX, and PyTorch interchangeably. This guide will walk you through key Keras 3 workflows.

+

Let’s start by installing Keras 3:

+

pip install keras --upgrade --quiet

+
+
+

Setup +

+

We’re going to be using the TensorFlow backend here – but you can edit the string below to "jax" or "torch", restart the R session, and the whole guide will run just the same! This entire guide is backend-agnostic.

+
+library(tensorflow, exclude = c("shape", "set_random_seed"))
+library(keras3)
+
+# Note that you must configure the backend
+# before calling any other keras functions.
+# The backend cannot be changed once the
+# package is imported.
+use_backend("tensorflow")
+
+
+

A first example: A MNIST convnet +

+

Let’s start with the Hello World of ML: training a convnet to +classify MNIST digits.

+

Here’s the data:

+
+# Load the data and split it between train and test sets
+c(c(x_train, y_train), c(x_test, y_test)) %<-% keras3::dataset_mnist()
+
+# Scale images to the [0, 1] range
+x_train <- x_train / 255
+x_test <- x_test / 255
+# Make sure images have shape (28, 28, 1)
+x_train <- op_expand_dims(x_train, -1)
+x_test <- op_expand_dims(x_test, -1)
+
+dim(x_train)
+
## [1] 60000    28    28     1
+
+dim(x_test)
+
## [1] 10000    28    28     1
+

Here’s our model.

+

Different model-building options that Keras offers include:

+
    +
  • the Sequential API (used below),
  • +
  • the Functional API, and
  • +
  • model subclassing.
  • +
+
+# Model parameters
+num_classes <- 10
+input_shape <- c(28, 28, 1)
+
+model <- keras_model_sequential(input_shape = input_shape)
+model |>
+  layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") |>
+  layer_conv_2d(filters = 64, kernel_size = c(3, 3), activation = "relu") |>
+  layer_max_pooling_2d(pool_size = c(2, 2)) |>
+  layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") |>
+  layer_conv_2d(filters = 128, kernel_size = c(3, 3), activation = "relu") |>
+  layer_global_average_pooling_2d() |>
+  layer_dropout(rate = 0.5) |>
+  layer_dense(units = num_classes, activation = "softmax")
+

Here’s our model summary:

+
+summary(model)
+
## Model: "sequential"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ conv2d (Conv2D)                 │ (None, 26, 26, 64)     │           640
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_1 (Conv2D)               │ (None, 24, 24, 64)     │        36,928
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ max_pooling2d (MaxPooling2D)    │ (None, 12, 12, 64)     │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_2 (Conv2D)               │ (None, 10, 10, 128)    │        73,856
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_3 (Conv2D)               │ (None, 8, 8, 128)      │       147,584
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ global_average_pooling2d        │ (None, 128)            │             0
+## │ (GlobalAveragePooling2D)        │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dropout (Dropout)               │ (None, 128)            │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dense (Dense)                   │ (None, 10)             │         1,290
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 260,298 (1016.79 KB)
+##  Trainable params: 260,298 (1016.79 KB)
+##  Non-trainable params: 0 (0.00 B)
+

We use the compile() method to specify the optimizer, +loss function, and the metrics to monitor. Note that with the JAX and +TensorFlow backends, XLA compilation is turned on by default.

+
+model |> compile(
+  optimizer = "adam",
+  loss = "sparse_categorical_crossentropy",
+  metrics = list(
+    metric_sparse_categorical_accuracy(name = "acc")
+  )
+)
+

Let’s train and evaluate the model. We’ll set aside a validation +split of 15% of the data during training to monitor generalization on +unseen data.

+
+batch_size <- 128
+epochs <- 10
+
+callbacks <- list(
+  callback_model_checkpoint(filepath="model_at_epoch_{epoch}.keras"),
+  callback_early_stopping(monitor="val_loss", patience=2)
+)
+
+model |> fit(
+  x_train, y_train,
+  batch_size = batch_size,
+  epochs = epochs,
+  validation_split = 0.15,
+  callbacks = callbacks
+)
+
## Epoch 1/10
+## 399/399 - 7s - 18ms/step - acc: 0.7445 - loss: 0.7534 - val_acc: 0.9630 - val_loss: 0.1260
+## Epoch 2/10
+## 399/399 - 2s - 5ms/step - acc: 0.9362 - loss: 0.2103 - val_acc: 0.9774 - val_loss: 0.0762
+## Epoch 3/10
+## 399/399 - 2s - 5ms/step - acc: 0.9560 - loss: 0.1492 - val_acc: 0.9830 - val_loss: 0.0607
+## Epoch 4/10
+## 399/399 - 2s - 5ms/step - acc: 0.9650 - loss: 0.1187 - val_acc: 0.9859 - val_loss: 0.0494
+## Epoch 5/10
+## 399/399 - 2s - 5ms/step - acc: 0.9716 - loss: 0.1003 - val_acc: 0.9866 - val_loss: 0.0485
+## Epoch 6/10
+## 399/399 - 2s - 5ms/step - acc: 0.9744 - loss: 0.0878 - val_acc: 0.9888 - val_loss: 0.0381
+## Epoch 7/10
+## 399/399 - 2s - 5ms/step - acc: 0.9763 - loss: 0.0799 - val_acc: 0.9896 - val_loss: 0.0378
+## Epoch 8/10
+## 399/399 - 2s - 5ms/step - acc: 0.9791 - loss: 0.0686 - val_acc: 0.9862 - val_loss: 0.0454
+## Epoch 9/10
+## 399/399 - 2s - 5ms/step - acc: 0.9800 - loss: 0.0665 - val_acc: 0.9889 - val_loss: 0.0412
+
+score <- model |> evaluate(x_test, y_test, verbose = 0)
+

During training, we were saving a model at the end of each epoch. You +can also save the model in its latest state like this:

+
+save_model(model, "final_model.keras", overwrite=TRUE)
+

And reload it like this:

+
+model <- load_model("final_model.keras")
+

Next, you can query predictions of class probabilities with +predict():

+
+predictions <- model |> predict(x_test)
+
## 313/313 - 1s - 2ms/step
+
+dim(predictions)
+
## [1] 10000    10
+

That’s it for the basics!

+
+
+

Writing cross-framework custom components +

+

Keras enables you to write custom Layers, Models, Metrics, Losses, +and Optimizers that work across TensorFlow, JAX, and PyTorch with the +same codebase. Let’s take a look at custom layers first.

+

The op_ namespace contains:

+
    +
  • An implementation of the NumPy API, e.g. op_stack or +op_matmul.
  • +
  • A set of neural network specific ops that are absent from NumPy, +such as op_conv or +op_binary_crossentropy.
  • +
+
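For instance (a quick sketch), the same ops run unchanged on any backend:

+
+x <- op_reshape(op_arange(1, 7), c(2, 3))
+op_matmul(x, op_transpose(x))  # NumPy-style linear algebra
+op_relu(x - 3)                 # a neural-network-specific op
+
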

Let’s make a custom Dense layer that works with all +backends:

+
+layer_my_dense <- Layer(
+  classname = "MyDense",
+  initialize = function(units, activation = NULL, name = NULL, ...) {
+    super$initialize(name = name, ...)
+    self$units <- units
+    self$activation <- activation
+  },
+  build = function(input_shape) {
+    input_dim <- tail(input_shape, 1)
+    self$w <- self$add_weight(
+      shape = shape(input_dim, self$units),
+      initializer = initializer_glorot_normal(),
+      name = "kernel",
+      trainable = TRUE
+    )
+    self$b <- self$add_weight(
+      shape = shape(self$units),
+      initializer = initializer_zeros(),
+      name = "bias",
+      trainable = TRUE
+    )
+  },
+  call = function(inputs) {
+    # Use Keras ops to create backend-agnostic layers/metrics/etc.
+    x <- op_matmul(inputs, self$w) + self$b
+    if (!is.null(self$activation))
+      x <- self$activation(x)
+    x
+  }
+)
+

Next, let’s make a custom Dropout layer that relies on +the random_* namespace:

+
+layer_my_dropout <- Layer(
+  "MyDropout",
+  initialize = function(rate, name = NULL, seed = NULL, ...) {
+    super$initialize(name = name)
+    self$rate <- rate
+    # Use seed_generator for managing RNG state.
+    # It is a state element and its seed variable is
+    # tracked as part of `layer$variables`.
+    self$seed_generator <- random_seed_generator(seed)
+  },
+  call = function(inputs) {
+    # Use `keras3::random_*` for random ops.
+    random_dropout(inputs, self$rate, seed = self$seed_generator)
+  }
+)
+

Next, let’s write a custom subclassed model that uses our two custom +layers:

+
+MyModel <- Model(
+  "MyModel",
+  initialize = function(num_classes, ...) {
+    super$initialize(...)
+    self$conv_base <-
+      keras_model_sequential() |>
+      layer_conv_2d(64, kernel_size = c(3, 3), activation = "relu") |>
+      layer_conv_2d(64, kernel_size = c(3, 3), activation = "relu") |>
+      layer_max_pooling_2d(pool_size = c(2, 2)) |>
+      layer_conv_2d(128, kernel_size = c(3, 3), activation = "relu") |>
+      layer_conv_2d(128, kernel_size = c(3, 3), activation = "relu") |>
+      layer_global_average_pooling_2d()
+
+    self$dp <- layer_my_dropout(rate = 0.5)
+    self$dense <- layer_my_dense(units = num_classes,
+                                 activation = activation_softmax)
+  },
+  call = function(inputs) {
+    inputs |>
+      self$conv_base() |>
+      self$dp() |>
+      self$dense()
+  }
+)
+

Let’s compile it and fit it:

+
+model <- MyModel(num_classes = 10)
+model |> compile(
+  loss = loss_sparse_categorical_crossentropy(),
+  optimizer = optimizer_adam(learning_rate = 1e-3),
+  metrics = list(
+    metric_sparse_categorical_accuracy(name = "acc")
+  )
+)
+
+model |> fit(
+  x_train, y_train,
+  batch_size = batch_size,
+  epochs = 1, # For speed
+  validation_split = 0.15
+)
+
## 399/399 - 8s - 19ms/step - acc: 0.7347 - loss: 0.7746 - val_acc: 0.9247 - val_loss: 0.2487
+
+
+

Training models on arbitrary data sources +

+

All Keras models can be trained and evaluated on a wide variety of +data sources, independently of the backend you’re using. This +includes:

+
    +
  • Arrays
  • +
  • Dataframes
  • +
  • TensorFlow tf_dataset objects
  • +
  • PyTorch DataLoader objects
  • +
  • Keras PyDataset objects
  • +
+

They all work whether you’re using TensorFlow, JAX, or PyTorch as +your Keras backend.

+

Let’s try this out with tf_dataset:

+
+library(tfdatasets, exclude = "shape")
+
+train_dataset <- list(x_train, y_train) |>
+  tensor_slices_dataset() |>
+  dataset_batch(batch_size) |>
+  dataset_prefetch(buffer_size = tf$data$AUTOTUNE)
+
+test_dataset <- list(x_test, y_test) |>
+  tensor_slices_dataset() |>
+  dataset_batch(batch_size) |>
+  dataset_prefetch(buffer_size = tf$data$AUTOTUNE)
+
+model <- MyModel(num_classes = 10)
+model |> compile(
+  loss = loss_sparse_categorical_crossentropy(),
+  optimizer = optimizer_adam(learning_rate = 1e-3),
+  metrics = list(
+    metric_sparse_categorical_accuracy(name = "acc")
+  )
+)
+
+model |> fit(train_dataset, epochs = 1, validation_data = test_dataset)
+
## 469/469 - 9s - 20ms/step - acc: 0.7534 - loss: 0.7366 - val_acc: 0.8981 - val_loss: 0.3329
+
+
+

Further reading +

+

This concludes our short overview of the new multi-backend +capabilities of Keras 3. Next, you can learn about:

+
+

How to customize what happens in fit() +

+

Want to implement a non-standard training algorithm yourself but +still want to benefit from the power and usability of +fit()? It’s easy to customize fit() to support +arbitrary use cases:

+ +
+
+
+

How to write custom training loops +

+ +
+
+

How to distribute training +

+ +

Enjoy the library! 🚀

+
+
+
+ + + +
+ + + +
+
+ + + + + + + diff --git a/docs/articles/intro_to_keras_for_researchers.html b/docs/articles/intro_to_keras_for_researchers.html new file mode 100644 index 0000000000..8f47296797 --- /dev/null +++ b/docs/articles/intro_to_keras_for_researchers.html @@ -0,0 +1,1388 @@ + + + + + + + + +Introduction to Keras for Researchers • keras3 + + + + + + + + + + + + + + + + + Skip to contents + + +
+ + + + +
+
+ + + +
+

Setup +

+
+library(tensorflow, exclude = c("shape", "set_random_seed"))
+library(keras3)
+
+
+

Introduction +

+

Are you a machine learning researcher? Do you publish at NeurIPS and +push the state-of-the-art in CV and NLP? This guide will serve as your +first introduction to core Keras & TensorFlow API concepts.

+

In this guide, you will learn about:

+
    +
  • Tensors, variables, and gradients in TensorFlow
  • +
  • Creating layers by subclassing the [Layer] class
  • +
  • Writing low-level training loops
  • +
  • Tracking losses created by layers via the add_loss() +method
  • +
  • Tracking metrics in a low-level training loop
  • +
  • Speeding up execution with a compiled +[tensorflow::tf_function()]
  • +
  • Executing layers in training or inference mode
  • +
  • The Keras Functional API
  • +
+

You will also see the Keras API in action in two end-to-end research +examples: a Variational Autoencoder, and a Hypernetwork.

+
+
+

Tensors +

+

TensorFlow is an infrastructure layer for differentiable programming. +At its heart, it’s a framework for manipulating N-dimensional arrays +(tensors), much like NumPy.

+

However, there are three key differences between NumPy and +TensorFlow:

+
    +
  • TensorFlow can leverage hardware accelerators such as GPUs and +TPUs.
  • +
  • TensorFlow can automatically compute the gradient of arbitrary +differentiable tensor expressions.
  • +
  • TensorFlow computation can be distributed to large numbers of devices on a single machine, and to large numbers of machines (potentially with multiple devices each).
  • +
+

Let’s take a look at the object that is at the core of TensorFlow: +the Tensor.

+

Here’s a constant tensor:

+
+x <- tf$constant(rbind(c(5, 2), c(1, 3)))
+print(x)
+
## tf.Tensor(
+## [[5. 2.]
+##  [1. 3.]], shape=(2, 2), dtype=float64)
+

You can get its value as an R array by calling as.array():

+
+as.array(x)
+
##      [,1] [,2]
+## [1,]    5    2
+## [2,]    1    3
+

It features the attributes dtype and +shape:

+
+x$dtype
+
## tf.float64
+
+x$shape
+
## TensorShape([2, 2])
+

A common way to create constant tensors is via tf$ones +and tf$zeros:

+
+tf$ones(shape = shape(2, 1))
+
## tf.Tensor(
+## [[1.]
+##  [1.]], shape=(2, 1), dtype=float32)
+
+tf$zeros(shape = shape(2, 1))
+
## tf.Tensor(
+## [[0.]
+##  [0.]], shape=(2, 1), dtype=float32)
+

You can also create random constant tensors:

+
+x <- random_normal(shape = c(2, 2), mean = 0.0, stddev = 1.0)
+x <- random_uniform(shape = c(2, 2), minval = 0, maxval = 10)
+
+
+

Variables +

+

Variables are special tensors used to store mutable state (such as +the weights of a neural network). You create a Variable +using some initial value:

+
+initial_value <- random_normal(shape=c(2, 2))
+a <- tf$Variable(initial_value)
+print(a)
+
## <tf.Variable 'Variable:0' shape=(2, 2) dtype=float32, numpy=
+## array([[ 0.9057419 ,  0.7916686 ],
+##        [ 0.28754202, -0.5408822 ]], dtype=float32)>
+

You update the value of a Variable by using the methods +$assign(value), $assign_add(increment), or +$assign_sub(decrement):

+
+new_value <- random_normal(shape=c(2, 2))
+a$assign(new_value)
+
## <tf.Variable 'UnreadVariable' shape=(2, 2) dtype=float32, numpy=
+## array([[-0.3405368 , -2.1463926 ],
+##        [ 1.2602988 ,  0.12241419]], dtype=float32)>
+
+added_value <- random_normal(shape=c(2, 2))
+a$assign_add(added_value)
+
## <tf.Variable 'UnreadVariable' shape=(2, 2) dtype=float32, numpy=
+## array([[ 0.04820395, -2.6854615 ],
+##        [ 0.23246336,  1.4535258 ]], dtype=float32)>
+
+
+

Doing math in TensorFlow +

+

If you’ve used NumPy, doing math in TensorFlow will look very +familiar. The main difference is that your TensorFlow code can run on +GPU and TPU.

+
+a <- random_normal(shape=c(2, 2))
+b <- random_normal(shape=c(2, 2))
+
+c <- a + b
+d <- tf$square(c)
+e <- tf$exp(d)
+
+
+

Gradients +

+

Here’s another big difference with R: you can automatically retrieve +the gradient of any differentiable expression.

+

Just open a GradientTape, start “watching” a tensor via +tape$watch(), and compose a differentiable expression using +this tensor as input:

+
+a <- random_normal(shape=c(2, 2))
+b <- random_normal(shape=c(2, 2))
+
+with(tf$GradientTape() %as% tape, {
+  tape$watch(a)  # Start recording the history of operations applied to `a`
+  c <- tf$sqrt(tf$square(a) + tf$square(b))  # Do some math using `a`
+  # What's the gradient of `c` with respect to `a`?
+  dc_da <- tape$gradient(c, a)
+  print(dc_da)
+})
+
## tf.Tensor(
+## [[ 0.9969011  -0.7707146 ]
+##  [ 0.23378514  0.96255165]], shape=(2, 2), dtype=float32)
+

By default, variables are watched automatically, so you don’t need to +manually watch them:

+
+a <- tf$Variable(a)
+
+with(tf$GradientTape() %as% tape, {
+  c <- tf$sqrt(tf$square(a) + tf$square(b))
+  dc_da <- tape$gradient(c, a)
+  print(dc_da)
+})
+
## tf.Tensor(
+## [[ 0.9969011  -0.7707146 ]
+##  [ 0.23378514  0.96255165]], shape=(2, 2), dtype=float32)
+

Note that you can compute higher-order derivatives by nesting +tapes:

+
+with(tf$GradientTape() %as% outer_tape, {
+  with(tf$GradientTape() %as% tape, {
+    c <- tf$sqrt(tf$square(a) + tf$square(b))
+    dc_da <- tape$gradient(c, a)
+  })
+  d2c_da2 <- outer_tape$gradient(dc_da, a)
+  print(d2c_da2)
+})
+
## tf.Tensor(
+## [[3.3447742e-03 7.1282005e-01]
+##  [5.7464113e+00 5.5013180e-02]], shape=(2, 2), dtype=float32)
+
+
+

Keras layers +

+

While TensorFlow is an infrastructure layer for +differentiable programming, dealing with tensors, variables, +and gradients, Keras is a user interface for deep +learning, dealing with layers, models, optimizers, loss +functions, metrics, and more.

+

Keras serves as the high-level API for TensorFlow: Keras is what +makes TensorFlow simple and productive.

+

The Layer class is the fundamental abstraction in Keras. +A Layer encapsulates a state (weights) and some computation +(defined in the call method).

+

A simple layer looks like this. The self$add_weight() +method gives you a shortcut for creating weights:

+
+Linear <- new_layer_class(
+  "Linear",
+  initialize = function(units = 32, input_dim = 32) {
+    super$initialize()
+    self$w <- self$add_weight(
+      shape = shape(input_dim, units),
+      initializer = "random_normal",
+      trainable = TRUE
+    )
+    self$b <- self$add_weight(
+      shape = shape(units),
+      initializer = "zeros",
+      trainable = TRUE
+    )
+  },
+  call = function(inputs) {
+    tf$matmul(inputs, self$w) + self$b
+  }
+)
+

You would use a Layer instance much like an R +function:

+
+# Instantiate our layer.
+linear_layer <- Linear(units=4, input_dim=2)
+
+# The layer can be treated as a function.
+# Here we call it on some data.
+y <- linear_layer(tf$ones(shape(2, 2)))
+

The weight variables (created in initialize) are +automatically tracked under the weights property:

+
+linear_layer$weights
+
## [[1]]
+## <KerasVariable shape=(2, 4), dtype=float32, path=linear/variable>
+##
+## [[2]]
+## <KerasVariable shape=(4), dtype=float32, path=linear/variable_1>
+

You have many built-in layers available, from Dense to +Conv2D to LSTM to fancier ones like +Conv3DTranspose or ConvLSTM2D. Be smart about +reusing built-in functionality.

+
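+
+For example (a quick sketch), the built-in layer_dense() is the
+off-the-shelf counterpart of the Linear layer we just wrote:
+
+# Instantiating a built-in layer looks just like instantiating our own.
+dense_layer <- layer_dense(units = 4)
+y <- dense_layer(tf$ones(shape(2, 2)))  # weights are created on first call
+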
+
+

Layer weight creation in build(input_shape) +

+

It’s often a good idea to defer weight creation to the +build() method, so that you don’t need to specify the input +dim/shape at layer construction time:

+
+Linear <- new_layer_class(
+  "Linear",
+  initialize = function(units = 32) {
+    super$initialize()
+    self$units <- units
+  },
+  build = function(input_shape) {
+    self$w <- self$add_weight(
+      shape = shape(input_shape[-1], self$units),
+      initializer = "random_normal",
+      trainable = TRUE
+    )
+    self$b <- self$add_weight(
+      shape = shape(self$units),
+      initializer = "zeros",
+      trainable = TRUE
+    )
+  },
+  call = function(inputs) {
+    tf$matmul(inputs, self$w) + self$b
+  }
+)
+
+# Instantiate our layer.
+linear_layer <- Linear(units = 4)
+
+# This will also call `build(input_shape)` and create the weights.
+y <- linear_layer(tf$ones(shape(2, 2)))
+
+
+

Layer gradients +

+

You can automatically retrieve the gradients of the weights of a +layer by calling it inside a GradientTape. Using these +gradients, you can update the weights of the layer, either manually, or +using an optimizer object. Of course, you can modify the gradients +before using them, if you need to.

+
+# Prepare a dataset.
+c(c(x_train, y_train), .) %<-% dataset_mnist()
+
+x_train <- array_reshape(x_train, c(60000, 784)) / 255
+
+dataset <- tfdatasets::tensor_slices_dataset(list(x_train, y_train)) %>%
+  tfdatasets::dataset_shuffle(buffer_size=1024) %>%
+  tfdatasets::dataset_batch(64)
+
+# Instantiate our linear layer (defined above) with 10 units.
+linear_layer <- Linear(units = 10)
+
+# Instantiate a logistic loss function that expects integer targets.
+loss_fn <- loss_sparse_categorical_crossentropy(from_logits=TRUE)
+
+# Instantiate an optimizer.
+optimizer <- optimizer_sgd(learning_rate=1e-3)
+
+# Iterate over the batches of the dataset.
+coro::loop(for(data in dataset) {
+  # Open a GradientTape.
+  with(tf$GradientTape() %as% tape, {
+    # Forward pass.
+    logits <- linear_layer(data[[1]])
+
+    # Loss value for this batch.
+    loss_value <- loss_fn(data[[2]], logits)
+  })
+
+  # Get gradients of the loss wrt the weights.
+  gradients <- tape$gradient(loss_value, linear_layer$trainable_weights)
+
+  # Update the weights of our linear layer.
+  optimizer$apply_gradients(zip_lists(gradients, linear_layer$trainable_weights))
+})
+loss_value
+
## tf.Tensor(1.2819729, shape=(), dtype=float32)
+
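+
+For instance (a hedged sketch, not part of the loop above), you could
+clip each gradient tensor by norm before applying it:
+
+# Hypothetical modification of the loop body: clip each gradient to norm 1,
+# then apply the clipped gradients as before.
+gradients <- lapply(gradients, function(g) tf$clip_by_norm(g, 1))
+optimizer$apply_gradients(zip_lists(gradients, linear_layer$trainable_weights))
+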
+
+

Trainable and non-trainable weights +

+

Weights created by layers can be either trainable or non-trainable. +They’re exposed in trainable_weights and +non_trainable_weights respectively. Here’s a layer with a +non-trainable weight:

+
+ComputeSum <- new_layer_class(
+  "ComputeSum",
+  initialize = function(input_dim) {
+    super$initialize()
+    # Create a non-trainable weight.
+    self$total <- self$add_weight(
+      initializer = "zeros",
+      shape = shape(input_dim),
+      trainable = FALSE
+    )
+  },
+  call = function(inputs) {
+    self$total$assign_add(tf$reduce_sum(inputs, axis=0L))
+    self$total
+  }
+)
+
+my_sum <- ComputeSum(input_dim = 2)
+x <- tf$ones(shape(2, 2))
+
+as.array(my_sum(x))
+
## [1] 2 2
+
+as.array(my_sum(x))
+
## [1] 4 4
+
+my_sum$trainable_weights
+
## list()
+
+
+

Layers that own layers +

+

Layers can be recursively nested to create bigger computation blocks. +Each layer will track the weights of its sublayers (both trainable and +non-trainable).

+
+# Let's reuse the Linear class
+# with a `build` method that we defined above.
+
+MLP <- new_layer_class(
+  "MLP",
+  initialize = function() {
+    super$initialize()
+    self$linear_1 <- Linear(units = 32)
+    self$linear_2 <- Linear(units = 32)
+    self$linear_3 <- Linear(units = 10)
+  },
+  call = function(inputs) {
+    x <- self$linear_1(inputs)
+    x <- tf$nn$relu(x)
+    x <- self$linear_2(x)
+    x <- tf$nn$relu(x)
+    return(self$linear_3(x))
+  }
+)
+
+mlp <- MLP()
+
+# The first call to the `mlp` object will create the weights.
+y <- mlp(tf$ones(shape=shape(3, 64)))
+
+# Weights are recursively tracked.
+length(mlp$weights)
+
## [1] 6
+

Note that our manually-created MLP above is equivalent to the +following built-in option:

+
+mlp <- keras_model_sequential() %>%
+  layer_dense(units = 32, activation = "relu") %>%
+  layer_dense(units = 32, activation = "relu") %>%
+  layer_dense(units = 10)
+
+
+

Tracking losses created by layers +

+

Layers can create losses during the forward pass via the +add_loss() method. This is especially useful for +regularization losses. The losses created by sublayers are recursively +tracked by the parent layers.

+

Here’s a layer that creates an activity regularization loss:

+
+# A layer that creates an activity sparsity regularization loss
+ActivityRegularization <- new_layer_class(
+  "ActivityRegularization",
+  initialize = function(rate=1e-2) {
+    super$initialize()
+    self$rate <- rate
+  },
+  call = function(inputs) {
+    self$add_loss(self$rate * tf$reduce_sum(tf$abs(inputs)))
+    inputs
+  }
+)
+

Any model incorporating this layer will track this regularization +loss:

+
+# Let's use the loss layer in a MLP block.
+SparseMLP <- new_layer_class(
+  "SparseMLP",
+  initialize = function() {
+    super$initialize()
+    self$linear_1 <- Linear(units = 32)
+    self$reg <- ActivityRegularization(rate = 1e-2)
+    self$linear_3 <- Linear(units = 10)
+  },
+  call = function(inputs) {
+    x <- self$linear_1(inputs)
+    x <- tf$nn$relu(x)
+    x <- self$reg(x)
+    return(self$linear_3(x))
+  }
+)
+
+mlp <- SparseMLP()
+y <- mlp(tf$ones(shape(10, 10)))
+
+mlp$losses  # List containing one float32 scalar
+
## [[1]]
+## tf.Tensor(0.18065463, shape=(), dtype=float32)
+

These losses are cleared by the top-level layer at the start of each +forward pass – they don’t accumulate. layer$losses always +contains only the losses created during the last forward pass. You would +typically use these losses by summing them before computing your +gradients when writing a training loop.

+
+# Losses correspond to the *last* forward pass.
+mlp <- SparseMLP()
+mlp(tf$ones(shape(10, 10)))
+
## tf.Tensor(
+## [[ 0.0388482  -0.03920118  0.01624808 -0.01361975 -0.01354899  0.07107338
+##   -0.01077365  0.05688906 -0.02838149 -0.04084621]
+##  [ 0.0388482  -0.03920118  0.01624808 -0.01361975 -0.01354899  0.07107338
+##   -0.01077365  0.05688906 -0.02838149 -0.04084621]
+##  [ 0.0388482  -0.03920118  0.01624808 -0.01361975 -0.01354899  0.07107338
+##   -0.01077365  0.05688906 -0.02838149 -0.04084621]
+##  [ 0.0388482  -0.03920118  0.01624808 -0.01361975 -0.01354899  0.07107338
+##   -0.01077365  0.05688906 -0.02838149 -0.04084621]
+##  [ 0.0388482  -0.03920118  0.01624808 -0.01361975 -0.01354899  0.07107338
+##   -0.01077365  0.05688906 -0.02838149 -0.04084621]
+##  [ 0.0388482  -0.03920118  0.01624808 -0.01361975 -0.01354899  0.07107338
+##   -0.01077365  0.05688906 -0.02838149 -0.04084621]
+##  [ 0.0388482  -0.03920118  0.01624808 -0.01361975 -0.01354899  0.07107338
+##   -0.01077365  0.05688906 -0.02838149 -0.04084621]
+##  [ 0.0388482  -0.03920118  0.01624808 -0.01361975 -0.01354899  0.07107338
+##   -0.01077365  0.05688906 -0.02838149 -0.04084621]
+##  [ 0.0388482  -0.03920118  0.01624808 -0.01361975 -0.01354899  0.07107338
+##   -0.01077365  0.05688906 -0.02838149 -0.04084621]
+##  [ 0.0388482  -0.03920118  0.01624808 -0.01361975 -0.01354899  0.07107338
+##   -0.01077365  0.05688906 -0.02838149 -0.04084621]], shape=(10, 10), dtype=float32)
+
+length(mlp$losses)
+
## [1] 1
+
+mlp(tf$ones(shape(10, 10)))
+
## tf.Tensor(
+## [[ 0.0388482  -0.03920118  0.01624808 -0.01361975 -0.01354899  0.07107338
+##   -0.01077365  0.05688906 -0.02838149 -0.04084621]
+##  [ 0.0388482  -0.03920118  0.01624808 -0.01361975 -0.01354899  0.07107338
+##   -0.01077365  0.05688906 -0.02838149 -0.04084621]
+##  [ 0.0388482  -0.03920118  0.01624808 -0.01361975 -0.01354899  0.07107338
+##   -0.01077365  0.05688906 -0.02838149 -0.04084621]
+##  [ 0.0388482  -0.03920118  0.01624808 -0.01361975 -0.01354899  0.07107338
+##   -0.01077365  0.05688906 -0.02838149 -0.04084621]
+##  [ 0.0388482  -0.03920118  0.01624808 -0.01361975 -0.01354899  0.07107338
+##   -0.01077365  0.05688906 -0.02838149 -0.04084621]
+##  [ 0.0388482  -0.03920118  0.01624808 -0.01361975 -0.01354899  0.07107338
+##   -0.01077365  0.05688906 -0.02838149 -0.04084621]
+##  [ 0.0388482  -0.03920118  0.01624808 -0.01361975 -0.01354899  0.07107338
+##   -0.01077365  0.05688906 -0.02838149 -0.04084621]
+##  [ 0.0388482  -0.03920118  0.01624808 -0.01361975 -0.01354899  0.07107338
+##   -0.01077365  0.05688906 -0.02838149 -0.04084621]
+##  [ 0.0388482  -0.03920118  0.01624808 -0.01361975 -0.01354899  0.07107338
+##   -0.01077365  0.05688906 -0.02838149 -0.04084621]
+##  [ 0.0388482  -0.03920118  0.01624808 -0.01361975 -0.01354899  0.07107338
+##   -0.01077365  0.05688906 -0.02838149 -0.04084621]], shape=(10, 10), dtype=float32)
+
+length(mlp$losses)  # No accumulation.
+
## [1] 1
+
+# Let's demonstrate how to use these losses in a training loop.
+
+# Prepare a dataset.
+c(c(x_train, y_train), .) %<-% dataset_mnist()
+x_train <- array_reshape(x_train, c(60000, 784)) / 255
+
+dataset <- tfdatasets::tensor_slices_dataset(list(x_train, y_train)) %>%
+  tfdatasets::dataset_shuffle(buffer_size=1024) %>%
+  tfdatasets::dataset_batch(64)
+
+# A new MLP.
+mlp <- SparseMLP()
+
+# Loss and optimizer.
+loss_fn <- loss_sparse_categorical_crossentropy(from_logits=TRUE)
+optimizer <- optimizer_sgd(learning_rate=1e-3)
+
+coro::loop(for(data in dataset) {
+  x <- data[[1]]
+  y <- data[[2]]
+  with(tf$GradientTape() %as% tape, {
+    # Forward pass.
+    logits <- mlp(x)
+
+    # External loss value for this batch.
+    loss <- loss_fn(y, logits)
+
+    # Add the losses created during the forward pass.
+    loss <- loss + Reduce(`+`, mlp$losses)
+
+    # Get gradients of the loss wrt the weights.
+    gradients <- tape$gradient(loss, mlp$trainable_weights)
+
+    # Update the weights of our linear layer.
+    optimizer$apply_gradients(zip_lists(gradients, mlp$trainable_weights))
+  })
+})
+
+
+

Keeping track of training metrics +

+

Keras offers a broad range of built-in metrics, like +metric_auc or metric_precision_at_recall. It’s +also easy to create your own metrics in a few lines of code.

+

To use a metric in a custom training loop, you would:

+
    +
  • Instantiate the metric object, +e.g. metric <- metric_auc() +
  • +
  • Call its metric$update_state(targets, predictions) +method for each batch of data
  • +
  • Query its result via metric$result() +
  • +
  • Reset the metric’s state at the end of an epoch or at the start of +an evaluation via metric$reset_state() +
  • +
+

Here’s a simple example:

+
+# Instantiate a metric object
+accuracy <- metric_sparse_categorical_accuracy()
+
+# Prepare our layer, loss, and optimizer.
+model <- keras_model_sequential() %>%
+  layer_dense(units = 32, activation = "relu") %>%
+  layer_dense(units = 32, activation = "relu") %>%
+  layer_dense(units = 10)
+loss_fn <- loss_sparse_categorical_crossentropy(from_logits = TRUE)
+optimizer <- optimizer_adam(learning_rate=1e-3)
+
+for (epoch in seq_len(2)) {
+  coro::loop(for (data in dataset) {
+    x <- data[[1]]
+    y <- data[[2]]
+    with(tf$GradientTape() %as% tape, {
+      # Forward pass.
+      logits <- model(x)
+
+      # External loss value for this batch.
+      loss_value <- loss_fn(y, logits)
+    })
+
+    # Update the state of the `accuracy` metric.
+    accuracy$update_state(y, logits)
+
+    # Update the weights of the model to minimize the loss value.
+    gradients <- tape$gradient(loss_value, model$trainable_weights)
+    optimizer$apply_gradients(zip_lists(gradients, model$trainable_weights))
+
+  })
+  cat("Epoch:", epoch, "Accuracy:", as.numeric(accuracy$result()), "\n")
+  accuracy$reset_state()
+}
+
## Epoch: 1 Accuracy: 0.8757833
+## Epoch: 2 Accuracy: 0.93915
+

You can also define your own metric classes (via +new_metric_class()). You need to override the three +functions called above:

+
    +
  • Override update_state() to update the statistic +values.
  • +
  • Override result() to return the metric value.
  • +
  • Override reset_state() to reset the metric to its +initial state.
  • +
+

Here is an example where we implement the F1-score metric (with +support for sample weighting).

+
+F1Score <- new_metric_class(
+  "F1Score",
+  initialize = function(self, name="f1_score", dtype="float32", threshold=0.5, ...) {
+    super$initialize(name=name, dtype=dtype, ...)
+    self$threshold <- threshold
+    self$true_positives <- self$add_weight(
+      name="tp", dtype=dtype, initializer="zeros"
+    )
+    self$false_positives <- self$add_weight(
+      name="fp", dtype=dtype, initializer="zeros"
+    )
+    self$false_negatives <- self$add_weight(
+      name="fn", dtype=dtype, initializer="zeros"
+    )
+  },
+  update_state = function(y_true, y_pred, sample_weight=NULL) {
+    y_pred <- tf$math$greater_equal(y_pred, self$threshold)
+    y_true <- tf$cast(y_true, tf$bool)
+    y_pred <- tf$cast(y_pred, tf$bool)
+
+    true_positives <- tf$cast(y_true & y_pred, self$dtype)
+    false_positives <- tf$cast((!y_true) & y_pred, self$dtype)
+    false_negatives <- tf$cast(y_true & (!y_pred), self$dtype)
+
+    if (!is.null(sample_weight)) {
+      sample_weight <- tf$cast(sample_weight, self$dtype)
+      true_positives <- true_positives * sample_weight
+      false_positives <- false_positives * sample_weight
+      false_negatives <- false_negatives * sample_weight
+    }
+
+    self$true_positives$assign_add(tf$reduce_sum(true_positives))
+    self$false_positives$assign_add(tf$reduce_sum(false_positives))
+    self$false_negatives$assign_add(tf$reduce_sum(false_negatives))
+  },
+
+  result = function() {
+    precision <- self$true_positives / (self$true_positives + self$false_positives)
+    recall <- self$true_positives / (self$true_positives + self$false_negatives)
+    f1_score <- 2 * precision * recall / (precision + recall)
+    f1_score
+  },
+
+  reset_state = function() {
+    self$true_positives$assign(0)
+    self$false_positives$assign(0)
+    self$false_negatives$assign(0)
+  }
+)
+

Let’s test-drive it:

+
+m <- F1Score()
+m$update_state(c(0, 1, 0, 0), c(0.3, 0.5, 0.8, 0.9))
+cat("Intermediate result:", as.numeric(m$result()), "\n")
+
## Intermediate result: 0.5
+
+m$update_state(c(1, 1, 1, 1), c(0.1, 0.7, 0.6, 0.0))
+cat("Final result:", as.numeric(m$result()), "\n")
+
## Final result: 0.6
+
+
+

Compiled functions +

+

Running eagerly is great for debugging, but you will get better +performance by compiling your computation into static graphs. Static +graphs are a researcher’s best friend. You can compile any function by +passing it to tf_function().

+
+# Prepare our layer, loss, and optimizer.
+model <- keras_model_sequential() %>%
+  layer_dense(units = 32, activation = "relu") %>%
+  layer_dense(units = 32, activation = "relu") %>%
+  layer_dense(units = 10)
+loss_fn <- loss_sparse_categorical_crossentropy(from_logits = TRUE)
+optimizer <- optimizer_adam(learning_rate=1e-3)
+
+# Create a training step function.
+train_on_batch <- tf_function(function(x, y) {
+  with(tf$GradientTape() %as% tape, {
+    # Forward pass.
+    logits <- model(x)
+    # External loss value for this batch.
+    loss_value <- loss_fn(y, logits)
+  })
+  # Update the weights of the model to minimize the loss value.
+  gradients <- tape$gradient(loss_value, model$trainable_weights)
+  optimizer$apply_gradients(zip_lists(gradients, model$trainable_weights))
+  loss_value
+})
+
+
+# Prepare a dataset.
+c(c(x_train, y_train), .) %<-% dataset_mnist()
+x_train <- array_reshape(x_train, c(60000, 784)) / 255
+
+dataset <- tfdatasets::tensor_slices_dataset(list(x_train, y_train)) %>%
+  tfdatasets::dataset_shuffle(buffer_size=1024) %>%
+  tfdatasets::dataset_batch(64)
+
+i <- 0
+coro::loop(for (data in dataset) {
+  i <- i + 1
+  x <- data[[1]]
+  y <- data[[2]]
+  loss <- train_on_batch(x, y)
+  if (i %% 100 == 0)
+    cat("Loss:", as.numeric(loss), "\n")
+})
+
## Loss: 0.551749
+## Loss: 0.2131135
+## Loss: 0.2765952
+## Loss: 0.1296219
+## Loss: 0.2657076
+## Loss: 0.2683381
+## Loss: 0.1570166
+## Loss: 0.3139241
+## Loss: 0.08981849
+
+
+

Training mode & inference mode +

+

Some layers, in particular the BatchNormalization layer +and the Dropout layer, have different behaviors during +training and inference. For such layers, it is standard practice to +expose a training (boolean) argument in the +call method.

+

By exposing this argument in call, you enable the +built-in training and evaluation loops (e.g. fit) to correctly use the +layer in training and inference modes.

+
+Dropout <- new_layer_class(
+  "Dropout",
+  initialize = function(rate) {
+    super$initialize()
+    self$rate <- rate
+  },
+  call = function(inputs, training = NULL) {
+    if (!is.null(training) && training) {
+      return(tf$nn$dropout(inputs, rate = self$rate))
+    }
+    inputs
+  }
+)
+
+MLPWithDropout <- new_layer_class(
+  "MLPWithDropout",
+  initialize = function() {
+    super$initialize()
+    self$linear_1 <- Linear(units = 32)
+    self$dropout <- Dropout(rate = 0.5)
+    self$linear_3 <- Linear(units = 10)
+  },
+  call = function(inputs, training = NULL) {
+    x <- self$linear_1(inputs)
+    x <- tf$nn$relu(x)
+    x <- self$dropout(x, training = training)
+    self$linear_3(x)
+  }
+)
+
+mlp <- MLPWithDropout()
+y_train <- mlp(tf$ones(shape(2, 2)), training=TRUE)
+y_test <- mlp(tf$ones(shape(2, 2)), training=FALSE)
+
+
+

The Functional API for model-building +

+

To build deep learning models, you don’t have to use object-oriented +programming all the time. All layers we’ve seen so far can also be +composed functionally, like this (we call it the “Functional API”):

+
+# We use an `Input` object to describe the shape and dtype of the inputs.
+# This is the deep learning equivalent of *declaring a type*.
+# The shape argument is per-sample; it does not include the batch size.
+# The functional API is focused on defining per-sample transformations.
+# The model we create will automatically batch the per-sample transformations,
+# so that it can be called on batches of data.
+inputs <- layer_input(shape = 16, dtype = "float32")
+
+# We call layers on these "type" objects
+# and they return updated types (new shapes/dtypes).
+outputs <- inputs %>%
+  Linear(units = 32) %>% # We are reusing the Linear layer we defined earlier.
+  Dropout(rate = 0.5) %>% # We are reusing the Dropout layer we defined earlier.
+  Linear(units = 10)
+
+# A functional `Model` can be defined by specifying inputs and outputs.
+# A model is itself a layer like any other.
+model <- keras_model(inputs, outputs)
+
+# A functional model already has weights, before being called on any data.
+# That's because we defined its input shape in advance (in `Input`).
+length(model$weights)
+
## [1] 4
+
+# Let's call our model on some data, for fun.
+y <- model(tf$ones(shape(2, 16)))
+y$shape
+
## TensorShape([2, 10])
+
+# You can pass a `training` argument when calling the model
+# (it will get passed down to the Dropout layer).
+y <- model(tf$ones(shape(2, 16)), training=TRUE)
+

The Functional API tends to be more concise than subclassing, and +provides a few other advantages (generally the same advantages that +functional, typed languages provide over untyped OO development). +However, it can only be used to define DAGs of layers – recursive +networks should be defined as Layer subclasses instead.

+

Learn more about the Functional API here.

+

In your research workflows, you may often find yourself +mix-and-matching OO models and Functional models.

+
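+
+As a hedged sketch of what that can look like (the names here are
+illustrative), a Functional model can serve as a sublayer inside a
+subclassed layer, because a model is itself a layer:
+
+# A small Functional feature extractor...
+feat_inputs <- layer_input(shape = 16)
+feat_outputs <- feat_inputs %>% layer_dense(units = 32, activation = "relu")
+feature_extractor <- keras_model(feat_inputs, feat_outputs)
+
+# ...used inside a subclassed layer, next to the `Linear` class from earlier.
+Classifier <- new_layer_class(
+  "Classifier",
+  initialize = function() {
+    super$initialize()
+    self$features <- feature_extractor
+    self$out <- Linear(units = 10)
+  },
+  call = function(inputs) {
+    self$out(self$features(inputs))
+  }
+)
+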

Note that the Model class also features built-in +training & evaluation loops: fit(), +predict() and evaluate() (configured via the +compile() method). These built-in functions give you access +to the following built-in training infrastructure features:

+
    +
  • +Callbacks. +You can leverage built-in callbacks for early stopping, model +checkpointing, and monitoring training with TensorBoard. You can also implement custom callbacks if +needed (a sketch follows this list).
  • +
  • +Distributed +training. You can easily scale up your training to multiple GPUs, +TPU, or even multiple machines with the tf.distribute API – +with no changes to your code.
  • +
  • +Step +fusing. With the steps_per_execution argument in +Model.compile(), you can process multiple batches in a +single tf.function call, which greatly improves device +utilization on TPUs.
  • +
+
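+
+A custom callback, for instance, is just a set of methods that fit()
+invokes at the right times. Here is a minimal hedged sketch; it assumes
+new_callback_class(), the callback analog of new_layer_class():
+
+# A hypothetical callback that logs the loss at the end of every epoch.
+LossLogger <- new_callback_class(
+  "LossLogger",
+  on_epoch_end = function(epoch, logs = NULL) {
+    cat("Epoch", epoch, "- loss:", logs$loss, "\n")
+  }
+)
+
+# It would be passed to fit() via the `callbacks` argument, e.g.:
+# model %>% fit(dataset, epochs = 2, callbacks = list(LossLogger()))
+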

We won’t go into the details, but we provide a simple code example +below. It leverages the built-in training infrastructure to implement +the MNIST example above.

+
+inputs <- layer_input(shape = 784, dtype="float32")
+outputs <- inputs %>%
+  layer_dense(units = 32, activation = "relu") %>%
+  layer_dense(units = 32, activation = "relu") %>%
+  layer_dense(units = 10)
+model <- keras_model(inputs, outputs)
+
+# Specify the loss, optimizer, and metrics with `compile()`.
+model %>% compile(
+    loss = loss_sparse_categorical_crossentropy(from_logits = TRUE),
+    optimizer = optimizer_adam(learning_rate = 1e-3),
+    metrics = list(metric_sparse_categorical_accuracy())
+)
+
+# Train the model with the dataset for 2 epochs.
+model %>% fit(dataset, epochs=2)
+
## Epoch 1/2
+## 938/938 - 4s - 4ms/step - loss: 0.3958 - sparse_categorical_accuracy: 0.8866
+## Epoch 2/2
+## 938/938 - 1s - 960us/step - loss: 0.1888 - sparse_categorical_accuracy: 0.9443
+
+predictions <- model %>% predict(dataset)
+
## 938/938 - 1s - 1ms/step
+
+model %>% evaluate(dataset)
+
## 938/938 - 1s - 1ms/step - loss: 0.1763 - sparse_categorical_accuracy: 0.9454
+
## $loss
+## [1] 0.1763445
+##
+## $sparse_categorical_accuracy
+## [1] 0.9454167
+

You can always subclass the Model class (it works +exactly like subclassing Layer) if you want to leverage +built-in training loops for your OO models. Just override +Model$train_step() to customize what happens in +fit() while retaining support for the built-in +infrastructure features outlined above – callbacks, zero-code +distribution support, and step fusing support. You may also override +test_step() to customize what happens in +evaluate() (a sketch follows the example below), and override +predict_step() to customize what happens in +predict(). For more information, +please refer to this +guide.

+
+CustomModel <- new_model_class(
+  "CustomModel",
+  initialize = function(...) {
+    super$initialize(...)
+    self$loss_tracker <- metric_mean(name="loss")
+    self$accuracy <- metric_sparse_categorical_accuracy()
+    self$loss_fn <- loss_sparse_categorical_crossentropy(from_logits=TRUE)
+    self$optimizer <- optimizer_adam(learning_rate=1e-3)
+  },
+  train_step = function(data) {
+    c(x, y, sample_weight) %<-% unpack_x_y_sample_weight(data)
+    with(tf$GradientTape() %as% tape, {
+      y_pred <- self(x, training=TRUE)
+      loss <- self$loss_fn(y = y, y_pred = y_pred, sample_weight=sample_weight)
+    })
+    gradients <- tape$gradient(loss, self$trainable_variables)
+    self$optimizer$apply_gradients(
+      zip_lists(gradients, self$trainable_variables)
+    )
+
+    # Update metrics (includes the metric that tracks the loss)
+    self$loss_tracker$update_state(loss)
+    self$accuracy$update_state(y, y_pred, sample_weight=sample_weight)
+    # Return a list mapping metric names to current value
+    list(
+      loss = self$loss_tracker$result(),
+      accuracy = self$accuracy$result()
+    )
+  },
+  metrics = mark_active(function() {
+    list(self$loss_tracker, self$accuracy)
+  })
+)
+
+inputs <- layer_input(shape = 784, dtype="float32")
+outputs <- inputs %>%
+  layer_dense(units = 32, activation = "relu") %>%
+  layer_dense(units = 32, activation = "relu") %>%
+  layer_dense(units = 10)
+model <- CustomModel(inputs, outputs)
+model %>% compile()
+model %>% fit(dataset, epochs=2)
+
## Epoch 1/2
+## 938/938 - 2s - 2ms/step - loss: 0.3869 - sparse_categorical_accuracy: 0.8924
+## Epoch 2/2
+## 938/938 - 1s - 1ms/step - loss: 0.2163 - sparse_categorical_accuracy: 0.9370
+
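+
+And here is a hedged sketch of the test_step() override mentioned
+above (the method body is illustrative): it mirrors train_step()
+without the tape or the weight update, and customizes what
+evaluate() reports:
+
+EvalModel <- new_model_class(
+  "EvalModel",
+  initialize = function(...) {
+    super$initialize(...)
+    self$accuracy <- metric_sparse_categorical_accuracy()
+  },
+  test_step = function(data) {
+    c(x, y, sample_weight) %<-% unpack_x_y_sample_weight(data)
+    y_pred <- self(x, training = FALSE)
+    self$accuracy$update_state(y, y_pred, sample_weight = sample_weight)
+    # Return a named list of values for `evaluate()` to report.
+    list(accuracy = self$accuracy$result())
+  }
+)
+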
+
+

End-to-end experiment example 1: variational autoencoders. +

+

Here are some of the things you’ve learned so far:

+
    +
  • A Layer encapsulates a state (created in +initialize() or build()) and some computation +(defined in call()).
  • +
  • Layers can be recursively nested to create new, bigger computation +blocks.
  • +
  • You can easily write highly hackable training loops by opening a +GradientTape, calling your model inside the tape’s scope, +then retrieving gradients and applying them via an optimizer.
  • +
  • You can speed up your training loops by compiling them with +tf_function().
  • +
  • Layers can create and track losses (typically regularization losses) +via self$add_loss().
  • +
+

Let’s put all of these things together into an end-to-end example: +we’re going to implement a Variational AutoEncoder (VAE). We’ll train it +on MNIST digits.

+

Our VAE will be a subclass of Model, built as a nested +composition of layers that subclass Layer. It will feature +a regularization loss (KL divergence).

+

Below is our model definition.

+

First, we have an Encoder class, which uses a +Sampling layer to map an MNIST digit to a latent-space +triplet (z_mean, z_log_var, z).

+
+Sampling <- new_layer_class(
+  "Sampling",
+  call = function(inputs) {
+    c(z_mean, z_log_var) %<-% inputs
+    batch <- op_shape(z_mean)[[1]]
+    dim <- op_shape(z_mean)[[2]]
+    epsilon <- random_normal(shape = c(batch, dim))
+    z_mean + op_exp(0.5 * z_log_var) * epsilon
+  }
+)
+
+Encoder <- new_layer_class(
+  "Encoder",
+  initialize = function(latent_dim = 32, intermediate_dim = 64, ...) {
+    super$initialize(...)
+    self$dense_proj <- layer_dense(units = intermediate_dim, activation = "relu")
+    self$dense_mean <- layer_dense(units = latent_dim)
+    self$dense_log_var <- layer_dense(units = latent_dim)
+    self$sampling <- Sampling()
+  },
+  call = function(inputs) {
+    x <- self$dense_proj(inputs)
+    z_mean <- self$dense_mean(x)
+    z_log_var <- self$dense_log_var(x)
+    z <- self$sampling(list(z_mean, z_log_var))
+    list(z_mean, z_log_var, z)
+  }
+)
+

Next, we have a Decoder class, which maps the +probabilistic latent space coordinates back to a MNIST digit.

+
+Decoder <- new_layer_class(
+  "Decoder",
+  initialize = function(original_dim, intermediate_dim = 64, ...) {
+    super$initialize(...)
+    self$dense_proj <- layer_dense(units = intermediate_dim, activation = "relu")
+    self$dense_output <- layer_dense(units = original_dim, activation = "sigmoid")
+  },
+  call = function(inputs) {
+    x <- self$dense_proj(inputs)
+    self$dense_output(x)
+  }
+)
+

Finally, our VariationalAutoEncoder composes together an +encoder and a decoder, and creates a KL divergence regularization loss +via add_loss().

+
+VariationalAutoEncoder <- new_model_class(
+  "VariationalAutoEncoder",
+  initialize = function(original_dim,
+        intermediate_dim=64,
+        latent_dim=32,
+        name="autoencoder", ...) {
+    super$initialize(name = name, ...)
+    self$original_dim <- original_dim
+    self$encoder <- Encoder(
+      latent_dim = latent_dim,
+      intermediate_dim = intermediate_dim
+    )
+    self$decoder <- Decoder(
+      original_dim = original_dim,
+      intermediate_dim = intermediate_dim
+    )
+  },
+  call = function(inputs) {
+    c(z_mean, z_log_var, z) %<-% self$encoder(inputs)
+    reconstructed <- self$decoder(z)
+    # Add KL divergence regularization loss.
+    kl_loss <- -0.5 * op_mean(
+      z_log_var - op_square(z_mean) - op_exp(z_log_var) + 1
+    )
+    self$add_loss(kl_loss)
+    reconstructed
+  }
+)
+

Now, let’s write a training loop. Our training step is wrapped in +tf_function() to compile it into a fast graph +function.

+
+# Our model.
+vae <- VariationalAutoEncoder(
+  original_dim = 784,
+  intermediate_dim = 64,
+  latent_dim = 32
+)
+
+# Loss and optimizer.
+loss_fn <- loss_mean_squared_error()
+optimizer <- optimizer_adam(learning_rate = 1e-3)
+
+# Prepare a dataset.
+c(c(x_train, .), .) %<-% dataset_mnist()
+x_train <- array_reshape(x_train, c(60000, 784)) / 255
+
+dataset <- tfdatasets::tensor_slices_dataset(x_train) %>%
+  tfdatasets::dataset_shuffle(buffer_size=1024) %>%
+  tfdatasets::dataset_batch(32)
+
+
+training_step <- tf_function(function(x) {
+  with(tf$GradientTape() %as% tape, {
+    reconstructed <- vae(x)  # Compute input reconstruction.
+    # Compute loss.
+    loss <- loss_fn(x, reconstructed)
+    loss <- loss + op_sum(vae$losses)  # Add KLD term.
+  })
+  # Update the weights of the VAE.
+  grads <- tape$gradient(loss, vae$trainable_weights)
+  optimizer$apply_gradients(zip_lists(grads, vae$trainable_weights))
+  loss
+})
+
+losses <- c()  # Keep track of the losses over time.
+coro::loop(for(data in dataset) {
+  loss <- training_step(data)
+
+  # Logging.
+  losses[length(losses) + 1] <- as.numeric(loss)
+  if (length(losses) %% 100 == 0) {
+    cat("Step:", length(losses), "Loss:", mean(losses), "\n")
+  }
+  # Stop after 1000 steps.
+  # Training the model to convergence is left
+  # as an exercise to the reader.
+  if (length(losses) >= 1000) {
+    break
+  }
+})
+
## Step: 100 Loss: 0.1270978
+## Step: 200 Loss: 0.1003238
+## Step: 300 Loss: 0.09001128
+## Step: 400 Loss: 0.08493649
+## Step: 500 Loss: 0.08171404
+## Step: 600 Loss: 0.07926706
+## Step: 700 Loss: 0.07790599
+## Step: 800 Loss: 0.07670419
+## Step: 900 Loss: 0.07570736
+## Step: 1000 Loss: 0.07476593
+

As you can see, building and training this type of model in Keras is +quick and painless.

+
+
+

End-to-end experiment example 2: hypernetworks. +

+

Let’s take a look at another kind of research experiment: +hypernetworks.

+

The idea is to use a small deep neural network (the hypernetwork) to +generate the weights for a larger network (the main network).

+

Let’s implement a really trivial hypernetwork: we’ll use a small +2-layer network to generate the weights of a larger 3-layer network.

+
+input_dim <- 784
+classes <- 10
+
+# This is the main network we'll actually use to predict labels.
+inputs <- layer_input(shape = input_dim)
+# Mark these layers as already built, so that calling them does not
+# create new weights: their kernels and biases are supplied externally
+# by the hypernetwork at every training step.
+dense1 <- layer_dense(units = 64, activation = "relu")
+dense1$built <- TRUE
+
+dense2 <- layer_dense(units = classes)
+dense2$built <- TRUE
+
+outputs <- inputs %>% dense1() %>% dense2()
+main_network <- keras_model(inputs, outputs)
+
+# This is the number of weight coefficients to generate. Each layer in the
+# main network requires output_dim * input_dim + output_dim coefficients.
+num_weights_to_generate <- (classes * 64 + classes) + (64 * input_dim + 64)
+
+# This is the hypernetwork that generates the weights of the `main_network` above.
+hypernetwork <- keras_model_sequential() %>%
+  layer_dense(units=16, activation="relu") %>%
+  layer_dense(units=num_weights_to_generate, activation="sigmoid")
+

This is our training loop. For each batch of data:

+
    +
  • We use hypernetwork to generate an array of weight +coefficients, weights_pred +
  • +
  • We reshape these coefficients into kernel & bias tensors for the +main_network +
  • +
  • We run the forward pass of the main_network to compute +the actual MNIST predictions
  • +
  • We run backprop through the weights of the hypernetwork +to minimize the final classification loss
  • +
+
+# Loss and optimizer.
+loss_fn <- loss_sparse_categorical_crossentropy(from_logits = TRUE)
+optimizer <- optimizer_adam(learning_rate=1e-4)
+
+# Prepare a dataset.
+c(c(x_train, y_train), .) %<-% dataset_mnist()
+x_train <- array_reshape(x_train, c(60000, 784)) / 255
+
+dataset <- tfdatasets::tensor_slices_dataset(list(x_train, y_train)) %>%
+  tfdatasets::dataset_shuffle(buffer_size=1024) %>%
+  # We'll use a batch size of 1 for this experiment.
+  tfdatasets::dataset_batch(1)
+
+train_step <- function(x, y) {
+  with(tf$GradientTape() %as% tape, {
+    weights_pred <- hypernetwork(x)
+
+    # Reshape them to the expected shapes for w and b for the outer model.
+    # Layer 1 kernel.
+    start_index <- 1
+    w1_shape <- c(input_dim, 64)
+    w1_coeffs <- weights_pred[, start_index:(start_index + prod(w1_shape) - 1)]
+    w1 <- tf$reshape(w1_coeffs, as.integer(w1_shape))
+    start_index <- start_index + prod(w1_shape)
+
+    # Layer 1 bias.
+    b1_shape <- c(64)
+    b1_coeffs <- weights_pred[, start_index:(start_index + prod(b1_shape) - 1)]
+    b1 <- tf$reshape(b1_coeffs, as.integer(b1_shape))
+    start_index <- start_index + prod(b1_shape)
+
+    # Layer 2 kernel.
+    w2_shape <- c(64, classes)
+    w2_coeffs <- weights_pred[, start_index:(start_index + prod(w2_shape) - 1)]
+    w2 <- tf$reshape(w2_coeffs, as.integer(w2_shape))
+    start_index <- start_index + prod(w2_shape)
+
+    # Layer 2 bias.
+    b2_shape <- c(classes)
+    b2_coeffs <- weights_pred[, start_index:(start_index + prod(b2_shape) - 1)]
+    b2 <- tf$reshape(b2_coeffs, as.integer(b2_shape))
+    start_index <- start_index + prod(b2_shape)
+
+    # Set the weight predictions as the weight variables on the outer model.
+    dense1$kernel <- w1
+    dense1$bias <- b1
+    dense2$kernel <- w2
+    dense2$bias <- b2
+
+    # Inference on the outer model.
+    preds <- main_network(x)
+    loss <- loss_fn(y, preds)
+  })
+
+  grads <- tape$gradient(loss, hypernetwork$trainable_weights)
+  optimizer$apply_gradients(zip_lists(grads, hypernetwork$trainable_weights))
+  loss
+}
+
+losses <- c()  # Keep track of the losses over time.
+coro::loop(for (data in dataset) {
+  x <- data[[1]]
+  y <- data[[2]]
+  loss <- train_step(x, y)
+
+  # Logging.
+  losses[length(losses) + 1] <- as.numeric(loss)
+  if (length(losses) %% 100 == 0) {
+    cat("Step:", length(losses), "Loss:", mean(losses), "\n")
+  }
+  # Stop after 1000 steps.
+  # Training the model to convergence is left
+  # as an exercise to the reader.
+  if (length(losses) >= 1000) {
+    break
+  }
+})
+
## Step: 100 Loss: 2.536778
+## Step: 200 Loss: 2.236472
+## Step: 300 Loss: 2.119417
+## Step: 400 Loss: 2.040341
+## Step: 500 Loss: 1.949125
+## Step: 600 Loss: 1.859384
+## Step: 700 Loss: 1.845726
+## Step: 800 Loss: 1.820594
+## Step: 900 Loss: 1.771334
+## Step: 1000 Loss: 1.730648
+

Implementing arbitrary research ideas with Keras is straightforward +and highly productive. Imagine trying out 25 ideas per day (20 minutes +per experiment on average)!

+

Keras has been designed to go from idea to results as fast as +possible, because we believe this is the key to doing great +research.

+

We hope you enjoyed this quick introduction. Let us know what you +build with Keras!

+
+
+
diff --git a/docs/articles/making_new_layers_and_models_via_subclassing.html b/docs/articles/making_new_layers_and_models_via_subclassing.html
new file mode 100644
index 0000000000..17d1aaf3f0
--- /dev/null
+++ b/docs/articles/making_new_layers_and_models_via_subclassing.html
@@ -0,0 +1,845 @@
+Making new layers and models via subclassing • keras3
+

Introduction +

+

This guide will cover everything you need to know to build your own +subclassed layers and models. In particular, you’ll learn about the +following features:

+
    +
  • The Layer class
  • +
  • The add_weight() method
  • +
  • Trainable and non-trainable weights
  • +
  • The build() method
  • +
  • Making sure your layers can be used with any backend
  • +
  • The add_loss() method
  • +
  • The training argument in call() +
  • +
  • The mask argument in call() +
  • +
  • Making sure your layers can be serialized
  • +
+

Let’s dive in.

+
+
+

Setup +

+
+library(keras3)
+library(tensorflow, exclude = c("set_random_seed", "shape"))
+library(tfdatasets, exclude = "shape")
+
+
+

The Layer class: the combination of state (weights) and +some computation +

+

One of the central abstractions in Keras is the Layer +class. A layer encapsulates both a state (the layer’s “weights”) and a +transformation from inputs to outputs (a “call”, the layer’s forward +pass).

+

Here’s a densely-connected layer. It has two state variables: the +variables w and b.

+
+layer_linear <- Layer("Linear",
+
+  initialize = function(units = 32, input_dim = 32, ...) {
+    super$initialize(...)
+    self$w <- self$add_weight(
+      shape = shape(input_dim, units),
+      initializer = "random_normal",
+      trainable = TRUE
+    )
+    self$b <- self$add_weight(
+      shape = shape(units),
+      initializer = "zeros",
+      trainable = TRUE
+    )
+  },
+
+  call = function(inputs) {
+    op_matmul(inputs, self$w) + self$b
+  }
+)
+

You would use a layer by calling it on some tensor input(s), much +like an R function.

+
+x <- op_ones(c(2, 2))
+linear_layer <- layer_linear(units = 4, input_dim = 2)
+y <- linear_layer(x)
+print(y)
+
## tf.Tensor(
+## [[0.02153057 0.15450525 0.0205495  0.04493225]
+##  [0.02153057 0.15450525 0.0205495  0.04493225]], shape=(2, 4), dtype=float32)
+

Note that the weights w and b are +automatically tracked by the layer upon being set as layer +attributes:

+
+linear_layer$weights
+
## [[1]]
+## <KerasVariable shape=(2, 4), dtype=float32, path=linear/variable>
+##
+## [[2]]
+## <KerasVariable shape=(4), dtype=float32, path=linear/variable_1>
+
+
+

Layers can have non-trainable weights +

+

Besides trainable weights, you can add non-trainable weights to a +layer as well. Such weights are not meant to be taken into account +during backpropagation, when you are training the layer.

+

Here’s how to add and use a non-trainable weight:

+
+layer_compute_sum <- Layer(
+  "ComputeSum",
+  initialize = function(input_dim) {
+    super$initialize()
+    self$total <- self$add_weight(
+      initializer = "zeros",
+      shape = shape(input_dim),
+      trainable = FALSE
+    )
+  },
+  call = function(inputs) {
+    self$total$assign_add(op_sum(inputs, axis = 1))
+    self$total
+  }
+)
+
+x <- op_ones(c(2, 2))
+my_sum <- layer_compute_sum(input_dim = 2)
+y <- my_sum(x)
+print(as.array(y))
+
## [1] 2 2
+
+y <- my_sum(x)
+print(as.array(y))
+
## [1] 4 4
+

It’s part of layer$weights, but it gets categorized as a +non-trainable weight:

+
+cat("weights:", length(my_sum$weights))
+
## weights: 1
+
+cat("non-trainable weights:", length(my_sum$non_trainable_weights))
+
## non-trainable weights: 1
+
+# It's not included in the trainable weights:
+cat("trainable_weights:", length(my_sum$trainable_weights))
+
## trainable_weights: 0
+
+
+

Best practice: deferring weight creation until the shape of the +inputs is known +

+

Our Linear layer above took an input_dim +argument that was used to compute the shape of the weights +w and b in initialize():

+
+layer_linear <- Layer("Linear",
+
+  initialize = function(units = 32, input_dim = 32, ...) {
+    super$initialize(...)
+    self$w <- self$add_weight(
+      shape = shape(input_dim, units),
+      initializer = "random_normal",
+      trainable = TRUE
+    )
+    self$b <- self$add_weight(
+      shape = shape(units),
+      initializer = "zeros",
+      trainable = TRUE
+    )
+  },
+
+  call = function(inputs) {
+    op_matmul(inputs, self$w) + self$b
+  }
+)
+

In many cases, you may not know in advance the size of your inputs, +and you would like to lazily create weights when that value becomes +known, some time after instantiating the layer.

+

In the Keras API, we recommend creating layer weights in the +build(input_shape) method of your layer. Like +this:

+
+layer_linear <- Layer(
+  "Linear",
+  initialize = function(units = 32, ...) {
+    self$units <- as.integer(units)
+    super$initialize(...)
+  },
+  build = function(input_shape) {
+    self$w <- self$add_weight(
+      shape = shape(tail(input_shape, 1), self$units),
+      initializer = "random_normal",
+      trainable = TRUE
+    )
+    self$b <- self$add_weight(
+      shape = shape(self$units),
+      initializer = "zeros",
+      trainable = TRUE
+    )
+  },
+  call = function(inputs) {
+    op_matmul(inputs, self$w) + self$b
+  }
+)
+

The call() method of your layer will automatically run +build() the first time it is called. You now have a layer that’s lazy and +thus easier to use:

+
+# At instantiation, we don't know on what inputs this is going to get called
+linear_layer <- layer_linear(units = 32)
+
+# The layer's weights are created dynamically the first time the layer is called
+y <- linear_layer(x)
+

Implementing build() separately as shown above nicely +separates creating weights only once from using weights in every +call.

+
+
+

Layers are recursively composable +

+

If you assign a Layer instance as an attribute of another Layer, the +outer layer will start tracking the weights created by the inner +layer.

+

We recommend creating such sublayers in the initialize() +method and leaving it to the first call() to trigger building +their weights.

+
+MLPBlock <- Layer(
+  "MLPBlock",
+  initialize = function() {
+    super$initialize()
+    self$linear_1 <- layer_linear(units = 32)
+    self$linear_2 <- layer_linear(units = 32)
+    self$linear_3 <- layer_linear(units = 1)
+  },
+  call = function(inputs) {
+    inputs |>
+      self$linear_1() |>
+      activation_relu() |>
+      self$linear_2() |>
+      activation_relu() |>
+      self$linear_3()
+  }
+)
+
+mlp <- MLPBlock()
+# The first call to the `mlp` will create the weights
+y <- mlp(op_ones(shape = c(3, 64)))
+
+cat("weights:", length(mlp$weights), "\n")
+
## weights: 6
+
+cat("trainable weights:", length(mlp$trainable_weights), "\n")
+
## trainable weights: 6
+
+
+

Backend-agnostic layers and backend-specific layers +

+

As long as a layer only uses APIs from the ops namespace +(i.e., functions starting with op_) or from other Keras +namespaces (such as activations_*, random_*, or +layer_*), it can be used with any backend – +TensorFlow, JAX, or PyTorch.

+

All layers you’ve seen so far in this guide work with all Keras +backends.

+

The ops namespace gives you access to:

+
    +
  • The NumPy API, e.g. op_matmul, op_sum, +op_reshape, op_stack, etc.
  • +
  • Neural networks-specific APIs such as op_softmax, +op_conv, op_binary_crossentropy, +op_relu, etc.
  • +
+
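+
+For instance (a quick sketch), ops compose like ordinary R functions
+and run on whichever backend is active:
+
+x <- op_ones(c(2, 3))
+y <- op_matmul(x, op_transpose(x))  # shape (2, 2), on any backend
+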

You can also use backend-native APIs in your layers (such as +tf$nn functions), but if you do this, then your layer will +only be usable with the backend in question. For instance, you could +write the following JAX-specific layer using jax$numpy:

+
# keras3::install_keras(backend = c("jax"))
+jax <- reticulate::import("jax")
+
+Linear <- new_layer_class(
+  ...
+  call = function(inputs) {
+    jax$numpy$matmul(inputs, self$w) + self$b
+  }
+)
+

This would be the equivalent TensorFlow-specific layer:

+
library(tensorflow)
+
+Linear <- new_layer_class(
+  ...
+  call = function(inputs) {
+    tf$matmul(inputs, self$w) + self$b
+  }
+)
+

And this would be the equivalent PyTorch-specific layer:

+
torch <- reticulate::import("torch")
+
+Linear <- new_layer_class(
+  ...
+  call = function(inputs) {
+    torch$matmul(inputs, self$w) + self$b
+  }
+)
+

Because cross-backend compatibility is a tremendously useful +property, we strongly recommend that you make your layers +backend-agnostic by leveraging only Keras APIs.

+
+
+

The add_loss() method +

+

When writing the call() method of a layer, you can +create loss tensors that you will want to use later, when writing your +training loop. This is doable by calling +self$add_loss(value):

+
+# A layer that creates an activity regularization loss
+layer_activity_regularization <- Layer(
+  "ActivityRegularizationLayer",
+  initialize = function(rate = 1e-2) {
+    self$rate <- as.numeric(rate)
+    super$initialize()
+  },
+  call = function(inputs) {
+    self$add_loss(self$rate * op_mean(inputs))
+    inputs
+  }
+)
+

These losses (including those created by any inner layer) can be +retrieved via layer$losses. This property is reset at the +start of every call to the top-level layer, so that +layer$losses always contains the loss values created during +the last forward pass.

+
+layer_outer <- Layer(
+  "OuterLayer",
+  initialize = function() {
+    super$initialize()
+    self$activity_reg <- layer_activity_regularization(rate = 1e-2)
+  },
+  call = function(inputs) {
+    self$activity_reg(inputs)
+    inputs
+  }
+)
+
+layer <- layer_outer()
+# No losses yet since the layer has never been called
+cat("losses:", length(layer$losses), "\n")
+
## losses: 0
+
+x <- layer(op_zeros(c(1, 1)))
+# We created one loss value
+cat("losses:", length(layer$losses), "\n")
+
## losses: 1
+
+# `layer$losses` gets reset at the start of each call
+x <- layer(op_zeros(c(1, 1)))
+# This is the loss created during the call above
+cat("losses:", length(layer$losses), "\n")
+
## losses: 1
+

In addition, the losses property also contains +regularization losses created for the weights of any inner layer:

+
+layer_outer_with_kernel_regularizer <- Layer(
+  "OuterLayerWithKernelRegularizer",
+  initialize = function() {
+    super$initialize()
+    self$dense <- layer_dense(units = 32,
+                              kernel_regularizer = regularizer_l2(1e-3))
+  },
+  call = function(inputs) {
+    self$dense(inputs)
+  }
+)
+
+layer <- layer_outer_with_kernel_regularizer()
+x <- layer(op_zeros(c(1, 1)))
+
+# This is `1e-3 * sum(layer$dense$kernel ** 2)`,
+# created by the `kernel_regularizer` above.
+print(layer$losses)
+
## [[1]]
+## tf.Tensor(0.002025157, shape=(), dtype=float32)
+

These losses are meant to be taken into account when writing custom +training loops.

+

They also work seamlessly with fit() (they get +automatically summed and added to the main loss, if any):

+
+inputs <- keras_input(shape = 3)
+outputs <- inputs |> layer_activity_regularization()
+model <- keras_model(inputs, outputs)
+
+# If there is a loss passed in `compile`, the regularization
+# losses get added to it
+model |> compile(optimizer = "adam", loss = "mse")
+model |> fit(random_normal(c(2, 3)), random_normal(c(2, 3)), epochs = 1)
+
## 1/1 - 0s - 136ms/step - loss: 1.8971
+
+# It's also possible not to pass any loss in `compile`,
+# since the model already has a loss to minimize, via the `add_loss`
+# call during the forward pass!
+model |> compile(optimizer = "adam")
+model |> fit(random_normal(c(2, 3)), random_normal(c(2, 3)), epochs = 1)
+
## 1/1 - 0s - 84ms/step - loss: -3.3344e-03
+
+
+

You can optionally enable serialization on your layers +

+

If you need your custom layers to be serializable as part of a Functional model, you can optionally +implement a get_config() method:

+
+layer_linear <- Layer(
+  "Linear",
+  initialize = function(units = 32) {
+    self$units <- as.integer(units)
+    super$initialize()
+  },
+  build = function(input_shape) {
+    self$w <- self$add_weight(
+      shape = shape(tail(input_shape, 1), self$units),
+      initializer = "random_normal",
+      trainable = TRUE
+    )
+    self$b <- self$add_weight(
+      shape = shape(self$units),
+      initializer = "zeros",
+      trainable = TRUE
+    )
+  },
+  call = function(inputs) {
+    op_matmul(inputs, self$w) + self$b
+  },
+  get_config = function() {
+    list(units = self$units)
+  }
+)
+
+# Now you can recreate the layer from its config:
+layer <- layer_linear(units = 64)
+config <- get_config(layer)
+str(config)
+
## List of 1
+##  $ units: int 64
+##  - attr(*, "__class__")=<class '<r-globalenv>.Linear'>
+
+new_layer <- from_config(config)
+

Note that the initialize() method of the base +Layer class takes some keyword arguments, in particular a +name and a dtype. It’s good practice to pass +these arguments to the parent class in initialize() and to +include them in the layer config:

+
+Linear <- new_layer_class(
+  "Linear",
+  initialize = function(units = 32, ...) {
+    self$units <- as.integer(units)
+    super$initialize(...)
+  },
+  build = function(input_shape) {
+    self$w <- self$add_weight(
+      shape = shape(tail(input_shape, 1), self$units),
+      initializer = "random_normal",
+      trainable = TRUE
+    )
+    self$b <- self$add_weight(
+      shape = shape(self$units),
+      initializer = "zeros",
+      trainable = TRUE
+    )
+  },
+  call = function(inputs) {
+    op_matmul(inputs, self$w) + self$b
+  },
+  get_config = function() {
+    list(units = self$units)
+  }
+)
+
+layer <- Linear(units = 64)
+config <- get_config(layer)
+str(config)
+
## List of 1
+##  $ units: int 64
+##  - attr(*, "__class__")=<class '<r-globalenv>.Linear'>
+
+new_layer <- from_config(config)
+

If you need more flexibility when deserializing the layer from its +config, you can also override the from_config() class +method. This is the base implementation of +from_config():

+
+Layer(
+  ...,
+  from_config = function(config) {
+    # calling `__class__`() creates a new instance and calls initialize()
+    do.call(`__class__`, config)
+  }
+)
+

To learn more about serialization and saving, see the complete guide to saving and serializing +models.

+
+
+

Privileged training argument in the call() +method +

+

Some layers, in particular the BatchNormalization layer +and the Dropout layer, have different behaviors during +training and inference. For such layers, it is standard practice to +expose a training (boolean) argument in the +call() method.

+

By exposing this argument in call(), you enable the +built-in training and evaluation loops (e.g. fit()) to +correctly use the layer in training and inference.

+
+layer_custom_dropout <- Layer(
+  "CustomDropout",
+  initialize = function(rate, ...) {
+    super$initialize(...)
+    self$rate <- rate
+    self$seed_generator <- random_seed_generator(1337)
+  },
+  call = function(inputs, training = NULL) {
+    if (isTRUE(training))
+      return(random_dropout(inputs, rate = self$rate,
+                            seed = self$seed_generator))
+    inputs
+  }
+)
+
+
+

Privileged mask argument in the call() +method +

+

The other privileged argument supported by call() is the +mask argument.

+

You will find it in all Keras RNN layers. A mask is a boolean tensor +(one boolean value per timestep in the input) used to skip certain input +timesteps when processing timeseries data.

+

Keras will automatically pass the correct mask argument +to call() for layers that support it, when a mask is +generated by a prior layer. Mask-generating layers are the +Embedding layer configured with +mask_zero = TRUE, and the Masking layer.

+
+
+

The Model class +

+

In general, you will use the Layer class to define inner +computation blocks, and will use the Model class to define +the outer model – the object you will train.

+

For instance, in a ResNet50 model, you would have several ResNet +blocks subclassing Layer, and a single Model +encompassing the entire ResNet50 network.

+

The Model class has the same API as Layer, +with the following differences:

+
    +
  • It exposes built-in training, evaluation, and prediction loops +(fit(), evaluate(), +predict()).
  • +
  • It exposes the list of its inner layers, via the +model$layers property.
  • +
  • It exposes saving and serialization APIs (save_model(), +save_model_weights(), …)
  • +
+

Effectively, the Layer class corresponds to what we +refer to in the literature as a “layer” (as in “convolution layer” or +“recurrent layer”) or as a “block” (as in “ResNet block” or “Inception +block”).

+

Meanwhile, the Model class corresponds to what is +referred to in the literature as a “model” (as in “deep learning model”) +or as a “network” (as in “deep neural network”).

+

So if you’re wondering, “should I use the Layer class or +the Model class?”, ask yourself: will I need to call +fit() on it? Will I need to call save() on it? +If so, go with Model. If not (either because your class is +just a block in a bigger system, or because you are writing training +& saving code yourself), use Layer.

+

For instance, we could take our mini-resnet example above, and use it +to build a Model that we could train with +fit(), and that we could save with +save_model():

+
+ResNet <- Model(
+  "ResNet",
+  initialize = function(num_classes = 1000, ...) {
+    super$initialize(...)
+    self$block_1 <- layer_resnet_block()
+    self$block_2 <- layer_resnet_block()
+    self$global_pool <- layer_global_average_pooling_2d()
+    self$classifier <- layer_dense(num_classes)
+  },
+  call = function(inputs) {
+    inputs |>
+      self$block_1() |>
+      self$block_2() |>
+      self$global_pool() |>
+      self$classifier()
+  }
+)
+
+resnet <- ResNet()
+dataset <- ...
+resnet |> fit(dataset, epochs=10)
+resnet |> save_model("filepath.keras")
+
+
+

Putting it all together: an end-to-end example +

+

Here’s what you’ve learned so far:

+
    +
  • A Layer encapsulates a state (created in +initialize() or build()) and some computation +(defined in call()).
  • +
  • Layers can be recursively nested to create new, bigger computation +blocks.
  • +
  • Layers are backend-agnostic as long as they only use Keras APIs. You +can use backend-native APIs (such as jax$numpy, +torch$nn or tf$nn), but then your layer will +only be usable with that specific backend.
  • +
  • Layers can create and track losses (typically regularization losses) +via add_loss().
  • +
  • The outer container, the thing you want to train, is a +Model. A Model is just like a +Layer, but with added training and serialization +utilities.
  • +
+

Let’s put all of these things together into an end-to-end example: +we’re going to implement a Variational AutoEncoder (VAE) in a +backend-agnostic fashion – so that it runs the same with TensorFlow, +JAX, and PyTorch. We’ll train it on MNIST digits.

+

Our VAE will be a subclass of Model, built as a nested +composition of layers that subclass Layer. It will feature +a regularization loss (KL divergence).

+
+layer_sampling <- Layer(
+  "Sampling",
+  initialize = function(...) {
+    super$initialize(...)
+    self$seed_generator <- random_seed_generator(1337)
+  },
+  call = function(inputs) {
+    c(z_mean, z_log_var) %<-% inputs
+    batch <- op_shape(z_mean)[[1]]
+    dim <- op_shape(z_mean)[[2]]
+    epsilon <- random_normal(shape = c(batch, dim),
+                             seed=self$seed_generator)
+    z_mean + op_exp(0.5 * z_log_var) * epsilon
+  }
+)
+
+# Maps MNIST digits to a triplet (z_mean, z_log_var, z).
+layer_encoder <- Layer(
+  "Encoder",
+  initialize = function(latent_dim = 32, intermediate_dim = 64, ...) {
+    super$initialize(...)
+    self$dense_proj <-
+      layer_dense(units = intermediate_dim,  activation = "relu")
+    self$dense_mean <- layer_dense(units = latent_dim)
+    self$dense_log_var <- layer_dense(units = latent_dim)
+    self$sampling <- layer_sampling()
+  },
+  call = function(inputs) {
+    x <- self$dense_proj(inputs)
+    z_mean <- self$dense_mean(x)
+    z_log_var <- self$dense_log_var(x)
+    z <- self$sampling(list(z_mean, z_log_var))
+    list(z_mean, z_log_var, z)
+  }
+)
+
+# Converts z, the encoded digit vector, back into a readable digit.
+layer_decoder <- Layer(
+  "Decoder",
+  initialize = function(original_dim, intermediate_dim = 64, ...) {
+    super$initialize(...)
+    self$dense_proj <-
+      layer_dense(units = intermediate_dim, activation = "relu")
+    self$dense_output <-
+      layer_dense(units = original_dim, activation = "sigmoid")
+  },
+  call = function(inputs) {
+    x <- self$dense_proj(inputs)
+    self$dense_output(x)
+  }
+)
+
+# Combines the encoder and decoder into an end-to-end model for training.
+VariationalAutoEncoder <- Model(
+  "VariationalAutoEncoder",
+
+  initialize = function(original_dim, intermediate_dim = 64, latent_dim = 32,
+                        name = "autoencoder", ...) {
+    super$initialize(name = name, ...)
+    self$original_dim <- original_dim
+    self$encoder <- layer_encoder(latent_dim = latent_dim,
+                            intermediate_dim = intermediate_dim)
+    self$decoder <- layer_decoder(original_dim = original_dim,
+                            intermediate_dim = intermediate_dim)
+  },
+
+  call = function(inputs) {
+    c(z_mean, z_log_var, z) %<-% self$encoder(inputs)
+    reconstructed <- self$decoder(z)
+    # Add KL divergence regularization loss.
+    kl_loss <- -0.5 * op_mean(z_log_var - op_square(z_mean) - op_exp(z_log_var) + 1)
+    self$add_loss(kl_loss)
+    reconstructed
+  }
+)
+

Let’s train it on MNIST using the fit() API:

+
+c(c(x_train, .), .) %<-% dataset_mnist()
+x_train <- x_train |>
+  op_reshape(c(60000, 784)) |>
+  op_cast("float32") |>
+  op_divide(255)
+
+original_dim <- 784
+vae <- VariationalAutoEncoder(
+  original_dim = 784,
+  intermediate_dim = 64,
+  latent_dim = 32
+)
+
+optimizer <- optimizer_adam(learning_rate = 1e-3)
+vae |> compile(optimizer, loss = loss_mean_squared_error())
+
+vae |> fit(x_train, x_train, epochs = 2, batch_size = 64)
+
## Epoch 1/2
+## 938/938 - 5s - 5ms/step - loss: 0.0748
+## Epoch 2/2
+## 938/938 - 1s - 959us/step - loss: 0.0676
+
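
Once trained, the VAE behaves like any other Keras model; for instance (a quick sketch), you can compute reconstructions with predict():

+
+reconstructions <- vae |> predict(x_train)
+dim(reconstructions)  # 60000 784 -- one reconstructed digit per input row
+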
+
+
diff --git a/docs/articles/sequential_model.html b/docs/articles/sequential_model.html
index a39f2dff79..f1c27de206 100644
The Sequential model • keras3

Setup +

+
+library(keras3)
+
+
+

When to use a Sequential model +

+

A Sequential model is appropriate for a plain +stack of layers where each layer has exactly one input +tensor and one output tensor.

+

Schematically, the following Sequential model:

+
+model <- keras_model_sequential() |>
+  layer_dense(units = 2, activation = "relu", name = "layer1") |>
+  layer_dense(units = 3, activation = "relu", name = "layer2") |>
+  layer_dense(units = 4, name = "layer3")
+
+# Call model on a test input
+x <- op_ones(c(3, 3))
+y <- model(x)
+

is equivalent to this function:

+
+# Create 3 layers
+layer1 <- layer_dense(units = 2, activation="relu", name="layer1")
+layer2 <- layer_dense(units = 3, activation="relu", name="layer2")
+layer3 <- layer_dense(units = 4, name="layer3")
+
+# Call layers on a test input
+x <- op_ones(c(3, 3))
+y <- x |> layer1() |> layer2() |> layer3()
+

A Sequential model is not appropriate when:

+
    +
  • Your model has multiple inputs or multiple outputs
  • +
  • Any of your layers has multiple inputs or multiple outputs
  • +
  • You need to do layer sharing
  • +
  • You want non-linear topology (e.g. a residual connection, a +multi-branch model)
  • +
+
+
+

Creating a Sequential model +

+

You can create a Sequential model by piping layers into the +keras_model_sequential() object:

+
+model <- keras_model_sequential() |>
+  layer_dense(units = 2, activation = "relu") |>
+  layer_dense(units = 3, activation = "relu") |>
+  layer_dense(units = 4)
+

or by passing a list of layers to +keras_model_sequential():

+
+model <- keras_model_sequential(layers = list(
+  layer_dense(units = 2, activation = "relu"),
+  layer_dense(units = 3, activation = "relu"),
+  layer_dense(units = 4)
+))
+

Its layers are accessible via the layers attribute:

+
+model$layers
+
## [[1]]
+## <Dense name=dense_3, built=False>
+##  signature: (*args, **kwargs)
+##
+## [[2]]
+## <Dense name=dense_4, built=False>
+##  signature: (*args, **kwargs)
+##
+## [[3]]
+## <Dense name=dense_5, built=False>
+##  signature: (*args, **kwargs)
+

You can also create a Sequential model incrementally:

+
+model <- keras_model_sequential()
+model |> layer_dense(units = 2, activation="relu")
+model |> layer_dense(units = 3, activation="relu")
+model |> layer_dense(units = 4)
+

Note that there’s also a corresponding pop_layer() +method to remove layers: a Sequential model behaves very much like a +stack of layers.

+
+model |> pop_layer()
+length(model$layers)  # 2
+
## [1] 2
+

Also note that the Sequential constructor accepts a name +argument, just like any layer or model in Keras. This is useful to +annotate TensorBoard graphs with semantically meaningful names.

+
+model <- keras_model_sequential(name = "my_sequential")
+model |> layer_dense(units = 2, activation="relu", name = "layer1")
+model |> layer_dense(units = 3, activation="relu", name = "layer2")
+model |> layer_dense(units = 4, name = "layer3")
+
+
+

Specifying the input shape in advance +

+

Generally, all layers in Keras need to know the shape of their inputs +in order to be able to create their weights. So when you create a layer +like this, initially, it has no weights:

+
+layer <- layer_dense(units = 3)
+layer$weights  # Empty
+
## list()
+

It creates its weights the first time it is called on an input, since +the shape of the weights depends on the shape of the inputs:

+
+# Call layer on a test input
+x <- op_ones(c(1, 4))
+y <- layer(x)
+layer$weights  # Now it has weights, of shape (4, 3) and (3,)
+
## [[1]]
+## <KerasVariable shape=(4, 3), dtype=float32, path=dense_9/kernel>
+##
+## [[2]]
+## <KerasVariable shape=(3), dtype=float32, path=dense_9/bias>
+

Naturally, this also applies to Sequential models. When you +instantiate a Sequential model without an input shape, it isn’t “built”: +it has no weights (and calling model$weights results in an +error stating just this). The weights are created when the model first +sees some input data:

+
+model <- keras_model_sequential() |>
+  layer_dense(units = 2, activation = "relu") |>
+  layer_dense(units = 3, activation = "relu") |>
+  layer_dense(units = 4)
+# No weights at this stage!
+
+# At this point, you can't do this:
+# model$weights
+
+
+# Call the model on a test input
+x <- op_ones(c(1, 4))
+y <- model(x)
+length(model$weights)
+
## [1] 6
+

Once a model is “built”, you can call its summary() +method to display its contents:

+
+summary(model)
+
## Model: "sequential_4"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ dense_10 (Dense)                │ (1, 2)                 │            10
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dense_11 (Dense)                │ (1, 3)                 │             9
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dense_12 (Dense)                │ (1, 4)                 │            16
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 35 (140.00 B)
+##  Trainable params: 35 (140.00 B)
+##  Non-trainable params: 0 (0.00 B)
+

However, it can be very useful when building a Sequential model +incrementally to be able to display the summary of the model so far, +including the current output shape. In this case, you should start your +model by passing an input_shape argument to your model, so +that it knows its input shape from the start:

+
+model <- keras_model_sequential(input_shape = 4) |>
+  layer_dense(units = 2, activation = "relu")
+summary(model)
+
## Model: "sequential_5"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ dense_13 (Dense)                │ (None, 2)              │            10
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 10 (40.00 B)
+##  Trainable params: 10 (40.00 B)
+##  Non-trainable params: 0 (0.00 B)
+
+model$layers
+
## [[1]]
+## <Dense name=dense_13, built=True>
+##  signature: (*args, **kwargs)
+

Models built with a predefined input shape like this always have +weights (even before seeing any data) and always have a defined output +shape.

+
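For example (a minimal check, reusing the model defined just above):

+
+# With input_shape specified, weights exist before any data is seen:
+length(model$weights)  # 2: the kernel and bias of the single dense layer
+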

In general, it’s a recommended best practice to always specify the +input shape of a Sequential model in advance if you know what it is.

+
+
+

A common debugging workflow: add layers + +summary() +

+

When building a new Sequential architecture, it’s useful to +incrementally stack layers with |> and frequently print +model summaries. For instance, this enables you to monitor how a stack +of Conv2D and MaxPooling2D layers is +downsampling image feature maps:

+
+model <- keras_model_sequential(input_shape = c(250, 250, 3)) |>
+  layer_conv_2d(filters = 32, kernel_size = 5, strides = 2, activation = "relu") |>
+  layer_conv_2d(filters = 32, kernel_size = 3, activation = "relu") |>
+  layer_max_pooling_2d(pool_size = c(3, 3))
+
+# Can you guess what the current output shape is at this point? Probably not.
+# Let's just print it:
+summary(model)
+
## Model: "sequential_6"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ conv2d (Conv2D)                 │ (None, 123, 123, 32)   │         2,432
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_1 (Conv2D)               │ (None, 121, 121, 32)   │         9,248
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ max_pooling2d (MaxPooling2D)    │ (None, 40, 40, 32)     │             0
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 11,680 (45.62 KB)
+##  Trainable params: 11,680 (45.62 KB)
+##  Non-trainable params: 0 (0.00 B)
+
+# The answer was: (40, 40, 32), so we can keep downsampling...
+
+model |>
+  layer_conv_2d(filters = 32, kernel_size = 3, activation = "relu") |>
+  layer_conv_2d(filters = 32, kernel_size = 3, activation = "relu") |>
+  layer_max_pooling_2d(pool_size = 3) |>
+  layer_conv_2d(filters = 32, kernel_size = 3, activation = "relu") |>
+  layer_conv_2d(filters = 32, kernel_size = 3, activation = "relu") |>
+  layer_max_pooling_2d(pool_size = 2)
+
+# And now?
+summary(model)
+
## Model: "sequential_6"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ conv2d (Conv2D)                 │ (None, 123, 123, 32)   │         2,432
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_1 (Conv2D)               │ (None, 121, 121, 32)   │         9,248
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ max_pooling2d (MaxPooling2D)    │ (None, 40, 40, 32)     │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_2 (Conv2D)               │ (None, 38, 38, 32)     │         9,248
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_3 (Conv2D)               │ (None, 36, 36, 32)     │         9,248
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ max_pooling2d_1 (MaxPooling2D)  │ (None, 12, 12, 32)     │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_4 (Conv2D)               │ (None, 10, 10, 32)     │         9,248
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_5 (Conv2D)               │ (None, 8, 8, 32)       │         9,248
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ max_pooling2d_2 (MaxPooling2D)  │ (None, 4, 4, 32)       │             0
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 48,672 (190.12 KB)
+##  Trainable params: 48,672 (190.12 KB)
+##  Non-trainable params: 0 (0.00 B)
+
+# Now that we have 4x4 feature maps, time to apply global max pooling.
+model |>
+  layer_global_max_pooling_2d()
+
+# Finally, we add a classification layer.
+model |>
+  layer_dense(units = 10, activation = "softmax")
+

Very practical, right?

+

Note that |> is equivalent to calling model$add(): it modifies the model in place, so you don’t need to reassign the model symbol at each step.

+
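For example (a minimal sketch):

+
+model <- keras_model_sequential(input_shape = 16)
+model |> layer_dense(units = 8)  # modified in place; no reassignment needed
+length(model$layers)  # 1
+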
+
+

What to do once you have a model +

+

Once your model architecture is ready, you will want to:

    +
  • Train your model, evaluate it, and run inference; see the guide to training & evaluation with the built-in methods.
  • +
  • Save your model to disk and restore it; see the guide to serialization & saving.
  • +
+
+
+

Feature extraction with a Sequential model +

+

Once a Sequential model has been built, it behaves like a Functional API model. This means that +every layer has an input and output attribute. +These attributes can be used to do neat things, like quickly creating a +model that extracts the outputs of all intermediate layers in a +Sequential model:

+
+initial_model <- keras_model_sequential(input_shape = c(250, 250, 3)) |>
+  layer_conv_2d(filters = 32, kernel_size = 5, strides = 2, activation = "relu") |>
+  layer_conv_2d(filters = 32, kernel_size = 3, activation = "relu") |>
+  layer_conv_2d(filters = 32, kernel_size = 3, activation = "relu")
+
+
+feature_extractor <- keras_model(
+  inputs = initial_model$inputs,
+  outputs = lapply(initial_model$layers, function(x) x$output)
+)
+
+# Call feature extractor on test input.
+x <- op_ones(c(1, 250, 250, 3))
+features <- feature_extractor(x)
+

Here’s a similar example that only extracts features from one layer:

+
+initial_model <-
+  keras_model_sequential(input_shape = c(250, 250, 3)) |>
+  layer_conv_2d(filters = 32, kernel_size = 5, strides = 2,
+                activation = "relu") |>
+  layer_conv_2d(filters = 32, kernel_size = 3, activation = "relu",
+                name = "my_intermediate_layer") |>
+  layer_conv_2d(filters = 32, kernel_size = 3, activation = "relu")
+
+feature_extractor <- keras_model(
+  inputs = initial_model$inputs,
+  outputs = get_layer(initial_model, "my_intermediate_layer")$output
+)
+
+# Call feature extractor on test input.
+x <- op_ones(c(1, 250, 250, 3))
+features <- feature_extractor(x)
+
+
+

Transfer learning with a Sequential model +

+

Transfer learning consists of freezing the bottom layers in a model +and only training the top layers. If you aren’t familiar with it, make +sure to read our guide to transfer +learning.

+

Here are two common transfer learning blueprints involving Sequential models.

+

First, let’s say that you have a Sequential model, and you want to +freeze all layers except the last one. In this case, you can call +freeze_weights(). Alternatively, you can iterate over +model$layers and set +layer$trainable <- FALSE on each layer, except the last +one. Like this:

+
+model <- keras_model_sequential(input_shape = 784) |>
+  layer_dense(units = 32, activation = "relu") |>
+  layer_dense(units = 32, activation = "relu") |>
+  layer_dense(units = 32, activation = "relu") |>
+  layer_dense(units = 10)
+
+# Presumably you would want to first load pre-trained weights.
+model |> load_model_weights(...)
+
+# Freeze all layers except the last one.
+model |> freeze_weights(from = 1, to = -2)
+model # note the "Trainable" column now visible in the summary table
+
## Model: "sequential_9"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━┓
+## ┃ Layer (type)                 Output Shape              Param #  Trai… 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━┩
+## │ dense_15 (Dense)            │ (None, 32)            │     25,120N
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ dense_16 (Dense)            │ (None, 32)            │      1,056N
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ dense_17 (Dense)            │ (None, 32)            │      1,056N
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ dense_18 (Dense)            │ (None, 10)            │        330Y
+## └─────────────────────────────┴───────────────────────┴────────────┴───────┘
+##  Total params: 27,562 (107.66 KB)
+##  Trainable params: 330 (1.29 KB)
+##  Non-trainable params: 27,232 (106.38 KB)
+
+# Another way to freeze all layers except the last one.
+for (layer in model$layers[-length(model$layers)]) {
+  layer$trainable <- FALSE
+}
+
+# Recompile and train (this will only update the weights of the last layer).
+model |> compile(...)
+model |> fit(...)
+

Another common blueprint is to use a Sequential model to stack a +pre-trained model and some freshly initialized classification layers. +Like this:

+
+# Load a convolutional base with pre-trained weights
+base_model <- application_xception(weights = 'imagenet',
+                                   include_top = FALSE,
+                                   pooling = 'avg')
+
+# Freeze the base model
+freeze_weights(base_model)
+
+# Use a Sequential model to add a trainable classifier on top
+model <- keras_model_sequential() |>
+  base_model() |>
+  layer_dense(1000)
+
+# Compile & train
+model |> compile(...)
+model |> fit(...)
+

If you do transfer learning, you will probably find yourself +frequently using these two patterns.

+

That’s about all you need to know about Sequential models!

+

To find out more about building models in Keras, see:

    +
  • Guide to the Functional API
  • +
  • Guide to making new layers and models via subclassing
  • +
+
+
+
diff --git a/docs/articles/serialization_and_saving.html b/docs/articles/serialization_and_saving.html
new file mode 100644
index 0000000000..4fd7708c21
Save, serialize, and export models • keras3

Introduction +

+

A Keras model consists of multiple components:

+
    +
  • The architecture, or configuration, which specifies what layers the model contains, and how they’re connected.
  • +
  • A set of weights values (the “state of the model”).
  • +
  • An optimizer (defined by compiling the model).
  • +
  • A set of losses and metrics (defined by compiling the model).
  • +
+

The Keras API saves all of these pieces together in a unified format, +marked by the .keras extension. This is a zip archive +consisting of the following:

+
    +
  • A JSON-based configuration file (config.json): records the configuration of the model, its layers, and other trackables.
  • +
  • An H5-based state file, such as model.weights.h5 (for the whole model), with directory keys for layers and their weights.
  • +
  • A metadata file in JSON, storing things such as the current Keras +version.
  • +
+
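
You can verify this structure yourself (a sketch, assuming a model has already been saved to "my_model.keras" as shown below):

+
+# List the entries of the .keras zip archive using base R:
+unzip("my_model.keras", list = TRUE)
+# Expect entries such as config.json, model.weights.h5, and metadata.json.
+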

Let’s take a look at how this works.

+
+
+

How to save and load a model +

+

If you only have 10 seconds to read this guide, here’s what you need +to know.

+

Saving a Keras model:

+
+# Get model (Sequential, Functional Model, or Model subclass)
+model <- ...
+
+# The filename needs to end with the .keras extension
+model |> save_model('path/to/location.keras')
+

Loading the model back:

+
+model <- load_model('path/to/location.keras')
+

Now, let’s look at the details.

+
+
+

Setup +

+
+library(keras3)
+
+
+

Saving +

+

This section is about saving an entire model to a single file. The +file will include:

+
    +
  • The model’s architecture/config
  • +
  • The model’s weight values (which were learned during training)
  • +
  • The model’s compilation information (if compile() was +called)
  • +
  • The optimizer and its state, if any (this enables you to restart training where you left off)
  • +
+
+

APIs +

+

You can save a model with save_model(). You can load it +back with load_model().

+

The only supported format in Keras 3 is the “Keras v3” format, which +uses the .keras extension.

+

Example:

+
+get_model <- function() {
+  # Create a simple model.
+  inputs <- keras_input(shape(32))
+  outputs <- inputs |> layer_dense(1)
+  model <-  keras_model(inputs, outputs)
+  model |> compile(optimizer = optimizer_adam(), loss = "mean_squared_error")
+  model
+}
+
+model <- get_model()
+
+# Train the model.
+test_input <- random_uniform(c(128, 32))
+test_target <- random_uniform(c(128, 1))
+model |> fit(test_input, test_target)
+
+# Calling save_model("my_model.keras") creates a zip archive `my_model.keras`.
+model |> save_model("my_model.keras")
+
+# It can be used to reconstruct the model identically.
+reconstructed_model <- load_model("my_model.keras")
+
+# Let's check:
+stopifnot(all.equal(
+  model |> predict(test_input),
+  reconstructed_model |> predict(test_input)
+))
+
+
+

Custom objects +

+

This section covers the basic workflows for handling custom layers, +functions, and models in Keras saving and reloading.

+

When saving a model that includes custom objects, such as a +subclassed Layer, you must define a +get_config() method on the object class. If the arguments +passed to the constructor (initialize() method) of the +custom object aren’t simple objects (anything other than types like +ints, strings, etc.), then you must also explicitly +deserialize these arguments in the from_config() class +method.

+

Like this:

+
+layer_custom <- Layer(
+  "CustomLayer",
+  initialize = function(sublayer, ...) {
+    super$initialize(...)
+    self$sublayer <- sublayer
+  },
+  call = function(x) {
+    self$sublayer(x)
+  },
+  get_config = function() {
+    base_config <- super$get_config()
+    config <- list(
+      sublayer = serialize_keras_object(self$sublayer)
+    )
+    c(base_config, config)
+  },
+  from_config = function(cls, config) {
+    sublayer <- deserialize_keras_object(config$sublayer)
+    config$sublayer <- NULL  # drop it so it isn't passed twice below
+    cls(sublayer, !!!config)
+  }
+)
+

Please see the Defining the config methods +section for more details and examples.

+

The saved .keras file is lightweight and does not store the code for custom objects. Therefore, to reload the model, load_model() requires access to the definition of any custom objects used, through one of the following methods:

+
    +
  1. Registering custom objects (preferred),
  2. Passing custom objects directly when loading, or
  3. Using a custom object scope.
+

Below are examples of each workflow:

+
+

Registering custom objects (preferred) +

+

This is the preferred method, as custom object registration greatly +simplifies saving and loading code. Calling +register_keras_serializable() on a custom object registers +the object globally in a master list, allowing Keras to recognize the +object when loading the model.

+

Let’s create a custom model involving both a custom layer and a +custom activation function to demonstrate this.

+

Example:

+
+# Clear all previously registered custom objects
+set_custom_objects(clear = TRUE)
+
## named list()
+
+layer_custom <- Layer(
+  "CustomLayer",
+  initialize = function(factor) {
+    super$initialize()
+    self$factor <- factor
+  },
+
+  call = function(x) {
+    x * self$factor
+  },
+
+  get_config = function() {
+    list(factor = self$factor)
+  }
+)
+
+# Upon registration, you can optionally specify a package or a name.
+# If left blank, the package defaults to "Custom" and the name defaults to
+# the class name.
+register_keras_serializable(layer_custom, package = "MyLayers")
+
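+# Wrap an R function as a named callable so it can be registered and
+# looked up by name on reload (this uses an internal keras3 helper).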
+custom_fn <- keras3:::py_func2(function(x) x^2, name = "custom_fn", convert = TRUE)
+
+register_keras_serializable(custom_fn, name="custom_fn", package="my_package")
+
+
+# Create the model.
+get_model <- function() {
+  inputs <- keras_input(shape(4))
+  mid <- inputs |> layer_custom(0.5)
+  outputs <- mid |> layer_dense(1, activation = custom_fn)
+  model <- keras_model(inputs, outputs)
+  model |> compile(optimizer = "rmsprop", loss = "mean_squared_error")
+  model
+}
+
+
+# Train the model.
+train_model <- function(model) {
+  input <- random_uniform(c(4, 4))
+  target <- random_uniform(c(4, 1))
+  model |> fit(input, target, verbose = FALSE, epochs = 1)
+  model
+}
+
+test_input <- random_uniform(c(4, 4))
+test_target <- random_uniform(c(4, 1))
+
+model <- get_model() |> train_model()
+model |> save_model("custom_model.keras", overwrite = TRUE)
+
+# Now, we can simply load without worrying about our custom objects.
+reconstructed_model <- load_model("custom_model.keras")
+
+# Let's check:
+stopifnot(all.equal(
+  model |> predict(test_input, verbose = FALSE),
+  reconstructed_model |> predict(test_input, verbose = FALSE)
+))
+
+
+

Passing custom objects to load_model() +

+
+model <- get_model() |> train_model()
+
+# Calling save_model("custom_model.keras") creates a zip archive `custom_model.keras`.
+model |> save_model("custom_model.keras", overwrite = TRUE)
+
+# Upon loading, pass a named list containing the custom objects used in the
+# `custom_objects` argument of `load_model()`.
+reconstructed_model <- load_model(
+  "custom_model.keras",
+  custom_objects = list(CustomLayer = layer_custom,
+                        custom_fn = custom_fn)
+)
+
+# Let's check:
+stopifnot(all.equal(
+  model |> predict(test_input, verbose = FALSE),
+  reconstructed_model |> predict(test_input, verbose = FALSE)
+))
+
+
+

Using a custom object scope +

+

Any code within the custom object scope will be able to recognize the +custom objects passed to the scope argument. Therefore, loading the +model within the scope will allow the loading of our custom objects.

+

Example:

+
+model <- get_model() |> train_model()
+model |> save_model("custom_model.keras", overwrite = TRUE)
+
+# Pass the custom objects list to a custom object scope and place
+# the load_model() call within the scope.
+custom_objects <- list(CustomLayer = layer_custom, custom_fn = custom_fn)
+
+with_custom_object_scope(custom_objects, {
+  reconstructed_model <- load_model("custom_model.keras")
+})
+
+# Let's check:
+stopifnot(all.equal(
+  model |> predict(test_input, verbose = FALSE),
+  reconstructed_model |> predict(test_input, verbose = FALSE)
+))
+
+
+
+

Model serialization +

+

This section is about saving only the model’s configuration, without +its state. The model’s configuration (or architecture) specifies what +layers the model contains, and how these layers are connected. If you +have the configuration of a model, then the model can be created with a +freshly initialized state (no weights or compilation information).

+
+

APIs +

+

The following serialization APIs are available:

+
    +
  • clone_model(model): make a (randomly initialized) copy of a model.
  • +
  • get_config() and from_config(): retrieve the configuration of a layer or model, and recreate a model instance from its config, respectively.
  • +
  • save_model_config() and load_model_config(): similar, but as a JSON file.
  • +
  • serialize_keras_object(): retrieve the configuration of any arbitrary Keras object.
  • +
  • deserialize_keras_object(): recreate an object instance from its configuration.
  • +
+
+
+

In-memory model cloning +

+

You can do in-memory cloning of a model via +clone_model(). This is equivalent to getting the config +then recreating the model from its config (so it does not preserve +compilation information or layer weights values).

+

Example:

+
+new_model <- clone_model(model)
+
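
As the paragraph above notes, this is (roughly) equivalent to:

+
+new_model <- from_config(get_config(model))
+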
+
+

+get_config() and from_config() +

+

Calling get_config(model) or +get_config(layer) will return a named list containing the +configuration of the model or layer, respectively. You should define +get_config() to contain arguments needed for the +initialize() method of the model or layer. At loading time, +the from_config(config) method will then call +initialize() with these arguments to reconstruct the model +or layer.

+

Layer example:

+
+layer <- layer_dense(, 3, activation="relu")
+layer_config <- get_config(layer)
+str(layer_config)
+
## List of 12
+##  $ name              : chr "dense_4"
+##  $ trainable         : logi TRUE
+##  $ dtype             : chr "float32"
+##  $ units             : int 3
+##  $ activation        : chr "relu"
+##  $ use_bias          : logi TRUE
+##  $ kernel_initializer:List of 4
+##   ..$ module         : chr "keras.initializers"
+##   ..$ class_name     : chr "GlorotUniform"
+##   ..$ config         :List of 1
+##   .. ..$ seed: NULL
+##   ..$ registered_name: NULL
+##  $ bias_initializer  :List of 4
+##   ..$ module         : chr "keras.initializers"
+##   ..$ class_name     : chr "Zeros"
+##   ..$ config         : Named list()
+##   ..$ registered_name: NULL
+##  $ kernel_regularizer: NULL
+##  $ bias_regularizer  : NULL
+##  $ kernel_constraint : NULL
+##  $ bias_constraint   : NULL
+##  - attr(*, "__class__")=<class 'keras.src.layers.core.dense.Dense'>
+

Now let’s reconstruct the layer using the from_config() +method:

+
+new_layer <- from_config(layer_config)
+

Sequential model example:

+
+model <- keras_model_sequential(input_shape = c(32)) |>
+  layer_dense(1)
+config <- get_config(model)
+new_model <- from_config(config)
+

Functional model example:

+
+inputs <- keras_input(c(32))
+outputs <- inputs |> layer_dense(1)
+model <- keras_model(inputs, outputs)
+config <- get_config(model)
+new_model <- from_config(config)
+
+
+

+save_model_config() and +load_model_config() +

+

This is similar to get_config() / from_config(), except it turns the model into a JSON file, which can then be loaded without the original model class. It is also specific to models; it isn’t meant for layers.

+

Example:

+
+model <- keras_model_sequential(input_shape = c(32)) |>
+  layer_dense(1)
+save_model_config(model, "model_config.json")
+new_model <- load_model_config("model_config.json")
+
+unlink("model_config.json")
+
+
+

Arbitrary object serialization and deserialization +

+

The serialize_keras_object() and deserialize_keras_object() APIs are general-purpose APIs that can be used to serialize or deserialize any Keras object and any custom object. They are the foundation of saving model architecture and are behind all serialize()/deserialize() calls in Keras.

+

Example:

+
+my_reg <- regularizer_l1(0.005)
+config <- serialize_keras_object(my_reg)
+str(config)
+
## List of 4
+##  $ module         : chr "keras.regularizers"
+##  $ class_name     : chr "L1"
+##  $ config         :List of 1
+##   ..$ l1: num 0.005
+##  $ registered_name: NULL
+

Note the serialization format containing all the necessary +information for proper reconstruction:

+
    +
  • +module containing the name of the Keras module or other +identifying module the object comes from
  • +
  • +class_name containing the name of the object’s +class.
  • +
  • +config with all the information needed to reconstruct +the object
  • +
  • +registered_name for custom objects. See here.
  • +
+

Now we can reconstruct the regularizer.

+
+new_reg <- deserialize_keras_object(config)
+new_reg
+
## <keras.src.regularizers.regularizers.L1 object>
+##  signature: (x)
+
+
+
+

Model weights saving +

+

You can choose to only save & load a model’s weights. This can be +useful if:

+
    +
  • You only need the model for inference: in this case you won’t need +to restart training, so you don’t need the compilation information or +optimizer state.
  • +
  • You are doing transfer learning: in this case you will be training a +new model reusing the state of a prior model, so you don’t need the +compilation information of the prior model.
  • +
+
+

APIs for in-memory weight transfer +

+

Weights can be copied between different objects by using +get_weights() and set_weights():

+
    +
  • +get_weights(<layer>): Returns a list of arrays of +weight values.
  • +
  • set_weights(<layer>, weights): Sets the model/layer weights to the values provided (as arrays).
  • +
+

Examples:

+

Transferring weights from one layer to another, in +memory

+
+create_layer <- function() {
+  layer <- layer_dense(, 64, activation = "relu", name = "dense_2")
+  layer$build(shape(NA, 784))
+  layer
+}
+
+layer_1 <- create_layer()
+layer_2 <- create_layer()
+
+# Copy weights from layer 1 to layer 2
+layer_2 |> set_weights(get_weights(layer_1))
+

Transferring weights from one model to another model with +a compatible architecture, in memory

+
+# Create a simple functional model
+inputs <- keras_input(shape=c(784), name="digits")
+outputs <- inputs |>
+  layer_dense(64, activation = "relu", name = "dense_1") |>
+  layer_dense(64, activation = "relu", name = "dense_2") |>
+  layer_dense(10, name = "predictions")
+functional_model <- keras_model(inputs = inputs, outputs = outputs,
+                               name = "3_layer_mlp")
+
+# Define a subclassed model with the same architecture
+SubclassedModel <- new_model_class(
+  "SubclassedModel",
+  initialize = function(output_dim, name = NULL) {
+    super$initialize(name = name)
+    self$output_dim <- output_dim |> as.integer()
+    self$dense_1 <- layer_dense(, 64, activation = "relu",
+                                name = "dense_1")
+    self$dense_2 <- layer_dense(, 64, activation = "relu",
+                                name = "dense_2")
+    self$dense_3 <- layer_dense(, self$output_dim,
+                                name = "predictions")
+  },
+
+  call = function(inputs) {
+    inputs |>
+      self$dense_1() |>
+      self$dense_2() |>
+      self$dense_3()
+  },
+
+  get_config = function() {
+    list(output_dim = self$output_dim,
+         name = self$name)
+  }
+)
+
+
+subclassed_model <- SubclassedModel(10)
+# Call the subclassed model once to create the weights.
+subclassed_model(op_ones(c(1, 784))) |> invisible()
+
+# Copy weights from functional_model to subclassed_model.
+set_weights(subclassed_model, get_weights(functional_model))
+
+stopifnot(all.equal(
+  get_weights(functional_model),
+  get_weights(subclassed_model)
+))
+

The case of stateless layers

+

Because stateless layers do not change the order or number of +weights, models can have compatible architectures even if there are +extra/missing stateless layers.

+
+input <- keras_input(shape = c(784), name = "digits")
+output <- input |>
+  layer_dense(64, activation = "relu", name = "dense_1") |>
+  layer_dense(64, activation = "relu", name = "dense_2") |>
+  layer_dense(10, name = "predictions")
+functional_model <- keras_model(input, output,
+                                name = "3_layer_mlp")
+
+input <- keras_input(shape = c(784), name = "digits")
+output <- input |>
+  layer_dense(64, activation = "relu", name = "dense_1") |>
+  layer_dense(64, activation = "relu", name = "dense_2") |>
+  # Add a dropout layer, which does not contain any weights.
+  layer_dropout(0.5) |>
+  layer_dense(10, name = "predictions")
+
+functional_model_with_dropout <-
+  keras_model(input, output, name = "3_layer_mlp")
+
+set_weights(functional_model_with_dropout,
+            get_weights(functional_model))
+
+
+

APIs for saving weights to disk & loading them back +

+

Weights can be saved to disk by calling +save_model_weights(filepath). The filename should end in +.weights.h5.

+

Example:

+
+sequential_model <- keras_model_sequential(input_shape = c(784),
+                                          input_name = "digits") |>
+  layer_dense(64, activation = "relu", name = "dense_1") |>
+  layer_dense(64, activation = "relu", name = "dense_2") |>
+  layer_dense(10, name = "predictions")
+sequential_model |> save_model_weights("my_model.weights.h5")
+sequential_model |> load_model_weights("my_model.weights.h5")
+

Note that changing layer$trainable (e.g., via freeze_weights()) may result in a different get_weights() ordering when the model contains nested layers.

+
+
+Transfer learning example +
+

When loading pretrained weights from a weights file, it is +recommended to load the weights into the original checkpointed model, +and then extract the desired weights/layers into a new model.

+

Example:

+
+create_functional_model <- function() {
+  inputs <- keras_input(shape = c(784), name = "digits")
+  outputs <- inputs |>
+    layer_dense(64, activation = "relu", name = "dense_1") |>
+    layer_dense(64, activation = "relu", name = "dense_2") |>
+    layer_dense(10, name = "predictions")
+  keras_model(inputs, outputs, name = "3_layer_mlp")
+}
+
+functional_model <- create_functional_model()
+functional_model |> save_model_weights("pretrained.weights.h5")
+
+# In a separate program:
+pretrained_model <- create_functional_model()
+pretrained_model |> load_model_weights("pretrained.weights.h5")
+
+# Create a new model by extracting layers from the original model:
+extracted_layers <- pretrained_model$layers |> head(-1)
+model <- keras_model_sequential(layers = extracted_layers) |>
+  layer_dense(5, name = "dense_3")
+summary(model)
+
## Model: "sequential_4"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ dense_1 (Dense)                 │ (None, 64)             │        50,240
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dense_2 (Dense)                 │ (None, 64)             │         4,160
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dense_3 (Dense)                 │ (None, 5)              │           325
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 54,725 (213.77 KB)
+##  Trainable params: 54,725 (213.77 KB)
+##  Non-trainable params: 0 (0.00 B)
+
+
+
+
+

Appendix: Handling custom objects +

+ +
+

Defining the config methods +

+

Specifications:

+
    +
  • +get_config() should return a JSON-serializable named +list in order to be compatible with the Keras architecture and +model-saving APIs.
  • +
  • +from_config(config) (a class method) should return a +new layer or model object that is created from the config. The default +implementation returns do.call(cls, config).
  • +
+

NOTE: If all your constructor arguments are already serializable, e.g. strings and ints, or non-custom Keras objects, overriding from_config() is not necessary. However, for more complex objects such as layers or models passed to initialize(), deserialization must be handled explicitly, either in initialize() itself or by overriding the from_config() method.

+

Example:

+
+layer_my_dense <- register_keras_serializable(
+  package = "MyLayers", name = "KernelMult",
+  object = Layer(
+    "MyDense",
+    initialize = function(units,
+                          ...,
+                          kernel_regularizer = NULL,
+                          kernel_initializer = NULL,
+                          nested_model = NULL) {
+      super$initialize(...)
+      self$hidden_units <- units
+      self$kernel_regularizer <- kernel_regularizer
+      self$kernel_initializer <- kernel_initializer
+      self$nested_model <- nested_model
+    },
+    get_config = function() {
+      config <- super$get_config()
+      # Update the config with the custom layer's parameters
+      config <- modifyList(config, list(
+        units = self$hidden_units,
+        kernel_regularizer = self$kernel_regularizer,
+        kernel_initializer = self$kernel_initializer,
+        nested_model = self$nested_model
+      ))
+      config
+    },
+    build = function(input_shape) {
+      input_units <- tail(input_shape, 1)
+      self$kernel <- self$add_weight(
+        name = "kernel",
+        shape = shape(input_units, self$hidden_units),
+        regularizer = self$kernel_regularizer,
+        initializer = self$kernel_initializer
+      )
+    },
+    call = function(inputs) {
+      op_matmul(inputs, self$kernel)
+    }
+  )
+)
+
+
+layer <- layer_my_dense(units = 16,
+                        kernel_regularizer = "l1",
+                        kernel_initializer = "ones")
+layer3 <- layer_my_dense(units = 64, nested_model = layer)
+
+config <- serialize_keras_object(layer3)
+str(config)
+
## List of 4
+##  $ module         : chr "<r-globalenv>"
+##  $ class_name     : chr "MyDense"
+##  $ config         :List of 5
+##   ..$ name        : chr "my_dense_1"
+##   ..$ trainable   : logi TRUE
+##   ..$ dtype       : chr "float32"
+##   ..$ units       : num 64
+##   ..$ nested_model:List of 4
+##   .. ..$ module         : chr "<r-globalenv>"
+##   .. ..$ class_name     : chr "MyDense"
+##   .. ..$ config         :List of 6
+##   .. .. ..$ name              : chr "my_dense"
+##   .. .. ..$ trainable         : logi TRUE
+##   .. .. ..$ dtype             : chr "float32"
+##   .. .. ..$ units             : num 16
+##   .. .. ..$ kernel_regularizer: chr "l1"
+##   .. .. ..$ kernel_initializer: chr "ones"
+##   .. ..$ registered_name: chr "MyLayers>KernelMult"
+##  $ registered_name: chr "MyLayers>KernelMult"
+
+new_layer <- deserialize_keras_object(config)
+new_layer
+
## <MyDense name=my_dense_1, built=False>
+##  signature: (*args, **kwargs)
+

Note that overriding from_config is unnecessary above +for MyDense because hidden_units, +kernel_initializer, and kernel_regularizer are +ints, strings, and a built-in Keras object, respectively. This means +that the default from_config implementation of +cls(!!!config) will work as intended.

+

For more complex objects, such as layers and models passed to initialize(), you must explicitly deserialize these objects. Let’s take a look at an example of a model where a from_config override is necessary.

+

Example: +

+
+`%||%` <- \(x, y) if(is.null(x)) y else x
+layer_custom_model <- register_keras_serializable(
+  package = "ComplexModels",
+  object = Layer(
+    "CustomModel",
+    initialize = function(first_layer, second_layer = NULL, ...) {
+      super$initialize(...)
+      self$first_layer <- first_layer
+      self$second_layer <- second_layer %||% layer_dense(, 8)
+    },
+
+    get_config = function() {
+      config <- super$get_config()
+      config <- modifyList(config, list(
+        first_layer = self$first_layer,
+        second_layer = self$second_layer
+      ))
+      config
+    },
+
+    from_config = function(cls, config) {
+      config$first_layer %<>% deserialize_keras_object()
+      config$second_layer %<>% deserialize_keras_object()
+      # note that the class is available in methods under the classname symbol,
+      # (`CustomModel` for this class), and also under the symbol `__class__`
+      cls(!!!config)
+      # CustomModel(!!!config)
+    },
+    call = function(inputs) {
+      inputs |>
+        self$first_layer() |>
+        self$second_layer()
+    }
+  )
+)
+
+# Let's make our first layer the custom layer from the previous example (MyDense)
+inputs <- keras_input(c(32))
+outputs <-  inputs |> layer_custom_model(first_layer=layer)
+model <- keras_model(inputs, outputs)
+
+config <- get_config(model)
+new_model <- from_config(config)
+ +
+
+

How custom objects are serialized +

+

The serialization format has a special key for custom objects +registered via register_keras_serializable(). This +registered_name key allows for easy retrieval at +loading/deserialization time while also allowing users to add custom +naming.

+

Let’s take a look at the config from serializing the custom layer +MyDense we defined above.

+

Example:

+
+layer <- layer_my_dense(
+  units = 16,
+  kernel_regularizer = regularizer_l1_l2(l1 = 1e-5, l2 = 1e-4),
+  kernel_initializer = "ones",
+)
+config <- serialize_keras_object(layer)
+str(config)
+
## List of 4
+##  $ module         : chr "<r-globalenv>"
+##  $ class_name     : chr "MyDense"
+##  $ config         :List of 6
+##   ..$ name              : chr "my_dense_2"
+##   ..$ trainable         : logi TRUE
+##   ..$ dtype             : chr "float32"
+##   ..$ units             : num 16
+##   ..$ kernel_regularizer:List of 4
+##   .. ..$ module         : chr "keras.regularizers"
+##   .. ..$ class_name     : chr "L1L2"
+##   .. ..$ config         :List of 2
+##   .. .. ..$ l1: num 1e-05
+##   .. .. ..$ l2: num 1e-04
+##   .. ..$ registered_name: NULL
+##   ..$ kernel_initializer: chr "ones"
+##  $ registered_name: chr "MyLayers>KernelMult"
+

As shown, the registered_name key contains the lookup information for the Keras master list, including the package MyLayers and the custom name KernelMult that we gave when calling register_keras_serializable(). Take a look again at the custom class definition/registration here.

+

Note that the class_name key contains the original name +of the class, allowing for proper re-initialization in +from_config.

+

Additionally, note that the module key is "&lt;r-globalenv&gt;" rather than a built-in Keras module, since this is a custom object defined in the R global environment.

+
+
+
+
+
diff --git a/docs/articles/training_callbacks.html b/docs/articles/training_callbacks.html
deleted file mode 100644
index 500bd27f1c..0000000000
diff --git a/docs/articles/training_visualization.html b/docs/articles/training_visualization.html
deleted file mode 100644
index d2ba2eb1d4..0000000000
diff --git a/docs/articles/training_with_built_in_methods.html b/docs/articles/training_with_built_in_methods.html
new file mode 100644
index 0000000000..7003a2da50
Training & evaluation with the built-in methods • keras3

Setup +

+
+library(keras3)
+
+
+

Introduction +

+

This guide covers training, evaluation, and prediction (inference) of models when using built-in APIs for training & validation (such as fit(), evaluate() and predict()).

+

If you are interested in leveraging fit() while +specifying your own training step function, see the Customizing what happens in +fit() guide.

+

If you are interested in writing your own training & evaluation +loops from scratch, see the guide Writing a +training loop from scratch.

+

In general, whether you are using built-in loops or writing your own, +model training & evaluation works strictly in the same way across +every kind of Keras model – Sequential models, models built with the +Functional API, and models written from scratch via model +subclassing.

+
+
+

API overview: a first end-to-end example +

+

When passing data to the built-in training loops of a model, you +should either use:

+
    +
  • Arrays (if your data is small and fits in memory)
  • +
  • tf_dataset objects (see the sketch after this list)
  • +
  • PyTorch DataLoader instances
  • +
+
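
For instance, here is a sketch of the tf_dataset option (assuming the tfdatasets package, and the x_train / y_train arrays and compiled model defined later in this guide):

+
+library(tfdatasets)
+dataset <- tensor_slices_dataset(list(x_train, y_train)) |>
+  dataset_shuffle(buffer_size = 1024) |>
+  dataset_batch(64)
+model |> fit(dataset, epochs = 2)
+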

In the next few paragraphs, we’ll use the MNIST dataset as R arrays, in order to demonstrate how to use optimizers, losses, and metrics. Afterwards, we’ll take a close look at each of the other options.

+

Let’s consider the following model (here, we build it with the Functional API, but it could be a Sequential model or a subclassed model as well):

+
+inputs <- keras_input(shape = 784, name="digits")
+outputs <- inputs |>
+  layer_dense(units = 64, activation = "relu", name = "dense_1") |>
+  layer_dense(units = 64, activation = "relu", name = "dense_2") |>
+  layer_dense(units = 10, activation = "softmax", name = "predictions")
+model <- keras_model(inputs = inputs, outputs = outputs)
+summary(model)
+
## Model: "functional_1"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ digits (InputLayer)             │ (None, 784)            │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dense_1 (Dense)                 │ (None, 64)             │        50,240
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dense_2 (Dense)                 │ (None, 64)             │         4,160
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ predictions (Dense)             │ (None, 10)             │           650
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 55,050 (215.04 KB)
+##  Trainable params: 55,050 (215.04 KB)
+##  Non-trainable params: 0 (0.00 B)
+

Here’s what the typical end-to-end workflow looks like, consisting +of:

+
    +
  • Training
  • +
  • Validation on a holdout set generated from the original training +data
  • +
  • Evaluation on the test data
  • +
+

We’ll use MNIST data for this example.

+
+c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist()
+
+# Preprocess the data (these are R arrays)
+x_train <- array_reshape(x_train, c(60000, 784)) / 255
+x_test <- array_reshape(x_test, c(10000, 784)) / 255
+
+# Reserve 10,000 samples for validation
+x_val <- x_train[1:10000,]
+y_val <- y_train[1:10000]
+x_train <- x_train[-c(1:10000),]
+y_train <- y_train[-c(1:10000)]
+

We specify the training configuration (optimizer, loss, metrics):

+
+model |> compile(
+  # Optimizer
+  optimizer = optimizer_rmsprop(),
+  # Loss function to minimize
+  loss = loss_sparse_categorical_crossentropy(),
+  # List of metrics to monitor
+  metrics = list(metric_sparse_categorical_accuracy())
+)
+

We call fit(), which will train the model by slicing the +data into “batches” of size batch_size, and repeatedly +iterating over the entire dataset for a given number of +epochs.

+
+history <- model |> fit(
+  x_train, y_train,
+  batch_size = 64,
+  epochs = 2,
+  # We pass some validation for
+  # monitoring validation loss and metrics
+  # at the end of each epoch
+  validation_data = list(x_val, y_val)
+)
+
## Epoch 1/2
+## 782/782 - 2s - 3ms/step - loss: 0.3410 - sparse_categorical_accuracy: 0.9035 - val_loss: 0.1869 - val_sparse_categorical_accuracy: 0.9455
+## Epoch 2/2
+## 782/782 - 1s - 973us/step - loss: 0.1588 - sparse_categorical_accuracy: 0.9532 - val_loss: 0.1303 - val_sparse_categorical_accuracy: 0.9626
+

The returned history object holds a record of the loss +values and metric values during training:

+
+history
+
##
+## Final epoch (plot to see history):
+##                            loss: 0.1588
+##     sparse_categorical_accuracy: 0.9532
+##                        val_loss: 0.1303
+## val_sparse_categorical_accuracy: 0.9626
+
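
As the printout hints (“plot to see history”), you can also plot the recorded metrics:

+
+plot(history)  # loss and metrics per epoch, for training and validation
+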

We evaluate the model on the test data via +evaluate():

+
+# Evaluate the model on the test data using `evaluate`
+results <- model |> evaluate(x_test, y_test, batch_size=128)
+
## 79/79 - 0s - 3ms/step - loss: 0.1258 - sparse_categorical_accuracy: 0.9625
+
+results
+
## $loss
+## [1] 0.1257554
+##
+## $sparse_categorical_accuracy
+## [1] 0.9625
+
+# Generate predictions (probabilities -- the output of the last layer)
+# on new data using `predict`
+predictions <- model |> predict(x_test[1:2,])
+
## 1/1 - 0s - 152ms/step
+
+dim(predictions)
+
## [1]  2 10
+

Now, let’s review each piece of this workflow in detail.

+
+
+

The compile() method: specifying a loss, metrics, and +an optimizer +

+

To train a model with fit(), you need to specify a loss +function, an optimizer, and optionally, some metrics to monitor.

+

You pass these to the model as arguments to the +compile() method:

+
+model |> compile(
+  optimizer = optimizer_rmsprop(learning_rate = 1e-3),
+  loss = loss_sparse_categorical_crossentropy(),
+  metrics = list(metric_sparse_categorical_accuracy())
+)
+

The metrics argument should be a list – your model can +have any number of metrics.

+
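For instance (a sketch), you could track both plain and top-5 accuracy:

+
+model |> compile(
+  optimizer = "rmsprop",
+  loss = "sparse_categorical_crossentropy",
+  metrics = list(
+    metric_sparse_categorical_accuracy(),
+    metric_sparse_top_k_categorical_accuracy(k = 5)
+  )
+)
+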

If your model has multiple outputs, you can specify different losses +and metrics for each output, and you can modulate the contribution of +each output to the total loss of the model. You will find more details +about this in the Passing data to multi-input, multi-output +models section.

+

Note that if you’re satisfied with the default settings, in many +cases the optimizer, loss, and metrics can be specified via string +identifiers as a shortcut:

+
+model |> compile(
+  optimizer = "rmsprop",
+  loss = "sparse_categorical_crossentropy",
+  metrics = c("sparse_categorical_accuracy")
+)
+

For later reuse, let’s put our model definition and compile step in +functions; we will call them several times across different examples in +this guide.

+
+get_uncompiled_model <- function() {
+  inputs <- keras_input(shape = 784, name = "digits")
+  outputs <- inputs |>
+    layer_dense(units = 64, activation = "relu", name = "dense_1") |>
+    layer_dense(units = 64, activation = "relu", name = "dense_2") |>
+    layer_dense(units = 10, activation = "softmax", name = "predictions")
+  keras_model(inputs = inputs, outputs = outputs)
+}
+
+get_compiled_model <- function() {
+  model <- get_uncompiled_model()
+  model |> compile(
+    optimizer = "rmsprop",
+    loss = "sparse_categorical_crossentropy",
+    metrics = c("sparse_categorical_accuracy")
+  )
+  model
+}
+
+

Many built-in optimizers, losses, and metrics are available +

+

In general, you won’t have to create your own losses, metrics, or +optimizers from scratch, because what you need is likely to be already +part of the Keras API:

+

Optimizers:

    +
  • optimizer_sgd() (with or without momentum)
  • +
  • optimizer_rmsprop()
  • +
  • optimizer_adam()
  • +
  • etc.
  • +
+

Losses:

    +
  • loss_mean_squared_error()
  • +
  • loss_kl_divergence()
  • +
  • loss_cosine_similarity()
  • +
  • etc.
  • +
+

Metrics:

    +
  • metric_auc()
  • +
  • metric_precision()
  • +
  • metric_recall()
  • +
  • etc.
  • +
+
+
+

Custom losses +

+

If you need to create a custom loss, Keras provides three ways to do +so.

+

The first method involves creating a function that accepts inputs +y_true and y_pred. The following example shows +a loss function that computes the mean squared error between the real +data and the predictions:

+
+custom_mean_squared_error <- function(y_true, y_pred) {
+  op_mean(op_square(y_true - y_pred), axis = -1)
+}
+
+model <- get_uncompiled_model()
+model |> compile(optimizer = "adam",
+                 loss = custom_mean_squared_error)
+
+# We need to one-hot encode the labels to use MSE
+y_train_one_hot <- op_one_hot(y_train, num_classes = 10)
+model |> fit(x_train, y_train_one_hot, batch_size = 64, epochs = 2)
+
## Epoch 1/2
+## 782/782 - 2s - 2ms/step - loss: 0.0161
+## Epoch 2/2
+## 782/782 - 1s - 677us/step - loss: 0.0078
+

If you need a loss function that takes in parameters besides y_true and y_pred, you can subclass the Keras base Loss class using Loss() and implement the following two methods:

+
    +
  • +initialize(): accept parameters to pass during the call +of your loss function
  • +
  • +call(y_true, y_pred): use the targets (y_true) and the +model predictions (y_pred) to compute the model’s loss
  • +
+

Let’s say you want to use mean squared error, but with an added term +that will de-incentivize prediction values far from 0.5 (we assume that +the categorical targets are one-hot encoded and take values between 0 +and 1). This creates an incentive for the model not to be too confident, +which may help reduce overfitting (we won’t know if it works until we +try!).

+

Here’s how you would do it:

+
+loss_custom_mse <- Loss(
+  classname = "CustomMSE",
+  initialize = function(regularization_factor = 0.1, name = "custom_mse") {
+    super$initialize(name = name)
+    self$regularization_factor <- regularization_factor
+  },
+  call = function(y_true, y_pred) {
+    mse <- op_mean(op_square(y_true - y_pred), axis = -1)
+    reg <- op_mean(op_square(0.5 - y_pred), axis = -1)
+    mse + reg * self$regularization_factor
+  }
+)
+
+model <- get_uncompiled_model()
+model |> compile(optimizer = "adam", loss = loss_custom_mse())
+
+y_train_one_hot <- op_one_hot(y_train, num_classes = 10)
+model |> fit(x_train, y_train_one_hot, batch_size = 64, epochs = 1)
+
## 782/782 - 2s - 2ms/step - loss: 0.0390
+
+
+

Custom metrics +

+

If you need a metric that isn’t part of the API, you can easily +create custom metrics by subclassing the Keras base Metric +class using [Metric()]. You will need to implement 4 +methods:

+
    +
  • +initialize(), in which you will create state variables +for your metric.
  • +
  • +update_state(y_true, y_pred, sample_weight = NULL), +which uses the targets y_true and the model predictions y_pred to update +the state variables.
  • +
  • +result(), which uses the state variables to compute the +final results.
  • +
  • +reset_state(), which reinitializes the state of the +metric.
  • +
+

State update and results computation are kept separate (in +update_state() and result(), respectively) +because in some cases, the results computation might be very expensive +and would only be done periodically.

+

Here’s a simple example showing how to implement a +CategoricalTruePositives metric that counts how many +samples were correctly classified as belonging to a given class:

+
+metric_categorical_true_positives <- Metric(
+  "CategoricalTruePositives",
+
+  initialize = function(name = "categorical_true_positives", ...) {
+    super$initialize(name = name, ...)
+    self$true_positives <- self$add_variable(shape = shape(),
+                                             name = "ctp",
+                                             initializer = "zeros")
+  },
+
+  update_state = function(y_true, y_pred, sample_weight = NULL) {
+    y_pred <- op_argmax(y_pred, axis = 2) |> op_reshape(c(-1, 1))
+    values <- op_cast(y_true, "int32") == op_cast(y_pred, "int32")
+    values <- op_cast(values, "float32")
+    if (!is.null(sample_weight)) {
+      sample_weight <- op_cast(sample_weight, "float32")
+      values <- op_multiply(values, sample_weight)
+    }
+    self$true_positives$assign_add(op_sum(values))
+  },
+
+  result = function() {
+    self$true_positives$value
+  },
+
+  reset_state = function() {
+    self$true_positives$assign(0.0)
+  }
+)
+
+model <- get_uncompiled_model()
+model |> compile(
+  optimizer = optimizer_rmsprop(learning_rate = 1e-3),
+  loss = loss_sparse_categorical_crossentropy(),
+  metrics = c(metric_categorical_true_positives())
+)
+history <- model |> fit(x_train, y_train, batch_size = 64, epochs = 3)
+
## Epoch 1/3
+## 782/782 - 1s - 2ms/step - categorical_true_positives: 360502.0000 - loss: 0.3444
+## Epoch 2/3
+## 782/782 - 1s - 862us/step - categorical_true_positives: 362616.0000 - loss: 0.1656
+## Epoch 3/3
+## 782/782 - 1s - 660us/step - categorical_true_positives: 363187.0000 - loss: 0.1203
+
+
+

Handling losses and metrics that don’t fit the standard +signature +

+

The overwhelming majority of losses and metrics can be computed from +y_true and y_pred, where y_pred +is an output of your model – but not all of them. For instance, a +regularization loss may only require the activation of a layer (there +are no targets in this case), and this activation may not be a model +output.

+

In such cases, you can call self$add_loss(loss_value) +from inside the call method of a custom layer. Losses added in this way +get added to the “main” loss during training (the one passed to +compile()). Here’s a simple example that adds activity +regularization (note that activity regularization is built into all +Keras layers – this layer is just for the sake of providing a concrete +example):

+
+layer_custom_activity_regularizer <- Layer(
+  "ActivityRegularization",
+  call = function(inputs) {
+    self$add_loss(op_sum(inputs) * 0.1)
+    inputs  # Pass-through layer.
+  }
+)
+
+inputs <- keras_input(shape = 784, name = "digits")
+outputs <- inputs |>
+  layer_dense(units = 32, activation = "relu", name = "dense_1") |>
+  layer_custom_activity_regularizer() |>
+  layer_dense(units = 64, activation = "relu", name = "dense_2") |>
+  layer_dense(units = 10, name = "predictions")
+
+model <- keras_model(inputs = inputs, outputs = outputs)
+model |> compile(optimizer = optimizer_rmsprop(learning_rate = 1e-3),
+                 loss = loss_sparse_categorical_crossentropy(from_logits = TRUE))
+
+# The displayed loss will be much higher than before
+# due to the regularization component.
+model |> fit(x_train, y_train, batch_size = 64, epochs = 1)
+
## 782/782 - 2s - 2ms/step - loss: 2.3721
+

Note that when you pass losses via add_loss(), it +becomes possible to call compile() without a loss function, +since the model already has a loss to minimize.

+

Consider the following LogisticEndpoint layer: it takes +as inputs targets & logits, and it tracks a crossentropy loss via +add_loss().

+
+layer_logistic_endpoint <- Layer(
+  "LogisticEndpoint",
+  initialize = function(name = NULL) {
+    super$initialize(name = name)
+    self$loss_fn <- loss_binary_crossentropy(from_logits = TRUE)
+  },
+  call = function(targets, logits, sample_weights = NULL) {
+    # Compute the training-time loss value and add it
+    # to the layer using `self.add_loss()`.
+    loss <- self$loss_fn(targets, logits, sample_weights)
+    self$add_loss(loss)
+
+    # Return the inference-time prediction tensor (for `predict()`).
+    op_softmax(logits)
+  }
+)
+

You can use it in a model with two inputs (input data & targets), +compiled without a loss argument, like this:

+
+inputs <- keras_input(shape = 3, name = "inputs")
+targets <- keras_input(shape = 10, name = "targets")
+
+logits <- inputs |> layer_dense(10)
+predictions <- layer_logistic_endpoint(name = "predictions")(targets, logits)
+
+model <- keras_model(inputs = list(inputs, targets),
+                     outputs = predictions)
+model |> compile(optimizer = "adam")  # No loss argument!
+
+data <- list(
+  inputs = random_normal(c(3, 3)),
+  targets = random_normal(c(3, 10))
+)
+model |> fit(data, epochs = 1)
+
## 1/1 - 1s - 512ms/step - loss: 1.0566
+

For more information about training multi-input models, see the +section Passing data to multi-input, multi-output +models.

+
+
+

Automatically setting apart a validation holdout set +

+

In the first end-to-end example you saw, we used the +validation_data argument to pass a list of arrays +list(x_val, y_val) to the model for evaluating a validation +loss and validation metrics at the end of each epoch.

+

Here’s another option: the argument validation_split +allows you to automatically reserve part of your training data for +validation. The argument value represents the fraction of the data to be +reserved for validation, so it should be set to a number higher than 0 +and lower than 1. For instance, validation_split = 0.2 +means “use 20% of the data for validation”, and +validation_split = 0.6 means “use 60% of the data for +validation”.

+

The way the validation is computed is by taking the last x% samples +of the arrays received by the fit() call, before any +shuffling.

+
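To make the splitting rule concrete, here is a minimal sketch of the equivalent manual split (illustrative only – fit() performs this internally when you pass validation_split = 0.2):

+
+n <- nrow(x_train)
+n_val <- floor(0.2 * n)
+val_idx <- (n - n_val + 1):n  # the *last* 20% of samples, unshuffled
+x_val_manual <- x_train[val_idx, ]
+y_val_manual <- y_train[val_idx]
+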

Note that you can only use validation_split when +training with in-memory R array data.

+
+model <- get_compiled_model()
+model |> fit(x_train, y_train,
+             batch_size = 64,
+             validation_split = 0.2, epochs = 1)
+
## 625/625 - 2s - 3ms/step - loss: 0.3817 - sparse_categorical_accuracy: 0.8919 - val_loss: 0.1953 - val_sparse_categorical_accuracy: 0.9431
+
+
+
+

Training & evaluation using TF Dataset objects +

+

In the past few paragraphs, you’ve seen how to handle losses, +metrics, and optimizers, and you’ve seen how to use the +validation_data and validation_split arguments +in fit(), when your data is passed as arrays.

+

Another option is to use an iterator-like object, such as a +tf.data.Dataset, a PyTorch DataLoader, or an R +generator function. Let’s take a look at the first of these.

+

The tfdatasets R package contains a set of utilities +for loading and preprocessing data in a way that’s fast and scalable. +For a complete guide about creating Datasets, see the tf.data +documentation.

+

You can use tf.data to train your Keras models +regardless of the backend you’re using – whether it’s JAX, PyTorch, or +TensorFlow. You can pass a Dataset instance +directly to the methods fit(), evaluate(), and +predict():

+
+library(tfdatasets, exclude = "shape")
+model <- get_compiled_model()
+
+# First, let's create a training Dataset instance.
+# For the sake of our example, we'll use the same MNIST data as before.
+train_dataset <- tensor_slices_dataset(list(x_train, y_train))
+
+# Shuffle and slice the dataset.
+train_dataset <- train_dataset |>
+  dataset_shuffle(buffer_size=1024) |>
+  dataset_batch(64)
+
+# Now we get a test dataset.
+test_dataset <-
+  tensor_slices_dataset(list(x_test, y_test)) |>
+  dataset_batch(64)
+
+# Since the dataset already takes care of batching,
+# we don't pass a `batch_size` argument.
+model |> fit(train_dataset, epochs = 3)
+
## Epoch 1/3
+## 782/782 - 2s - 2ms/step - loss: 0.3365 - sparse_categorical_accuracy: 0.9041
+## Epoch 2/3
+## 782/782 - 1s - 821us/step - loss: 0.1605 - sparse_categorical_accuracy: 0.9524
+## Epoch 3/3
+## 782/782 - 1s - 811us/step - loss: 0.1185 - sparse_categorical_accuracy: 0.9647
+
+# You can also evaluate or predict on a dataset.
+result <- model |> evaluate(test_dataset)
+
## 157/157 - 1s - 5ms/step - loss: 0.1152 - sparse_categorical_accuracy: 0.9627
+
+result
+
## $loss
+## [1] 0.1151983
+##
+## $sparse_categorical_accuracy
+## [1] 0.9627
+

Note that the Dataset is reset at the end of each epoch, +so it can be reused for the next epoch.

+

If you want to run training only on a specific number of batches from +this Dataset, you can pass the steps_per_epoch argument, +which specifies how many training steps the model should run using this +Dataset before moving on to the next epoch.

+
+model <- get_compiled_model()
+
+# Prepare the training dataset
+train_dataset <- tensor_slices_dataset(list(x_train, y_train))
+train_dataset <- train_dataset |>
+  dataset_shuffle(buffer_size = 1024) |>
+  dataset_batch(64)
+
+# Only use 100 batches per epoch (that's 64 * 100 samples)
+model |> fit(train_dataset, epochs = 3, steps_per_epoch = 100)
+
## Epoch 1/3
+## 100/100 - 1s - 8ms/step - loss: 0.8017 - sparse_categorical_accuracy: 0.7806
+## Epoch 2/3
+## 100/100 - 0s - 905us/step - loss: 0.3661 - sparse_categorical_accuracy: 0.9006
+## Epoch 3/3
+## 100/100 - 0s - 703us/step - loss: 0.3009 - sparse_categorical_accuracy: 0.9106
+

You can also pass a Dataset instance as the +validation_data argument in fit():

+
+model <- get_compiled_model()
+
+# Prepare the training dataset
+train_dataset <- tensor_slices_dataset(list(x_train, y_train))
+train_dataset <- train_dataset |>
+  dataset_shuffle(buffer_size=1024) |>
+  dataset_batch(64)
+
+# Prepare the validation dataset
+val_dataset <- tensor_slices_dataset(list(x_val, y_val))
+val_dataset <- val_dataset |> dataset_batch(64)
+
+model |> fit(train_dataset, epochs = 1, validation_data = val_dataset)
+
## 782/782 - 2s - 3ms/step - loss: 0.3428 - sparse_categorical_accuracy: 0.9022 - val_loss: 0.2337 - val_sparse_categorical_accuracy: 0.9291
+

At the end of each epoch, the model will iterate over the validation +dataset and compute the validation loss and validation metrics.

+

If you want to run validation only on a specific number of batches +from this dataset, you can pass the validation_steps +argument, which specifies how many validation steps the model should run +with the validation dataset before interrupting validation and moving on +to the next epoch:

+
+model  <- get_compiled_model()
+
+# Prepare the training dataset
+train_dataset <- tensor_slices_dataset(list(x_train, y_train))
+train_dataset <- train_dataset |>
+  dataset_shuffle(buffer_size = 1024) |>
+  dataset_batch(64)
+
+# Prepare the validation dataset
+val_dataset <- tensor_slices_dataset(list(x_val, y_val))
+val_dataset <- val_dataset |> dataset_batch(64)
+
+model |> fit(
+  train_dataset,
+  epochs = 1,
+  # Only run validation using the first 10 batches of the dataset
+  # using the `validation_steps` argument
+  validation_data = val_dataset,
+  validation_steps = 10
+)
+
## 782/782 - 2s - 2ms/step - loss: 0.3391 - sparse_categorical_accuracy: 0.9035 - val_loss: 0.1997 - val_sparse_categorical_accuracy: 0.9391
+

Note that the validation dataset will be reset after each use (so +that you will always be evaluating on the same samples from epoch to +epoch).

+

The argument validation_split (generating a holdout set +from the training data) is not supported when training from +Dataset objects, since this feature requires the ability to +index the samples of the datasets, which is not possible in general with +the Dataset API.
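If you do need a holdout set when training from a Dataset, a common workaround is to carve one out yourself with dataset_take() and dataset_skip() before batching – a minimal sketch (the holdout size of 10000 samples is arbitrary):

+
+unbatched <- tensor_slices_dataset(list(x_train, y_train))
+val_dataset <- unbatched |> dataset_take(10000) |> dataset_batch(64)
+train_dataset <- unbatched |> dataset_skip(10000) |> dataset_batch(64)
+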

+
+

Using sample weighting and class weighting +

+

With the default settings, the weight of a sample is decided by its +frequency in the dataset. There are two methods to weight the data, +independent of sample frequency:

+
    +
  • Class weights
  • +
  • Sample weights
  • +
+
+

Class weights +

+

This is set by passing a named list to the class_weight +argument to fit(). This list maps class indices to the +weight that should be used for samples belonging to this class.

+

This can be used to balance classes without resampling, or to train a +model that gives more importance to a particular class.

+

For instance, if class “0” is half as represented as class “1” in +your data, you could use +model |> fit(..., class_weight = c("0" = 1, "1" = 0.5)).

+

Here’s an R example where we use class weights or sample weights to +give more importance to the correct classification of class #5 (which is +the digit “5” in the MNIST dataset).

+
+class_weight <- c(
+    "0" = 1.0,
+    "1" = 1.0,
+    "2" = 1.0,
+    "3" = 1.0,
+    "4" = 1.0,
+    # Set weight "2" for class "5",
+    # making this class 2x more important
+    "5" = 2.0,
+    "6" = 1.0,
+    "7" = 1.0,
+    "8" = 1.0,
+    "9" = 1.0
+)
+
+model <- get_compiled_model()
+model |> fit(x_train, y_train,
+             class_weight = class_weight,
+             batch_size = 64, epochs = 1)
+
## 782/782 - 2s - 2ms/step - loss: 0.3713 - sparse_categorical_accuracy: 0.9018
+
+
+

Sample weights +

+

For fine-grained control, or if you are not building a classifier, +you can use sample weights.

+
    +
  • When training from R arrays: Pass the sample_weight +argument to fit().
  • +
  • When training from tf_dataset or any other sort of +iterator: yield +(input_batch, label_batch, sample_weight_batch) +tuples.
  • +
+

A “sample weights” array is an array of numbers that specify how much +weight each sample in a batch should have in computing the total loss. +It is commonly used in imbalanced classification problems (the idea +being to give more weight to rarely-seen classes).

+

When the weights used are ones and zeros, the array can be used as a +mask for the loss function (entirely discarding the +contribution of certain samples to the total loss).

+
+sample_weight <- rep(1.0, length(y_train))
+sample_weight[y_train == 5] <- 2.0
+
+model <- get_compiled_model()
+model |> fit(
+  x_train, y_train,
+  sample_weight = sample_weight,
+  batch_size = 64, epochs = 1
+)
+
## 782/782 - 1s - 2ms/step - loss: 0.3740 - sparse_categorical_accuracy: 0.9015
+

Here’s a matching Dataset example:

+
+sample_weight <- rep(1.0, length(y_train))
+sample_weight[y_train == 5] <- 2.0
+
+# Create a Dataset that includes sample weights
+# (3rd element in the return tuple).
+train_dataset <- tensor_slices_dataset(list(
+    x_train, y_train, sample_weight
+))
+
+# Shuffle and slice the dataset.
+train_dataset <- train_dataset |>
+  dataset_shuffle(buffer_size = 1024) |>
+  dataset_batch(64)
+
+model <- get_compiled_model()
+model |> fit(train_dataset, epochs = 1)
+
## 782/782 - 2s - 2ms/step - loss: 0.3654 - sparse_categorical_accuracy: 0.9057
+
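And here is a sketch of the masking use case mentioned above: with a 0/1 sample weight, samples of a given class (here the digit “9”, chosen arbitrarily) contribute nothing to the total loss:

+
+mask_weight <- ifelse(y_train == 9, 0, 1)
+
+model <- get_compiled_model()
+model |> fit(x_train, y_train,
+             sample_weight = mask_weight,
+             batch_size = 64, epochs = 1)
+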
+
+
+

Passing data to multi-input, multi-output models +

+

In the previous examples, we were considering a model with a single +input (a tensor of shape (784)) and a single output (a +prediction tensor of shape (10)). But what about models +that have multiple inputs or outputs?

+

Consider the following model, which has an image input of shape +(32, 32, 3) (that’s (height, width, channels)) +and a time series input of shape (NA, 10) (that’s +(timesteps, features)). Our model will have two outputs +computed from the combination of these inputs: a “score” (of shape +(1)) and a probability distribution over five classes (of +shape (5)).

+
+image_input <- keras_input(c(32, 32, 3), name = "img_input")
+timeseries_input <- keras_input(c(NA, 10), name = "ts_input")
+
+x1 <- image_input |>
+  layer_conv_2d(filters = 3, kernel_size = c(3, 3)) |>
+  layer_global_max_pooling_2d()
+
+x2 <- timeseries_input |>
+  layer_conv_1d(filters = 3, kernel_size = 3) |>
+  layer_global_max_pooling_1d()
+
+x <- layer_concatenate(x1, x2)
+
+score_output <- layer_dense(x, 1, name = "score_output")
+class_output <- layer_dense(x, 5, name = "class_output")
+
+model <- keras_model(
+  inputs = list(image_input, timeseries_input),
+  outputs = list(score_output, class_output)
+)
+

Let’s plot this model, so you can clearly see what we’re doing here +(note that the shapes shown in the plot are batch shapes, rather than +per-sample shapes).

+
+plot(model, show_shapes = TRUE)
+
+plot of chunk unnamed-chunk-26

+
+

At compilation time, we can specify different losses for different +outputs, by passing the loss functions as a list:

+
+model |> compile(
+  optimizer = optimizer_rmsprop(1e-3),
+  loss = list(
+    loss_mean_squared_error(),
+    loss_categorical_crossentropy()
+  )
+)
+

If we only passed a single loss function to the model, the same loss +function would be applied to every output (which is not appropriate +here).

+

Likewise for metrics:

+
+model |> compile(
+  optimizer = optimizer_rmsprop(1e-3),
+  loss = list(
+    loss_mean_squared_error(),
+    loss_categorical_crossentropy()
+  ),
+  metrics = list(
+    list(
+      metric_mean_absolute_error(),
+      metric_mean_absolute_percentage_error()
+    ),
+    list(metric_categorical_accuracy())
+  )
+)
+

Since we gave names to our output layers, we could also specify +per-output losses and metrics via a named list:

+
+model |> compile(
+  optimizer = optimizer_rmsprop(1e-3),
+  loss = list(
+    score_output = loss_mean_squared_error(),
+    class_output = loss_categorical_crossentropy()
+  ),
+  metrics = list(
+    score_output = list(
+      metric_mean_absolute_error(),
+      metric_mean_absolute_percentage_error()
+    ),
+    class_output = list(metric_categorical_accuracy())
+  )
+)
+

We recommend the use of names if you have more than 2 outputs.

+

It’s possible to give different weights to different output-specific +losses (for instance, one might wish to privilege the “score” loss in +our example, by giving it 2x the importance of the class loss), using +the loss_weights argument:

+
+model |> compile(
+  optimizer = optimizer_rmsprop(1e-3),
+  loss = list(
+    score_output = loss_mean_squared_error(),
+    class_output = loss_categorical_crossentropy()
+  ),
+  metrics = list(
+    score_output = list(
+      metric_mean_absolute_error(),
+      metric_mean_absolute_percentage_error()
+    ),
+    class_output = list(metric_categorical_accuracy())
+  ),
+  loss_weights = list(score_output = 2.0, class_output = 1.0)
+)
+

You could also choose not to compute a loss for certain outputs, if +these outputs are meant for prediction but not for training:

+
+# loss list, positional version
+model |> compile(
+  optimizer = optimizer_rmsprop(1e-3),
+  loss = list(NULL, loss_categorical_crossentropy())
+)
+
+# Or loss list, named version
+model |> compile(
+  optimizer = optimizer_rmsprop(1e-3),
+  loss = list(class_output = loss_categorical_crossentropy())
+)
+

Passing data to a multi-input or multi-output model in +fit() works in a similar way as specifying a loss function +in compile(): you can pass lists of arrays (with 1:1 +mapping to the outputs that received a loss function) or named lists +mapping output names to arrays.

+
+model |> compile(
+  optimizer = optimizer_rmsprop(1e-3),
+  loss = list(
+    loss_mean_squared_error(),
+    loss_categorical_crossentropy()
+  )
+)
+
+# Generate dummy data
+img_data <- random_normal(c(100, 32, 32, 3))
+ts_data <- random_normal(c(100, 20, 10))
+score_targets <- random_normal(c(100, 1))
+class_targets <- random_normal(c(100, 5))
+
+# Fit on unnamed lists (positional matching)
+model |> fit(
+    list(img_data, ts_data),
+    list(score_targets, class_targets),
+    batch_size=32,
+    epochs=1
+)
+
## 4/4 - 2s - 497ms/step - loss: 1.3788
+
+# Alternatively, fit on named lists (names matching)
+model |> fit(
+  list(img_input = img_data, ts_input = ts_data),
+  list(score_output = score_targets, class_output = class_targets),
+  batch_size = 32,
+  epochs = 1
+)
+
## 4/4 - 1s - 250ms/step - loss: 0.2857
+

Here’s the Dataset use case: similarly to what we did +for R arrays, the Dataset should return a tuple of named +lists.

+
+train_dataset <- tensor_slices_dataset(list(
+  list(img_input = img_data, ts_input = ts_data),
+  list(score_output = score_targets, class_output = class_targets)
+))
+train_dataset <- train_dataset |>
+  dataset_shuffle(buffer_size = 1024) |>
+  dataset_batch(64)
+
+model |> fit(train_dataset, epochs = 1)
+
## 2/2 - 1s - 665ms/step - loss: 0.5600
+
+
+

Using callbacks +

+

Callbacks in Keras are objects that are called at different points +during training (at the start of an epoch, at the end of a batch, at the +end of an epoch, etc.). They can be used to implement certain behaviors, +such as:

+
    +
  • Doing validation at different points during training (beyond the +built-in per-epoch validation)
  • +
  • Checkpointing the model at regular intervals or when it exceeds a +certain accuracy threshold
  • +
  • Changing the learning rate of the model when training seems to be +plateauing
  • +
  • Doing fine-tuning of the top layers when training seems to be +plateauing
  • +
  • Sending email or instant message notifications when training ends or +where a certain performance threshold is exceeded
  • +
  • Etc.
  • +
+

Callbacks can be passed as a list to your call to +fit():

+
+model <- get_compiled_model()
+
+callbacks <- list(
+  callback_early_stopping(
+    # Stop training when `val_loss` is no longer improving
+    monitor = "val_loss",
+    # "no longer improving" being defined as "no better than 1e-2 less"
+    min_delta = 1e-2,
+    # "no longer improving" being further defined as "for at least 2 epochs"
+    patience = 2,
+    verbose = 1
+  )
+)
+model |> fit(
+  x_train,
+  y_train,
+  epochs = 20,
+  batch_size = 64,
+  callbacks = callbacks,
+  validation_split = 0.2
+)
+
## Epoch 1/20
+## 625/625 - 2s - 3ms/step - loss: 0.3695 - sparse_categorical_accuracy: 0.8961 - val_loss: 0.1873 - val_sparse_categorical_accuracy: 0.9469
+## Epoch 2/20
+## 625/625 - 1s - 1ms/step - loss: 0.1751 - sparse_categorical_accuracy: 0.9489 - val_loss: 0.1403 - val_sparse_categorical_accuracy: 0.9579
+## Epoch 3/20
+## 625/625 - 1s - 2ms/step - loss: 0.1277 - sparse_categorical_accuracy: 0.9625 - val_loss: 0.1218 - val_sparse_categorical_accuracy: 0.9651
+## Epoch 4/20
+## 625/625 - 1s - 2ms/step - loss: 0.1007 - sparse_categorical_accuracy: 0.9700 - val_loss: 0.1153 - val_sparse_categorical_accuracy: 0.9661
+## Epoch 5/20
+## 625/625 - 1s - 1ms/step - loss: 0.0822 - sparse_categorical_accuracy: 0.9760 - val_loss: 0.1104 - val_sparse_categorical_accuracy: 0.9670
+## Epoch 6/20
+## 625/625 - 1s - 975us/step - loss: 0.0683 - sparse_categorical_accuracy: 0.9801 - val_loss: 0.1098 - val_sparse_categorical_accuracy: 0.9689
+## Epoch 7/20
+## 625/625 - 1s - 1ms/step - loss: 0.0571 - sparse_categorical_accuracy: 0.9838 - val_loss: 0.1116 - val_sparse_categorical_accuracy: 0.9698
+## Epoch 8/20
+## 625/625 - 1s - 1ms/step - loss: 0.0485 - sparse_categorical_accuracy: 0.9864 - val_loss: 0.1126 - val_sparse_categorical_accuracy: 0.9702
+## Epoch 8: early stopping
+
+

Many built-in callbacks are available +

+

There are many built-in callbacks already available in Keras, such +as:

    +
  • callback_model_checkpoint(): periodically save the model.
  • +
  • callback_early_stopping(): stop training when training is +no longer improving the validation metrics.
  • +
  • callback_tensorboard(): periodically write model logs that +can be visualized in TensorBoard (more details in the section +“Visualization”).
  • +
  • callback_csv_logger(): streams loss and metrics data to a +CSV file.
  • +
  • etc.
  • +
+

See the callbacks +documentation for the complete list.

+
+
+

Writing your own callback +

+

You can create a custom callback by subclassing the base +[Callback()] class. A callback has access to its associated +model through the class property self$model.

+

Make sure to read the complete guide to writing +custom callbacks.

+

Here’s a simple example saving a list of per-batch loss values during +training:

+
+callback_loss_history <- Callback(
+  classname = "LossHistory",
+  initialize = function(file = "per_training_batch_losses.txt", ...) {
+    super$initialize(...)
+    private$file <- file
+  },
+  on_train_begin = function(logs = NULL) {
+    private$per_batch_losses <- fastmap::faststack()
+  },
+  on_train_batch_end = function(batch, logs = NULL) {
+    private$per_batch_losses$push(logs$loss)
+  },
+  on_train_end = function(logs = NULL) {
+    per_batch_losses <- private$per_batch_losses$as_list() |> as.numeric()
+    write(per_batch_losses, private$file)
+  }
+)
+
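A usage sketch – instantiate it and pass it to fit() like any other callback (the output file name comes from the default argument above):

+
+model <- get_compiled_model()
+model |> fit(x_train, y_train, epochs = 1, batch_size = 64,
+             callbacks = list(callback_loss_history()))
+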
+
+
+

Checkpointing models +

+

When you’re training a model on relatively large datasets, it’s crucial +to save checkpoints of your model at frequent intervals.

+

The easiest way to achieve this is with +[callback_model_checkpoint()]:

+
+model <- get_compiled_model()
+
+callbacks <- list(
+  callback_model_checkpoint(
+    # Path where to save the model
+    # The two parameters below mean that we will overwrite
+    # the current checkpoint if and only if
+    # the `val_loss` score has improved.
+    # The saved model name will include the current epoch.
+    filepath = "mymodel_{epoch}.keras",
+    save_best_only = TRUE,
+    # Only save a model if `val_loss` has improved.
+    monitor = "val_loss",
+    verbose = 1
+  )
+)
+model |> fit(
+  x_train, y_train,
+  epochs = 2, batch_size = 64,
+  callbacks = callbacks,
+  validation_split = 0.2
+)
+
## Epoch 1/2
+##
+## Epoch 1: val_loss improved from inf to 0.19344, saving model to mymodel_1.keras
+## 625/625 - 2s - 3ms/step - loss: 0.3787 - sparse_categorical_accuracy: 0.8940 - val_loss: 0.1934 - val_sparse_categorical_accuracy: 0.9441
+## Epoch 2/2
+##
+## Epoch 2: val_loss improved from 0.19344 to 0.14251, saving model to mymodel_2.keras
+## 625/625 - 1s - 1ms/step - loss: 0.1768 - sparse_categorical_accuracy: 0.9478 - val_loss: 0.1425 - val_sparse_categorical_accuracy: 0.9600
+

The ModelCheckpoint callback can be used to implement +fault-tolerance: the ability to restart training from the last saved +state of the model in case training gets randomly interrupted. Here’s a +basic example:

+
+# Prepare a directory to store all the checkpoints.
+checkpoint_dir <- "./ckpt"
+fs::dir_create(checkpoint_dir)
+
+make_or_restore_model <- function() {
+  # Either restore the latest (best) model, or create a fresh one
+  # if there is no checkpoint available.
+  checkpoints <- Sys.glob(file.path(checkpoint_dir, "model-loss=*.keras"))
+
+  if (length(checkpoints) > 0) {
+    checkpoint_losses <- sub("^model-loss=([0-9.]+)\\.keras$", "\\1",
+                             basename(checkpoints)) |> as.numeric()
+    best_checkpoint <- checkpoints[which.min(checkpoint_losses)]
+    load_model(best_checkpoint)
+  } else {
+    get_compiled_model()
+  }
+}
+
+model <- make_or_restore_model()
+callbacks <- list(
+  # This callback saves the model every 100 batches.
+  # We include the training loss in the saved model name.
+  callback_model_checkpoint(
+    filepath = file.path(checkpoint_dir, "model-loss={loss:.2f}.keras"),
+    save_freq = 100
+  )
+)
+model |> fit(x_train, y_train, epochs = 1, callbacks = callbacks)
+
## 1563/1563 - 2s - 2ms/step - loss: 0.2932 - sparse_categorical_accuracy: 0.9145
+

You can also write your own callback for saving and restoring +models.

+
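For instance, here is a minimal sketch of such a callback (hypothetical names) that saves the full model at the end of every epoch:

+
+callback_save_each_epoch <- Callback(
+  classname = "SaveEachEpoch",
+  on_epoch_end = function(epoch, logs = NULL) {
+    # `self$model` is the model being trained
+    save_model(self$model,
+               sprintf("manual-ckpt-epoch-%02d.keras", epoch),
+               overwrite = TRUE)
+  }
+)
+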

For a complete guide on serialization and saving, see the guide to saving and serializing +Models.

+
+
+

Using learning rate schedules +

+

A common pattern when training deep learning models is to gradually +reduce the learning rate as training progresses. This is generally known +as “learning rate decay”.

+

The learning rate decay schedule could be static (fixed in advance, as a +function of the current epoch or the current batch index), or dynamic +(responding to the current behavior of the model, in particular the +validation loss).

+
+

Passing a schedule to an optimizer +

+

You can easily use a static learning rate decay schedule by passing a +schedule object as the learning_rate argument in your +optimizer:

+
+initial_learning_rate <- 0.1
+lr_schedule <- learning_rate_schedule_exponential_decay(
+    initial_learning_rate, decay_steps=100000, decay_rate=0.96,
+    staircase=TRUE
+)
+
+optimizer <- optimizer_rmsprop(learning_rate = lr_schedule)
+

Several built-in schedules are available: +learning_rate_schedule_exponential_decay(), +learning_rate_schedule_piecewise_constant_decay(), +learning_rate_schedule_polynomial_decay(), and +learning_rate_schedule_inverse_time_decay().

+
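For example, here is a piecewise-constant schedule (an illustrative sketch – the boundaries and values are arbitrary):

+
+lr_schedule <- learning_rate_schedule_piecewise_constant_decay(
+  boundaries = c(100000, 110000),
+  values = c(1.0, 0.5, 0.1)  # LR before, between, and after the boundaries
+)
+optimizer <- optimizer_sgd(learning_rate = lr_schedule)
+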
+
+

Using callbacks to implement a dynamic learning rate schedule +

+

A dynamic learning rate schedule (for instance, decreasing the +learning rate when the validation loss is no longer improving) cannot be +achieved with these schedule objects, since the optimizer does not have +access to validation metrics.

+

However, callbacks do have access to all metrics, including +validation metrics! You can thus achieve this pattern by using a +callback that modifies the current learning rate on the optimizer. In +fact, this is even built-in as +[callback_reduce_lr_on_plateau()].

+
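For example (a sketch – the factor and patience values are arbitrary):

+
+model <- get_compiled_model()
+model |> fit(
+  x_train, y_train,
+  epochs = 10, batch_size = 64,
+  validation_split = 0.2,
+  callbacks = list(
+    callback_reduce_lr_on_plateau(
+      monitor = "val_loss",
+      factor = 0.5,  # halve the learning rate...
+      patience = 2   # ...after 2 epochs with no improvement
+    )
+  )
+)
+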
+
+
+

Visualizing loss and metrics during training with TensorBoard +

+

The best way to keep an eye on your model during training is to use +TensorBoard – a +browser-based application that you can run locally that provides you +with:

+
    +
  • Live plots of the loss and metrics for training and evaluation
  • +
  • (optionally) Visualizations of the histograms of your layer +activations
  • +
  • (optionally) 3D visualizations of the embedding spaces learned by +your layer_embedding() +
  • +
+

If you have installed TensorFlow with pip, you should be able to +launch TensorBoard from the command line:

+
tensorboard --logdir=/full_path_to_your_logs
+

or from R using:

+
+tensorflow::tensorboard(logdir = "/full_path_to_your_logs")
+
+

Using the TensorBoard callback +

+

The easiest way to use TensorBoard with a Keras model and the +fit() method is with +[callback_tensorboard()].

+

In the simplest case, just specify where you want the callback to +write logs, and you’re good to go:

+
+tb_callback <- callback_tensorboard(
+  log_dir = "/full_path_to_your_logs",
+  histogram_freq = 0, # How often to log histogram visualizations
+  embeddings_freq = 0, # How often to log embedding visualizations
+  update_freq = "epoch" # How often to write logs (default: once per epoch)
+)
+
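Then pass it to fit() like any other callback (a usage sketch):

+
+model <- get_compiled_model()
+model |> fit(x_train, y_train, epochs = 2, batch_size = 64,
+             validation_split = 0.2,
+             callbacks = list(tb_callback))
+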

For more information, see callback_tensorboard().

+
+
+
+
diff --git a/docs/articles/training_with_built_in_methods/unnamed-chunk-26-1.png b/docs/articles/training_with_built_in_methods/unnamed-chunk-26-1.png
new file mode 100644
index 0000000000..7fea86b758
Binary files /dev/null and b/docs/articles/training_with_built_in_methods/unnamed-chunk-26-1.png differ
diff --git a/docs/articles/transfer_learning.html b/docs/articles/transfer_learning.html
new file mode 100644
index 0000000000..6e0e3e657f
--- /dev/null
+++ b/docs/articles/transfer_learning.html
@@ -0,0 +1,742 @@
+Transfer learning & fine-tuning • keras3

Setup +

+
+library(keras3)
+
+
+

Introduction +

+

Transfer learning consists of taking features +learned on one problem, and leveraging them on a new, similar problem. +For instance, features from a model that has learned to identify +raccoons may be useful to kick-start a model meant to identify +tanukis.

+

Transfer learning is usually done for tasks where your dataset has +too little data to train a full-scale model from scratch.

+

The most common incarnation of transfer learning in the context of +deep learning is the following workflow:

+
    +
  1. Take layers from a previously trained model.
  +
  2. Freeze them, so as to avoid destroying any of the information they +contain during future training rounds.
  +
  3. Add some new, trainable layers on top of the frozen layers. They +will learn to turn the old features into predictions on a new +dataset.
  +
  4. Train the new layers on your dataset.
  +
+

A last, optional step, is fine-tuning, which +consists of unfreezing the entire model you obtained above (or part of +it), and re-training it on the new data with a very low learning rate. +This can potentially achieve meaningful improvements, by incrementally +adapting the pretrained features to the new data.

+

First, we will go over the Keras trainable API in +detail, which underlies most transfer learning & fine-tuning +workflows.

+

Then, we’ll demonstrate the typical workflow by taking a model +pretrained on the ImageNet dataset, and retraining it on the Kaggle +“cats vs dogs” classification dataset.

+

This is adapted from Deep +Learning with Python and the 2016 blog post “building +powerful image classification models using very little data”.

+
+
+

Freezing layers: understanding the trainable +attribute +

+

Layers & models have three weight attributes:

+
    +
  • +weights is the list of all weights variables of the +layer.
  • +
  • +trainable_weights is the list of those that are meant +to be updated (via gradient descent) to minimize the loss during +training.
  • +
  • +non_trainable_weights is the list of those that aren’t +meant to be trained. Typically they are updated by the model during the +forward pass.
  • +
+

Example: the Dense layer has 2 trainable weights +(kernel & bias)

+
+layer <- layer_dense(units = 3)
+layer$build(shape(NULL, 4))  # Create the weights
+
+length(layer$weights)
+
## [1] 2
+
+length(layer$trainable_weights)
+
## [1] 2
+
+length(layer$non_trainable_weights)
+
## [1] 0
+

In general, all weights are trainable weights. The only built-in +layer that has non-trainable weights is +[layer_batch_normalization()]. It uses non-trainable +weights to keep track of the mean and variance of its inputs during +training. To learn how to use non-trainable weights in your own custom +layers, see the guide to +writing new layers from scratch.

+

Example: the BatchNormalization layer has 2 +trainable weights and 2 non-trainable weights

+
+layer <- layer_batch_normalization()
+layer$build(shape(NA, 4))  # Create the weights
+
+length(layer$weights)
+
## [1] 4
+
+length(layer$trainable_weights)
+
## [1] 2
+
+length(layer$non_trainable_weights)
+
## [1] 2
+

Layers & models also feature a boolean attribute +trainable. Its value can be changed. Setting +layer$trainable to FALSE moves all the layer’s +weights from trainable to non-trainable. This is called “freezing” the +layer: the state of a frozen layer won’t be updated during training +(either when training with fit() or when training with any +custom loop that relies on trainable_weights to apply +gradient updates).

+

Example: setting trainable to +FALSE

+
+layer <- layer_dense(units = 3)
+layer$build(shape(NULL, 4))  # Create the weights
+layer$trainable <- FALSE  # Freeze the layer
+
+length(layer$weights)
+
## [1] 2
+
+length(layer$trainable_weights)
+
## [1] 0
+
+length(layer$non_trainable_weights)
+
## [1] 2
+

When a trainable weight becomes non-trainable, its value is no longer +updated during training.

+
+# Make a model with 2 layers
+layer1 <- layer_dense(units = 3, activation = "relu")
+layer2 <- layer_dense(units = 3, activation = "sigmoid")
+model <- keras_model_sequential(input_shape = 3) |>
+  layer1() |>
+  layer2()
+
+# Freeze the first layer
+layer1$trainable <- FALSE
+
+# Keep a copy of the weights of layer1 for later reference
+# (get_weights() returns a list of R arrays,
+#  layer$weights returns a list of KerasVariables)
+initial_layer1_weights_values <- get_weights(layer1)
+
+# Train the model
+model |> compile(optimizer = "adam", loss = "mse")
+model |> fit(random_normal(c(2, 3)), random_normal(c(2, 3)), epochs = 1)
+
## 1/1 - 1s - 567ms/step - loss: 2.1868
+
+# Check that the weights of layer1 have not changed during training
+final_layer1_weights_values <- get_weights(layer1)
+
+all.equal(initial_layer1_weights_values,
+          final_layer1_weights_values)
+
## [1] TRUE
+

Do not confuse the layer$trainable attribute with the +argument training in layer$call() (which +controls whether the layer should run its forward pass in inference mode +or training mode). For more information, see the Keras +FAQ.

+
+
+

Recursive setting of the trainable attribute +

+

If you set $trainable <- FALSE on a model or on any +layer that has sublayers, all children layers become non-trainable as +well.

+

Example:

+
+inner_model <- keras_model_sequential(input_shape = 3) |>
+  layer_dense(units = 3, activation = "relu") |>
+  layer_dense(units = 3, activation = "relu")
+
+model <- keras_model_sequential(input_shape = 3) |>
+  inner_model() |>
+  layer_dense(units = 3, activation = "sigmoid")
+
+model$trainable <- FALSE  # Freeze the outer model
+
+inner_model$trainable  # All layers in `model` are now frozen
+
## [1] FALSE
+
+inner_model$layers[[1]]$trainable  # `trainable` is propagated recursively
+
## [1] FALSE
+
+
+

The typical transfer-learning workflow +

+

This leads us to how a typical transfer learning workflow can be +implemented in Keras:

+
    +
  1. Instantiate a base model and load pre-trained weights into it.
  +
  2. Freeze all layers in the base model by setting +trainable <- FALSE.
  +
  3. Create a new model on top of the output of one (or several) layers +from the base model.
  +
  4. Train your new model on your new dataset.
  +
+

Note that an alternative, more lightweight workflow could also +be:

+
    +
  1. Instantiate a base model and load pre-trained weights into it.
  +
  2. Run your new dataset through it and record the output of one (or +several) layers from the base model. This is called feature +extraction.
  +
  3. Use that output as input data for a new, smaller model.
  +
+

A key advantage of that second workflow is that you only run the base +model once on your data, rather than once per epoch of training. So it’s +a lot faster & cheaper.

+
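A minimal sketch of that feature-extraction workflow (new_images and new_labels are hypothetical placeholders for your own preprocessed images and binary targets):

+
+base_model <- application_xception(weights = "imagenet",
+                                   input_shape = c(150, 150, 3),
+                                   include_top = FALSE)
+
+# Run the base model once over the data and record the features
+features <- base_model |> predict(new_images)
+
+# Train a new, smaller model on the recorded features
+inputs <- keras_input(shape = dim(features)[-1])
+outputs <- inputs |>
+  layer_global_average_pooling_2d() |>
+  layer_dense(1)
+small_model <- keras_model(inputs, outputs)
+
+small_model |> compile(
+  optimizer = "adam",
+  loss = loss_binary_crossentropy(from_logits = TRUE)
+)
+small_model |> fit(features, new_labels, epochs = 5)
+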

An issue with that second workflow, though, is that it doesn’t allow +you to dynamically modify the input data of your new model during +training, which is required when doing data augmentation, for instance. +Transfer learning is typically used for tasks where your new dataset has +too little data to train a full-scale model from scratch, and in such +scenarios data augmentation is very important. So in what follows, we +will focus on the first workflow.

+

Here’s what the first workflow looks like in Keras:

+

First, instantiate a base model with pre-trained weights.

+
+base_model <- application_xception(
+  weights = 'imagenet', # Load weights pre-trained on ImageNet.
+  input_shape = c(150, 150, 3),
+  include_top = FALSE  # Do not include the ImageNet classifier at the top.
+)
+

Then, freeze the base model.

+
+base_model$trainable <- FALSE
+

Create a new model on top.

+
+inputs <- keras_input(shape = c(150, 150, 3))
+# We make sure that the base_model is running in inference mode here,
+# by passing `training = FALSE`. This is important for fine-tuning, as you will
+# learn in a few paragraphs.
+outputs <- inputs |>
+  base_model(training = FALSE) |>
+  # Convert features of shape `base_model$output_shape[-1]` to vectors
+  layer_global_average_pooling_2d() |>
+  # A Dense classifier with a single unit (binary classification)
+  layer_dense(1)
+
+model <- keras_model(inputs, outputs)
+

Train the model on new data.

+
+model |> compile(
+  optimizer = optimizer_adam(),
+  loss = loss_binary_crossentropy(from_logits = TRUE),
+  metrics = list(metric_binary_accuracy())
+)
+model |> fit(new_dataset, epochs = 20,
+             callbacks = ..., validation_data = ...)
+
+
+

Fine-tuning +

+

Once your model has converged on the new data, you can try to +unfreeze all or part of the base model and retrain the whole model +end-to-end with a very low learning rate.

+
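For instance, here is a sketch of unfreezing only part of the base model (the “last 20 layers” cutoff is arbitrary):

+
+base_model$trainable <- TRUE  # unfreeze everything...
+n <- length(base_model$layers)
+for (layer in base_model$layers[seq_len(n - 20)])
+  layer$trainable <- FALSE    # ...then re-freeze all but the last 20 layers
+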

This is an optional last step that can potentially give you +incremental improvements. It could also potentially lead to quick +overfitting – keep that in mind.

+

It is critical to only do this step after the model with +frozen layers has been trained to convergence. If you mix +randomly-initialized trainable layers with trainable layers that hold +pre-trained features, the randomly-initialized layers will cause very +large gradient updates during training, which will destroy your +pre-trained features.

+

It’s also critical to use a very low learning rate at this stage, +because you are training a much larger model than in the first round of +training, on a dataset that is typically very small. As a result, you +are at risk of overfitting very quickly if you apply large weight +updates. Here, you only want to readapt the pretrained weights in an +incremental way.

+

This is how to implement fine-tuning of the whole base model:

+
+# Unfreeze the base model
+base_model$trainable <- TRUE
+
+# It's important to recompile your model after you make any changes
+# to the `trainable` attribute of any inner layer, so that your changes
+# are taken into account
+model |> compile(
+  optimizer = optimizer_adam(1e-5), # Very low learning rate
+  loss = loss_binary_crossentropy(from_logits = TRUE),
+  metrics = c(metric_binary_accuracy())
+)
+
+# Train end-to-end. Be careful to stop before you overfit!
+model |> fit(new_dataset, epochs = 10,
+             callbacks = ..., validation_data = ...)
+

Important note about compile() and +trainable

+

Calling compile() on a model is meant to “freeze” the +behavior of that model. This implies that the trainable +attribute values at the time the model is compiled should be preserved +throughout the lifetime of that model, until compile is +called again. Hence, if you change any trainable value, +make sure to call compile() again on your model for your +changes to be taken into account.

+

Important notes about BatchNormalization +layer

+

Many image models contain BatchNormalization layers. +That layer is a special case on every imaginable count. Here are a few +things to keep in mind.

+
    +
  • +BatchNormalization contains 2 non-trainable weights +that get updated during training. These are the variables tracking the +mean and variance of the inputs.
  • +
  • When you set bn_layer$trainable <- FALSE, the +BatchNormalization layer will run in inference mode, and +will not update its mean & variance statistics. This is not the case +for other layers in general, as weight +trainability & inference/training modes are two orthogonal +concepts. But the two are tied in the case of the +BatchNormalization layer.
  • +
  • When you unfreeze a model that contains +BatchNormalization layers in order to do fine-tuning, you +should keep the BatchNormalization layers in inference mode +by passing training = FALSE when calling the base model. +Otherwise the updates applied to the non-trainable weights will suddenly +destroy what the model has learned.
  • +
+

You’ll see this pattern in action in the end-to-end example at the +end of this guide.

+
+
+

An end-to-end example: fine-tuning an image classification model on +a cats vs. dogs dataset +

+

To solidify these concepts, let’s walk you through a concrete +end-to-end transfer learning & fine-tuning example. We will load the +Xception model, pre-trained on ImageNet, and use it on the Kaggle “cats +vs. dogs” classification dataset.

+
+

Getting the data +

+

First, let’s fetch the cats vs. dogs dataset using TFDS. If you have +your own dataset, you’ll probably want to use the utility +[image_dataset_from_directory()] to generate similar +labeled dataset objects from a set of images on disk filed into +class-specific folders.

+

Transfer learning is most useful when working with very small +datasets. To keep our dataset small, we will use 40% of the original +training data (25,000 images) for training, 10% for validation, and 10% +for testing.

+
+# reticulate::py_install("tensorflow-datasets")
+tfds <- reticulate::import("tensorflow_datasets")
+
+c(train_ds, validation_ds, test_ds) %<-% tfds$load(
+  "cats_vs_dogs",
+  # Reserve 10% for validation and 10% for test
+  split = c("train[:40%]", "train[40%:50%]", "train[50%:60%]"),
+  as_supervised = TRUE  # Include labels
+)
+
+length(train_ds)
+
## [1] 9305
+

These are the first 9 images in the training dataset – as you can +see, they’re all different sizes.

+
+library(tfdatasets, exclude = "shape")
+
+par(mfrow = c(3, 3), mar = c(1,0,1.5,0))
+train_ds |>
+  dataset_take(9) |>
+  as_array_iterator() |>
+  iterate(function(batch) {
+    c(image, label) %<-% batch
+    plot(as.raster(image, max = 255L))
+    title(sprintf(
+      "label: %s   size: %s",
+      label, paste(dim(image), collapse = " x ")))
+  })
+
+plot of chunk unnamed-chunk-13
+
+

We can also see that label 1 is “dog” and label 0 is “cat”.

+
+
+

Standardizing the data +

+

Our raw images have a variety of sizes. In addition, each pixel +consists of 3 integer values between 0 and 255 (RGB level values). This +isn’t a great fit for feeding a neural network. We need to do 2 +things:

+
    +
  • Standardize to a fixed image size. We pick 150x150.
  • +
  • Normalize pixel values between -1 and 1. We’ll do this using a +Normalization layer as part of the model itself.
  • +
+

In general, it’s a good practice to develop models that take raw data +as input, as opposed to models that take already-preprocessed data. The +reason being that, if your model expects preprocessed data, any time you +export your model to use it elsewhere (in a web browser, in a mobile +app), you’ll need to reimplement the exact same preprocessing pipeline. +This gets very tricky very quickly. So we should do the least possible +amount of preprocessing before hitting the model.

+

Here, we’ll do image resizing in the data pipeline (because a deep +neural network can only process contiguous batches of data), and we’ll +do the input value scaling as part of the model, when we create it.

+

Let’s resize images to 150x150:

+
+resize_fn <- layer_resizing(width = 150, height = 150)
+resize_pair <- function(x, y) list(resize_fn(x), y)
+
+train_ds <- train_ds |> dataset_map(resize_pair)
+validation_ds <- validation_ds |> dataset_map(resize_pair)
+test_ds <- test_ds |> dataset_map(resize_pair)
+
+
+

Using random data augmentation +

+

When you don’t have a large image dataset, it’s a good practice to +artificially introduce sample diversity by applying random yet realistic +transformations to the training images, such as random horizontal +flipping or small random rotations. This helps expose the model to +different aspects of the training data while slowing down +overfitting.

+
+data_augmentation <- keras_model_sequential() |>
+  layer_random_flip("horizontal") |>
+  layer_random_rotation(.1)
+
+train_ds <- train_ds |>
+  dataset_map(function(x, y) list(data_augmentation(x), y))
+

Let’s batch the data and use prefetching to optimize loading +speed.

+
+library(tensorflow, exclude = c("shape", "set_random_seed"))
+batch_size <- 64
+
+train_ds <- train_ds |>
+  dataset_batch(batch_size) |>
+  dataset_prefetch()
+
+validation_ds <- validation_ds |>
+  dataset_batch(batch_size) |>
+  dataset_prefetch()
+
+test_ds <- test_ds |>
+  dataset_batch(batch_size) |>
+  dataset_prefetch()
+

Let’s visualize what the first image of the first batch looks like +after various random transformations:

+
+batch <- train_ds |>
+  dataset_take(1) |>
+  as_iterator() |>
+  iter_next()
+
+c(images, labels) %<-% batch
+first_image <- images[1, all_dims(), drop = TRUE]
+augmented_image <- data_augmentation(first_image, training = TRUE)
+
+plot_image <- function(image, main = deparse1(substitute(image))) {
+  image |>
+    as.array() |> # convert from tensor to R array
+    as.raster(max = 255) |>
+    plot()
+
+  if(!is.null(main))
+    title(main)
+}
+
+par(mfrow = c(2, 2), mar = c(1, 1, 1.5, 1))
+plot_image(first_image)
+plot_image(augmented_image)
+plot_image(data_augmentation(first_image, training = TRUE), "augmented 2")
+plot_image(data_augmentation(first_image, training = TRUE), "augmented 3")
+
+plot of chunk unnamed-chunk-17
+
+
+
+
+

Build a model +

+

Now let’s build a model that follows the blueprint we’ve explained +earlier.

+

Note that:

+
    +
  • We add a Rescaling layer to scale input values +(initially in the [0, 255] range) to the +[-1, 1] range.
  • +
  • We add a Dropout layer before the classification layer, +for regularization.
  • +
  • We make sure to pass training=FALSE when calling the +base model, so that it runs in inference mode and its batchnorm +statistics don’t get updated even after we unfreeze the base model for +fine-tuning.
  • +
+
+base_model <- application_xception(
+  weights = "imagenet", # Load weights pre-trained on ImageNet.
+  input_shape = c(150, 150, 3),
+  include_top = FALSE, # Do not include the ImageNet classifier at the top.
+)
+
+# Freeze the base_model
+base_model$trainable <- FALSE
+
+# Create new model on top
+inputs <- keras_input(shape = c(150, 150, 3))
+
+# Pre-trained Xception weights requires that input be scaled
+# from (0, 255) to a range of (-1., +1.), the rescaling layer
+# outputs: `(inputs * scale) + offset`
+scale_layer <- layer_rescaling(scale = 1 / 127.5, offset = -1)
+x <- scale_layer(inputs)
+
+# The base model contains batchnorm layers. We want to keep them in inference mode
+# when we unfreeze the base model for fine-tuning, so we make sure that the
+# base_model is running in inference mode here.
+outputs <- x |>
+  base_model(training = FALSE) |>
+  layer_global_average_pooling_2d() |>
+  layer_dropout(0.2) |>
+  layer_dense(1)
+
+model <- keras_model(inputs, outputs)
+
+summary(model, show_trainable = TRUE)
+
## Model: "functional_10"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━┓
+## ┃ Layer (type)                ┃ Output Shape          ┃    Param # ┃ Trai… ┃
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━┩
+## │ input_layer_7 (InputLayer)  │ (None, 150, 150, 3)   │          0 │   -   │
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ rescaling (Rescaling)       │ (None, 150, 150, 3)   │          0 │   -   │
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ xception (Functional)       │ (None, 5, 5, 2048)    │ 20,861,480 │   N   │
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ global_average_pooling2d_1  │ (None, 2048)          │          0 │   -   │
+## │ (GlobalAveragePooling2D)    │                       │            │       │
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ dropout (Dropout)           │ (None, 2048)          │          0 │   -   │
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ dense_8 (Dense)             │ (None, 1)             │      2,049 │   Y   │
+## └─────────────────────────────┴───────────────────────┴────────────┴───────┘
+##  Total params: 20,863,529 (79.59 MB)
+##  Trainable params: 2,049 (8.00 KB)
+##  Non-trainable params: 20,861,480 (79.58 MB)
+
+
+

Train the top layer +

+
+model |> compile(
+  optimizer = optimizer_adam(),
+  loss = loss_binary_crossentropy(from_logits = TRUE),
+  metrics = list(metric_binary_accuracy())
+)
+
+epochs <- 1
+model |> fit(train_ds, epochs = epochs, validation_data = validation_ds)
+
## 146/146 - 45s - 307ms/step - binary_accuracy: 0.9183 - loss: 0.1887 - val_binary_accuracy: 0.9669 - val_loss: 0.0926
+
+
+

Do a round of fine-tuning of the entire model +

+

Finally, let’s unfreeze the base model and train the entire model +end-to-end with a low learning rate.

+

Importantly, although the base model becomes trainable, it is still +running in inference mode since we passed training=FALSE +when calling it when we built the model. This means that the batch +normalization layers inside won’t update their batch statistics. If they +did, they would wreak havoc on the representations learned by the model +so far.

+
+# Unfreeze the base_model. Note that it keeps running in inference mode
+# since we passed `training=FALSE` when calling it. This means that
+# the batchnorm layers will not update their batch statistics.
+# This prevents the batchnorm layers from undoing all the training
+# we've done so far.
+base_model$trainable <- TRUE
+summary(model, show_trainable = TRUE)
+
## Model: "functional_10"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━┓
+## ┃ Layer (type)                ┃ Output Shape          ┃    Param # ┃ Trai… ┃
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━┩
+## │ input_layer_7 (InputLayer)  │ (None, 150, 150, 3)   │          0 │   -   │
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ rescaling (Rescaling)       │ (None, 150, 150, 3)   │          0 │   -   │
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ xception (Functional)       │ (None, 5, 5, 2048)    │ 20,861,480 │   Y   │
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ global_average_pooling2d_1  │ (None, 2048)          │          0 │   -   │
+## │ (GlobalAveragePooling2D)    │                       │            │       │
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ dropout (Dropout)           │ (None, 2048)          │          0 │   -   │
+## ├─────────────────────────────┼───────────────────────┼────────────┼───────┤
+## │ dense_8 (Dense)             │ (None, 1)             │      2,049 │   Y   │
+## └─────────────────────────────┴───────────────────────┴────────────┴───────┘
+##  Total params: 20,867,629 (79.60 MB)
+##  Trainable params: 20,809,001 (79.38 MB)
+##  Non-trainable params: 54,528 (213.00 KB)
+##  Optimizer params: 4,100 (16.02 KB)
+
+model |> compile(
+  optimizer = optimizer_adam(1e-5), # Low learning rate
+  loss = loss_binary_crossentropy(from_logits = TRUE),
+  metrics = list(metric_binary_accuracy())
+)
+
+epochs <- 1
+model |> fit(train_ds, epochs = epochs, validation_data = validation_ds)
+
## 146/146 - 77s - 524ms/step - binary_accuracy: 0.8660 - loss: 0.3213 - val_binary_accuracy: 0.9652 - val_loss: 0.1022
+

After this round of fine-tuning, we gain a nice improvement. Let’s +evaluate the model on the test dataset:

+
+model |> evaluate(test_ds)
+
## 37/37 - 2s - 41ms/step - binary_accuracy: 0.9540 - loss: 0.1103
+
## $binary_accuracy
+## [1] 0.9539983
+##
+## $loss
+## [1] 0.1102595
+
+
+
diff --git a/docs/articles/transfer_learning/unnamed-chunk-13-1.png b/docs/articles/transfer_learning/unnamed-chunk-13-1.png
new file mode 100644
index 0000000000..2679c6251c
Binary files /dev/null and b/docs/articles/transfer_learning/unnamed-chunk-13-1.png differ
diff --git a/docs/articles/transfer_learning/unnamed-chunk-17-1.png b/docs/articles/transfer_learning/unnamed-chunk-17-1.png
new file mode 100644
index 0000000000..6d11cbe6a3
Binary files /dev/null and b/docs/articles/transfer_learning/unnamed-chunk-17-1.png differ
diff --git a/docs/articles/understanding_masking_and_padding.html b/docs/articles/understanding_masking_and_padding.html
new file mode 100644
index 0000000000..3ec7bacfb0
--- /dev/null
+++ b/docs/articles/understanding_masking_and_padding.html
@@ -0,0 +1,522 @@
+Understanding masking & padding • keras3

Setup +

+
+library(keras3)
+
+
+

Introduction +

+

Masking is a way to tell sequence-processing layers +that certain timesteps in an input are missing, and thus should be +skipped when processing the data.

+

Padding is a special form of masking where the +masked steps are at the start or the end of a sequence. Padding comes +from the need to encode sequence data into contiguous batches: in order +to make all sequences in a batch fit a given standard length, it is +necessary to pad or truncate some sequences.

+

Let’s take a close look.

+
+
+

Padding sequence data +

+

When processing sequence data, it is very common for individual +samples to have different lengths. Consider the following example (text +tokenized as words):

+
+data <- list(
+  c("Hello", "world", "!"),
+  c("How", "are", "you", "doing", "today"),
+  c("The", "weather", "will", "be", "nice", "tomorrow")
+)
+

After vocabulary lookup, the data might be vectorized as integers, +e.g.:

+
+data <- list(
+  c(71, 1331, 4231),
+  c(73, 8, 3215, 55, 927),
+  c(83, 91, 1, 645, 1253, 927)
+)
+

The data is a nested list where individual samples have length 3, 5, +and 6, respectively. Since the input data for a deep learning model must +be a single tensor (of shape +e.g. (batch_size, 6, vocab_size) in this case), samples +that are shorter than the longest item need to be padded with some +placeholder value (alternatively, one might also truncate long samples +before padding short samples).

+

Keras provides a utility function to truncate and pad R lists to a common length: pad_sequences.

+
+raw_inputs <- list(
+  c(711, 632, 71),
+  c(73, 8, 3215, 55, 927),
+  c(83, 91, 1, 645, 1253, 927)
+)
+
+# By default, this will pad using 0s; it is configurable via the
+# "value" parameter.
+# Note that you could use "pre" padding (at the beginning) or
+# "post" padding (at the end).
+# We recommend using "post" padding when working with RNN layers
+# (in order to be able to use the
+# CuDNN implementation of the layers).
+padded_inputs <- pad_sequences(raw_inputs, padding="post")
+padded_inputs
+
##      [,1] [,2] [,3] [,4] [,5] [,6]
+## [1,]  711  632   71    0    0    0
+## [2,]   73    8 3215   55  927    0
+## [3,]   83   91    1  645 1253  927
+
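If you need a fixed common length across batches, you can also pass maxlen; sequences longer than maxlen are cut, and truncating chooses which end is dropped. A minimal sketch reusing the raw_inputs above (argument names as in pad_sequences()):
+
+pad_sequences(raw_inputs, maxlen = 4, padding = "post", truncating = "post")
+# Expected rows: (711, 632, 71, 0), (73, 8, 3215, 55), (83, 91, 1, 645)
+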
+
+

Masking +

+

Now that all samples have a uniform length, the model must be +informed that some part of the data is actually padding and should be +ignored. That mechanism is masking.

+

There are three ways to introduce input masks in Keras models:

+
    +
  • Add a layer_masking layer.
  • +
  • Configure a layer_embedding layer with +mask_zero=TRUE.
  • +
  • Pass a mask argument manually when calling layers that +support this argument (e.g. RNN layers).
  • +
+
+
+

Mask-generating layers: Embedding and +Masking +

+

Under the hood, these layers will create a mask tensor (2D tensor +with shape (batch, sequence_length)), and attach it to the +tensor output returned by the Masking or +Embedding layer.

+
+embedding <- layer_embedding(input_dim=5000, output_dim=16, mask_zero=TRUE)
+masked_output <- embedding(padded_inputs)
+
+masked_output$`_keras_mask`
+
## tf.Tensor(
+## [[ True  True  True False False False]
+##  [ True  True  True  True  True False]
+##  [ True  True  True  True  True  True]], shape=(3, 6), dtype=bool)
+
+masking_layer <- layer_masking()
+# Simulate the embedding lookup by expanding the 2D input to 3D,
+# with embedding dimension of 10.
+unmasked_embedding <- op_cast(
+    op_tile(op_expand_dims(padded_inputs, axis=-1), c(1L, 1L, 10L)),
+    dtype="float32"
+)
+
+masked_embedding <- masking_layer(unmasked_embedding)
+masked_embedding$`_keras_mask`
+
## tf.Tensor(
+## [[ True  True  True False False False]
+##  [ True  True  True  True  True False]
+##  [ True  True  True  True  True  True]], shape=(3, 6), dtype=bool)
+

As you can see from the printed result, the mask is a 2D boolean +tensor with shape (batch_size, sequence_length), where each +individual FALSE entry indicates that the corresponding +timestep should be ignored during processing.

+
+
+

Mask propagation in the Functional API and Sequential API +

+

When using the Functional API or the Sequential API, a mask generated +by an Embedding or Masking layer will be +propagated through the network for any layer that is capable of using +them (for example, RNN layers). Keras will automatically fetch the mask +corresponding to an input and pass it to any layer that knows how to use +it.

+

For instance, in the following Sequential model, the +LSTM layer will automatically receive a mask, which means +it will ignore padded values:

+
+model <- keras_model_sequential() %>%
+  layer_embedding(input_dim=5000, output_dim=16, mask_zero=TRUE) %>%
+  layer_lstm(units=32)
+

This is also the case for the following Functional API model:

+
+inputs <- keras_input(shape = shape(NULL), dtype="int32")
+outputs <- inputs %>%
+  layer_embedding(input_dim=5000, output_dim=16, mask_zero=TRUE) %>%
+  layer_lstm(units=32)
+
+model <- keras_model(inputs, outputs)
+
+
+

Passing mask tensors directly to layers +

+

Layers that can handle masks (such as the LSTM layer) +have a mask argument in their call method.

+

Meanwhile, layers that produce a mask (e.g. Embedding) +expose a compute_mask(input, previous_mask) method which +you can call.

+

Thus, you can pass the output of the compute_mask() +method of a mask-producing layer to the call method of a +mask-consuming layer, like this:

+
+MyLayer <- new_layer_class(
+  "MyLayer",
+  initialize = function(...) {
+    super$initialize(...)
+    self$embedding <- layer_embedding(
+      input_dim=5000, output_dim=16, mask_zero=TRUE
+    )
+    self$lstm <- layer_lstm(units=32)
+  },
+  call = function(inputs) {
+    inputs %>%
+      self$embedding() %>%
+      # Note that you could also prepare a `mask` tensor manually.
+      # It only needs to be a boolean tensor
+      # with the right shape, i.e. (batch_size, timesteps).
+      self$lstm(mask=self$embedding$compute_mask(inputs))
+  }
+)
+
+layer <- MyLayer()
+x <- random_integer(c(32, 10), 0, 100)
+layer(x)
+
## tf.Tensor(
+## [[ 0.00130048 -0.00113367 -0.00715671 ... -0.00107615 -0.00162071
+##    0.00135018]
+##  [-0.004185    0.00726349  0.00520932 ...  0.00119117  0.00230441
+##    0.00174123]
+##  [-0.00537032 -0.00164898 -0.00238435 ... -0.00154158 -0.0038603
+##   -0.00105811]
+##  ...
+##  [ 0.00622133 -0.00905907 -0.00599518 ...  0.00025823 -0.00142478
+##   -0.00125036]
+##  [-0.00523904  0.00336683 -0.00299453 ...  0.00876719  0.00172074
+##    0.00903089]
+##  [-0.00393721  0.00058538  0.00503809 ... -0.00203075  0.00325885
+##   -0.00299755]], shape=(32, 32), dtype=float32)
+
+
+

Supporting masking in your custom layers +

+

Sometimes, you may need to write layers that generate a mask (like +Embedding), or layers that need to modify the current +mask.

+

For instance, any layer that produces a tensor with a different time +dimension than its input, such as a Concatenate layer that +concatenates on the time dimension, will need to modify the current mask +so that downstream layers will be able to properly take masked timesteps +into account.

+

To do this, your layer should implement the +layer.compute_mask() method, which produces a new mask +given the input and the current mask.

+

Here is an example of a TemporalSplit layer that needs +to modify the current mask.

+
+TemporalSplit <- new_layer_class(
+  "TemporalSplit",
+  call = function(inputs) {
+    # Expect the input to be 3D and the mask to be 2D; split the input tensor
+    # into 2 subtensors along the time axis (axis 2 here, since R axes are 1-based).
+    op_split(inputs, 2, axis=2)
+  },
+  compute_mask = function(inputs, mask = NULL) {
+    # Also split the mask into 2, if one is present.
+    if (!is.null(mask)) {
+      op_split(mask, 2, axis=2)
+    } else {
+      NULL
+    }
+  }
+)
+
+c(first_half, second_half) %<-% TemporalSplit(masked_embedding)
+first_half$`_keras_mask`
+
## tf.Tensor(
+## [[ True  True  True]
+##  [ True  True  True]
+##  [ True  True  True]], shape=(3, 3), dtype=bool)
+
+second_half$`_keras_mask`
+
## tf.Tensor(
+## [[False False False]
+##  [ True  True False]
+##  [ True  True  True]], shape=(3, 3), dtype=bool)
+

Here is another example of a CustomEmbedding layer that +is capable of generating a mask from input values:

+
+CustomEmbedding <- new_layer_class(
+  "CustomEmbedding",
+  initialize = function(input_dim, output_dim, mask_zero=FALSE, ...) {
+    super$initialize(...)
+    self$input_dim <- as.integer(input_dim)
+    self$output_dim <- as.integer(output_dim)
+    self$mask_zero <- mask_zero
+  },
+  build = function(input_shape) {
+    self$embeddings <- self$add_weight(
+      shape=c(self$input_dim, self$output_dim),
+      initializer="random_normal",
+      dtype="float32"
+    )
+  },
+  call = function(inputs) {
+    inputs <- op_cast(inputs, "int32")
+    # Look up rows of the embedding matrix (its first axis).
+    op_take(self$embeddings, inputs, axis = 1)
+  },
+  compute_mask = function(inputs, mask=NULL) {
+    if (!self$mask_zero) {
+      NULL
+    } else {
+      op_not_equal(inputs, 0)
+    }
+  }
+)
+
+layer <- CustomEmbedding(input_dim = 10, output_dim = 32, mask_zero=TRUE)
+x <- random_integer(c(3, 10), 0, 9)
+
+y <- layer(x)
+mask <- layer$compute_mask(x)
+
+mask
+
## tf.Tensor(
+## [[ True  True  True  True  True  True  True  True  True  True]
+##  [ True  True  True  True  True  True  True  True  True  True]
+##  [False False  True  True  True  True  True  True  True  True]], shape=(3, 10), dtype=bool)
+

Note: For more details about format limitations related to masking, +see the serialization +guide.

+
+
+

Opting-in to mask propagation on compatible layers +

+

Most layers don’t modify the time dimension, so don’t need to modify +the current mask. However, they may still want to be able to +propagate the current mask, unchanged, to the next +layer. This is an opt-in behavior. By default, a custom +layer will destroy the current mask (since the framework has no way to +tell whether propagating the mask is safe to do).

+

If you have a custom layer that does not modify the time dimension, and if you want it to be able to propagate the current input mask, you should set self$supports_masking <- TRUE in the layer constructor. In this case, the default behavior of compute_mask() is to just pass the current mask through.

+

Here’s an example of a layer that is whitelisted for mask +propagation:

+
+MyActivation <- new_layer_class(
+  "MyActivation",
+  initialize = function(...) {
+    super$initialize(...)
+    self$supports_masking <- TRUE
+  },
+  call = function(inputs) {
+    op_relu(inputs)
+  }
+)
+

You can now use this custom layer in-between a mask-generating layer +(like Embedding) and a mask-consuming layer (like +LSTM), and it will pass the mask along so that it reaches +the mask-consuming layer.

+
+inputs <- keras_input(shape = shape(NULL), dtype="int32")
+outputs <- inputs %>%
+  layer_embedding(input_dim=5000, output_dim=16, mask_zero=TRUE) %>%
+  MyActivation() %>%
+  layer_lstm(units=32)
+
+model <- keras_model(inputs, outputs)
+y <- model(random_integer(c(32, 100), 0, 5000))
+
+
+

Writing layers that need mask information +

+

Some layers are mask consumers: they accept a +mask argument in call and use it to determine +whether to skip certain time steps.

+

To write such a layer, you can simply add a mask = NULL argument in your call signature. The mask associated with the inputs will be passed to your layer whenever it is available.

+

Here’s a simple example below: a layer that computes a softmax over the time dimension (the second axis of a (batch, timesteps, features) input) of an input sequence, while discarding masked timesteps.

+
+TemporalSoftmax <- new_layer_class(
+  "TemporalSoftmax",
+  initialize = function(...) {
+    super$initialize(...)
+    self$supports_masking <- TRUE
+  },
+  call = function(inputs, mask=NULL) {
+    if (is.null(mask)) {
+      stop("`TemporalSoftmax` layer requires a previous layer to support masking.")
+    }
+    broadcast_float_mask <- op_expand_dims(op_cast(mask, "float32"), -1)
+    inputs_exp <- op_exp(inputs) * broadcast_float_mask
+    inputs_sum <- op_sum(inputs_exp, axis=2, keepdims=TRUE) # sum over the time axis
+    inputs_exp / inputs_sum
+  }
+)
+
+inputs <- keras_input(shape = shape(NULL), dtype="int32")
+outputs <- inputs %>%
+  layer_embedding(input_dim=10, output_dim=32, mask_zero=TRUE) %>%
+  layer_dense(1) %>%
+  TemporalSoftmax()
+
+model <- keras_model(inputs, outputs)
+y <- model(random_integer(c(32, 100), 0, 10))
+
+
+

Summary +

+

That is all you need to know about padding & masking in Keras. To +recap:

+
    +
  • “Masking” is how layers are able to know when to skip / ignore +certain timesteps in sequence inputs.
  • +
  • Some layers are mask-generators: Embedding can generate +a mask from input values (if mask_zero=TRUE), and so can +the Masking layer.
  • +
  • Some layers are mask-consumers: they expose a mask +argument in their call method. This is the case for RNN +layers.
  • +
  • In the Functional API and Sequential API, mask information is +propagated automatically.
  • +
  • When using layers in a standalone way, you can pass the +mask arguments to layers manually.
  • +
  • You can easily write layers that modify the current mask, that +generate a new mask, or that consume the mask associated with the +inputs.
  • +
+
+
+
+
diff --git a/docs/articles/writing_a_custom_training_loop_in_jax.html b/docs/articles/writing_a_custom_training_loop_in_jax.html
new file mode 100644
index 0000000000..3c11b136b9
--- /dev/null
+++ b/docs/articles/writing_a_custom_training_loop_in_jax.html
@@ -0,0 +1,589 @@
+Writing a training loop from scratch in JAX • keras3
+
+

Setup +

+
import os
+
+# This guide can only be run with the jax backend.
+os.environ["KERAS_BACKEND"] = "jax"
+
+import jax
+
+# We import TF so we can use tf.data.
+import tensorflow as tf
+import keras
+import numpy as np
+
+
+

Introduction +

+

Keras provides default training and evaluation loops, +fit() and evaluate(). Their usage is covered +in the guide Training +& evaluation with the built-in methods.

+

If you want to customize the learning algorithm of your model while +still leveraging the convenience of fit() (for instance, to +train a GAN using fit()), you can subclass the +Model class and implement your own +train_step() method, which is called repeatedly during +fit().

+

Now, if you want very low-level control over training & +evaluation, you should write your own training & evaluation loops +from scratch. This is what this guide is about.

+
+
+

A first end-to-end example +

+

To write a custom training loop, we need the following +ingredients:

+
    +
  • A model to train, of course.
  • +
  • An optimizer. You could either use an optimizer from +keras.optimizers, or one from the optax +package.
  • +
  • A loss function.
  • +
  • A dataset. The standard in the JAX ecosystem is to load data via +tf.data, so that’s what we’ll use.
  • +
+

Let’s line them up.

+

First, let’s get the model and the MNIST dataset:

+
def get_model():
+    inputs = keras.Input(shape=(784,), name="digits")
+    x1 = keras.layers.Dense(64, activation="relu")(inputs)
+    x2 = keras.layers.Dense(64, activation="relu")(x1)
+    outputs = keras.layers.Dense(10, name="predictions")(x2)
+    model = keras.Model(inputs=inputs, outputs=outputs)
+    return model
+
+
+model = get_model()
+
+# Prepare the training dataset.
+batch_size = 32
+(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
+x_train = np.reshape(x_train, (-1, 784)).astype("float32")
+x_test = np.reshape(x_test, (-1, 784)).astype("float32")
+y_train = keras.utils.to_categorical(y_train)
+y_test = keras.utils.to_categorical(y_test)
+
+# Reserve 10,000 samples for validation.
+x_val = x_train[-10000:]
+y_val = y_train[-10000:]
+x_train = x_train[:-10000]
+y_train = y_train[:-10000]
+
+# Prepare the training dataset.
+train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
+train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)
+
+# Prepare the validation dataset.
+val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
+val_dataset = val_dataset.batch(batch_size)
+

Next, here’s the loss function and the optimizer. We’ll use a Keras +optimizer in this case.

+
# Instantiate a loss function.
+loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True)
+
+# Instantiate an optimizer.
+optimizer = keras.optimizers.Adam(learning_rate=1e-3)
+
+

Getting gradients in JAX +

+

Let’s train our model using mini-batch gradient with a custom +training loop.

+

In JAX, gradients are computed via metaprogramming: you call jax.grad (or jax.value_and_grad) on a function in order to create a gradient-computing function for that first function.

+

So the first thing we need is a function that returns the loss value. +That’s the function we’ll use to generate the gradient function. +Something like this:

+
def compute_loss(x, y):
+    ...
+    return loss
+

Once you have such a function, you can compute gradients via metaprogramming like this:

+
grad_fn = jax.grad(compute_loss)
+grads = grad_fn(x, y)
+

Typically, you don’t just want to get the gradient values, you also +want to get the loss value. You can do this by using +jax.value_and_grad instead of jax.grad:

+
grad_fn = jax.value_and_grad(compute_loss)
+loss, grads = grad_fn(x, y)
+
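As a self-contained illustration, here is a toy least-squares loss (all names below are illustrative, not part of the guide):
+
+import jax
+import jax.numpy as jnp
+
+def compute_loss(w, x, y):
+    # Mean squared error of a linear model -- a stand-in for a real loss.
+    return jnp.mean((x @ w - y) ** 2)
+
+grad_fn = jax.value_and_grad(compute_loss)  # differentiates w.r.t. `w` (argument 0)
+loss, grads = grad_fn(jnp.zeros(3), jnp.ones((4, 3)), jnp.ones(4))
+# `loss` is a scalar; `grads` has the same shape as `w`.
+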
+
+

JAX computation is purely stateless +

+

In JAX, everything must be a stateless function – so our loss computation function must be stateless as well. That means that all Keras variables (e.g. weight tensors) must be passed as function inputs, and any variable that has been updated during the forward pass must be returned as function output. The function must have no side effects.

+

During the forward pass, the non-trainable variables of a Keras model +might get updated. These variables could be, for instance, RNG seed +state variables or BatchNormalization statistics. We’re going to need to +return those. So we need something like this:

+
def compute_loss_and_updates(trainable_variables, non_trainable_variables, x, y):
+    ...
+    return loss, non_trainable_variables
+

Once you have such a function, you can get the gradient function by specifying has_aux in value_and_grad: it tells JAX that the loss computation function returns more outputs than just the loss. Note that the loss should always be the first output.

+
grad_fn = jax.value_and_grad(compute_loss_and_updates, has_aux=True)
+(loss, non_trainable_variables), grads = grad_fn(
+    trainable_variables, non_trainable_variables, x, y
+)
+

Now that we have established the basics, let’s implement this +compute_loss_and_updates function. Keras models have a +stateless_call method which will come in handy here. It +works just like model.__call__, but it requires you to +explicitly pass the value of all the variables in the model, and it +returns not just the __call__ outputs but also the +(potentially updated) non-trainable variables.

+
def compute_loss_and_updates(
+    trainable_variables, non_trainable_variables, x, y
+):
+    y_pred, non_trainable_variables = model.stateless_call(
+        trainable_variables, non_trainable_variables, x
+    )
+    loss = loss_fn(y, y_pred)
+    return loss, non_trainable_variables
+

Let’s get the gradient function:

+
grad_fn = jax.value_and_grad(compute_loss_and_updates, has_aux=True)
+
+
+

The training step function +

+

Next, let’s implement the end-to-end training step: the function that runs the forward pass, computes the loss and its gradients, and uses the optimizer to update the trainable variables. This function also needs to be stateless, so it will get as input a state tuple that includes every state element we’re going to use:

+
    +
  • +trainable_variables and +non_trainable_variables: the model’s variables.
  • +
  • +optimizer_variables: the optimizer’s state variables, +such as momentum accumulators.
  • +
+

To update the trainable variables, we use the optimizer’s stateless +method stateless_apply. It’s equivalent to +optimizer.apply(), but it requires always passing +trainable_variables and optimizer_variables. +It returns both the updated trainable variables and the updated +optimizer_variables.

+
def train_step(state, data):
+    trainable_variables, non_trainable_variables, optimizer_variables = state
+    x, y = data
+    (loss, non_trainable_variables), grads = grad_fn(
+        trainable_variables, non_trainable_variables, x, y
+    )
+    trainable_variables, optimizer_variables = optimizer.stateless_apply(
+        optimizer_variables, grads, trainable_variables
+    )
+    # Return updated state
+    return loss, (
+        trainable_variables,
+        non_trainable_variables,
+        optimizer_variables,
+    )
+
+
+

Make it fast with jax.jit +

+

By default, JAX operations run eagerly, just like in TensorFlow eager +mode and PyTorch eager mode. And just like TensorFlow eager mode and +PyTorch eager mode, it’s pretty slow – eager mode is better used as a +debugging environment, not as a way to do any actual work. So let’s make +our train_step fast by compiling it.

+

When you have a stateless JAX function, you can compile it to XLA via the @jax.jit decorator. It will get traced during its first execution, and in subsequent executions you will be executing the traced graph (this is just like @tf.function(jit_compile=True)). Let’s try it:

+
@jax.jit
+def train_step(state, data):
+    trainable_variables, non_trainable_variables, optimizer_variables = state
+    x, y = data
+    (loss, non_trainable_variables), grads = grad_fn(
+        trainable_variables, non_trainable_variables, x, y
+    )
+    trainable_variables, optimizer_variables = optimizer.stateless_apply(
+        optimizer_variables, grads, trainable_variables
+    )
+    # Return updated state
+    return loss, (
+        trainable_variables,
+        non_trainable_variables,
+        optimizer_variables,
+    )
+

We’re now ready to train our model. The training loop itself is +trivial: we just repeatedly call +loss, state = train_step(state, data).

+

Note:

+
    +
  • We convert the TF tensors yielded by the +tf.data.Dataset to NumPy before passing them to our JAX +function.
  • +
  • All variables must be built beforehand: the model must be built and +the optimizer must be built. Since we’re using a Functional API model, +it’s already built, but if it were a subclassed model you’d need to call +it on a batch of data to build it.
  • +
+
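For example, for a hypothetical subclassed model (the name here is illustrative), building can be as simple as calling it once:
+
+# Calling the model on a small batch creates all of its variables.
+_ = my_subclassed_model(x_train[:1])
+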
# Build optimizer variables.
+optimizer.build(model.trainable_variables)
+
+trainable_variables = model.trainable_variables
+non_trainable_variables = model.non_trainable_variables
+optimizer_variables = optimizer.variables
+state = trainable_variables, non_trainable_variables, optimizer_variables
+
+# Training loop
+for step, data in enumerate(train_dataset):
+    data = (data[0].numpy(), data[1].numpy())
+    loss, state = train_step(state, data)
+    # Log every 100 batches.
+    if step % 100 == 0:
+        print(f"Training loss (for 1 batch) at step {step}: {float(loss):.4f}")
+        print(f"Seen so far: {(step + 1) * batch_size} samples")
+

A key thing to notice here is that the loop is entirely stateless – +the variables attached to the model (model.weights) are +never getting updated during the loop. Their new values are only stored +in the state tuple. That means that at some point, before +saving the model, you should be attaching the new variable values back +to the model.

+

Just call variable.assign(new_value) on each model +variable you want to update:

+
trainable_variables, non_trainable_variables, optimizer_variables = state
+for variable, value in zip(model.trainable_variables, trainable_variables):
+    variable.assign(value)
+for variable, value in zip(
+    model.non_trainable_variables, non_trainable_variables
+):
+    variable.assign(value)
+
+
+
+

Low-level handling of metrics +

+

Let’s add metrics monitoring to this basic training loop.

+

You can readily reuse built-in Keras metrics (or custom ones you +wrote) in such training loops written from scratch. Here’s the flow:

+
    +
  • Instantiate the metric at the start of the loop
  • +
  • Include metric_variables in the train_step +arguments and compute_loss_and_updates arguments.
  • +
  • Call metric.stateless_update_state() in the +compute_loss_and_updates function. It’s equivalent to +update_state() – only stateless.
  • +
  • When you need to display the current value of the metric, outside the train_step (in the eager scope), attach the new metric variable values to the metric object and call metric.result().
  • +
  • Call metric.reset_state() when you need to clear the +state of the metric (typically at the end of an epoch)
  • +
+

Let’s use this knowledge to compute CategoricalAccuracy +on training and validation data at the end of training:

+
# Get a fresh model
+model = get_model()
+
+# Instantiate an optimizer to train the model.
+optimizer = keras.optimizers.Adam(learning_rate=1e-3)
+# Instantiate a loss function.
+loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True)
+
+# Prepare the metrics.
+train_acc_metric = keras.metrics.CategoricalAccuracy()
+val_acc_metric = keras.metrics.CategoricalAccuracy()
+
+
+def compute_loss_and_updates(
+    trainable_variables, non_trainable_variables, metric_variables, x, y
+):
+    y_pred, non_trainable_variables = model.stateless_call(
+        trainable_variables, non_trainable_variables, x
+    )
+    loss = loss_fn(y, y_pred)
+    metric_variables = train_acc_metric.stateless_update_state(
+        metric_variables, y, y_pred
+    )
+    return loss, (non_trainable_variables, metric_variables)
+
+
+grad_fn = jax.value_and_grad(compute_loss_and_updates, has_aux=True)
+
+
+@jax.jit
+def train_step(state, data):
+    (
+        trainable_variables,
+        non_trainable_variables,
+        optimizer_variables,
+        metric_variables,
+    ) = state
+    x, y = data
+    (loss, (non_trainable_variables, metric_variables)), grads = grad_fn(
+        trainable_variables, non_trainable_variables, metric_variables, x, y
+    )
+    trainable_variables, optimizer_variables = optimizer.stateless_apply(
+        optimizer_variables, grads, trainable_variables
+    )
+    # Return updated state
+    return loss, (
+        trainable_variables,
+        non_trainable_variables,
+        optimizer_variables,
+        metric_variables,
+    )
+

We’ll also prepare an evaluation step function:

+
@jax.jit
+def eval_step(state, data):
+    trainable_variables, non_trainable_variables, metric_variables = state
+    x, y = data
+    y_pred, non_trainable_variables = model.stateless_call(
+        trainable_variables, non_trainable_variables, x
+    )
+    loss = loss_fn(y, y_pred)
+    metric_variables = val_acc_metric.stateless_update_state(
+        metric_variables, y, y_pred
+    )
+    return loss, (
+        trainable_variables,
+        non_trainable_variables,
+        metric_variables,
+    )
+

Here are our loops:

+
# Build optimizer variables.
+optimizer.build(model.trainable_variables)
+
+trainable_variables = model.trainable_variables
+non_trainable_variables = model.non_trainable_variables
+optimizer_variables = optimizer.variables
+metric_variables = train_acc_metric.variables
+state = (
+    trainable_variables,
+    non_trainable_variables,
+    optimizer_variables,
+    metric_variables,
+)
+
+# Training loop
+for step, data in enumerate(train_dataset):
+    data = (data[0].numpy(), data[1].numpy())
+    loss, state = train_step(state, data)
+    # Log every 100 batches.
+    if step % 100 == 0:
+        print(f"Training loss (for 1 batch) at step {step}: {float(loss):.4f}")
+        _, _, _, metric_variables = state
+        for variable, value in zip(
+            train_acc_metric.variables, metric_variables
+        ):
+            variable.assign(value)
+        print(f"Training accuracy: {train_acc_metric.result()}")
+        print(f"Seen so far: {(step + 1) * batch_size} samples")
+
+(
+    trainable_variables,
+    non_trainable_variables,
+    optimizer_variables,
+    _,
+) = state
+# Use fresh variables for the validation metric, not the training metric's.
+metric_variables = val_acc_metric.variables
+state = trainable_variables, non_trainable_variables, metric_variables
+
+# Eval loop
+for step, data in enumerate(val_dataset):
+    data = (data[0].numpy(), data[1].numpy())
+    loss, state = eval_step(state, data)
+    # Log every 100 batches.
+    if step % 100 == 0:
+        print(
+            f"Validation loss (for 1 batch) at step {step}: {float(loss):.4f}"
+        )
+        _, _, metric_variables = state
+        for variable, value in zip(val_acc_metric.variables, metric_variables):
+            variable.assign(value)
+        print(f"Validation accuracy: {val_acc_metric.result()}")
+        print(f"Seen so far: {(step + 1) * batch_size} samples")
+
+
+

Low-level handling of losses tracked by the model +

+

Layers & models recursively track any losses created during the forward pass by layers that call self.add_loss(value). The resulting list of scalar loss values is available via the property model.losses at the end of the forward pass.

+

If you want to be using these loss components, you should sum them +and add them to the main loss in your training step.

+

Consider this layer, which creates an activity regularization loss:

+
class ActivityRegularizationLayer(keras.layers.Layer):
+    def call(self, inputs):
+        self.add_loss(1e-2 * jax.numpy.sum(inputs))
+        return inputs
+

Let’s build a really simple model that uses it:

+
inputs = keras.Input(shape=(784,), name="digits")
+x = keras.layers.Dense(64, activation="relu")(inputs)
+# Insert activity regularization as a layer
+x = ActivityRegularizationLayer()(x)
+x = keras.layers.Dense(64, activation="relu")(x)
+outputs = keras.layers.Dense(10, name="predictions")(x)
+
+model = keras.Model(inputs=inputs, outputs=outputs)
+

Here’s what our compute_loss_and_updates function should +look like now:

+
    +
  • Pass return_losses=True to +model.stateless_call().
  • +
  • Sum the resulting losses and add them to the main +loss.
  • +
+
def compute_loss_and_updates(
+    trainable_variables, non_trainable_variables, metric_variables, x, y
+):
+    y_pred, non_trainable_variables, losses = model.stateless_call(
+        trainable_variables, non_trainable_variables, x, return_losses=True
+    )
+    loss = loss_fn(y, y_pred)
+    if losses:
+        loss += jax.numpy.sum(losses)
+    metric_variables = train_acc_metric.stateless_update_state(
+        metric_variables, y, y_pred
+    )
+    return loss, (non_trainable_variables, metric_variables)
+

That’s it!

+
+
+
+
diff --git a/docs/articles/writing_a_custom_training_loop_in_tensorflow.html b/docs/articles/writing_a_custom_training_loop_in_tensorflow.html
new file mode 100644
index 0000000000..52cb1888a6
--- /dev/null
+++ b/docs/articles/writing_a_custom_training_loop_in_tensorflow.html
@@ -0,0 +1,739 @@
+Writing a training loop from scratch in TensorFlow • keras3
+
+

Setup +

+
+library(tensorflow, exclude = c("shape", "set_random_seed"))
+library(tfdatasets, exclude = "shape")
+library(keras3)
+
+# This guide can only be run with the TensorFlow backend.
+use_backend("tensorflow")
+
+
+

Introduction +

+

Keras provides default training and evaluation loops, +fit() and evaluate(). Their usage is covered +in the guide Training +& evaluation with the built-in methods.

+

If you want to customize the learning algorithm of your model while +still leveraging the convenience of fit() (for instance, to +train a GAN using fit()), you can subclass the +Model class and implement your own +train_step() method, which is called repeatedly during +fit(). This is covered in the guide Customizing what happens in +fit().

+

Now, if you want very low-level control over training & +evaluation, you should write your own training & evaluation loops +from scratch. This is what this guide is about.

+
+
+

Using the GradientTape: a first end-to-end example +

+

Calling a model inside a GradientTape scope enables you +to retrieve the gradients of the trainable weights of the layer with +respect to a loss value. Using an optimizer instance, you can use these +gradients to update these variables (which you can retrieve using +model$trainable_weights).

+

Let’s consider a simple MNIST model:

+
+get_model <- function() {
+  inputs <- keras_input(shape = 784, name = "digits")
+  outputs <- inputs |>
+    layer_dense(units = 64, activation = "relu") |>
+    layer_dense(units = 64, activation = "relu") |>
+    layer_dense(units = 10, name = "predictions")
+  keras_model(inputs = inputs, outputs = outputs)
+}
+
+model <- get_model()
+

Let’s train it using mini-batch gradient with a custom training +loop.

+

First, we’re going to need an optimizer, a loss function, and a +dataset:

+
+# Instantiate an optimizer.
+optimizer <- optimizer_adam(learning_rate = 1e-3)
+# Instantiate a loss function.
+loss_fn <- loss_sparse_categorical_crossentropy(from_logits = TRUE)
+
+# Prepare the training dataset.
+batch_size <- 64
+c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist()
+x_train <- array_reshape(x_train, c(-1, 784))
+x_test <- array_reshape(x_test, c(-1, 784))
+
+# Reserve 10,000 samples for validation.
+val_i <- sample.int(nrow(x_train), 10000)
+x_val <- x_train[val_i,]
+y_val <- y_train[val_i]
+x_train %<>% .[-val_i,]
+y_train %<>% .[-val_i]
+
+# Prepare the training dataset.
+train_dataset <- list(x_train, y_train) |>
+  tensor_slices_dataset() |>
+  dataset_shuffle(buffer_size = 1024) |>
+  dataset_batch(batch_size)
+
+# Prepare the validation dataset.
+val_dataset <- list(x_val, y_val) |>
+  tensor_slices_dataset() |>
+  dataset_batch(batch_size)
+

Here’s our training loop:

+
    +
  • We open a for loop that iterates over epochs
  • +
  • For each epoch, we iterate over the dataset, in batches
  • +
  • For each batch, we open a GradientTape() scope
  • +
  • Inside this scope, we call the model (forward pass) and compute the +loss
  • +
  • Outside the scope, we retrieve the gradients of the weights of the +model with regard to the loss
  • +
  • Finally, we use the optimizer to update the weights of the model +based on the gradients
  • +
+
+epochs <- 3
+for (epoch in seq_len(epochs)) {
+  cat("Start of epoch ", epoch, "\n")
+   # Iterate over the batches of the dataset.
+  step <- 0
+  iterator <- as_iterator(train_dataset)
+  while (!is.null(batch <- iter_next(iterator))) {
+    c(x_batch_train, y_batch_train) %<-% batch
+    step <- step + 1
+    # Open a GradientTape to record the operations run
+    # during the forward pass, which enables auto-differentiation.
+    with(tf$GradientTape() %as% tape, {
+      # Run the forward pass of the layer.
+      # The operations that the layer applies
+      # to its inputs are going to be recorded
+      # on the GradientTape.
+      logits <- model(x_batch_train, training = TRUE) # Logits for this minibatch
+
+      # Compute the loss value for this minibatch.
+      loss_value <- loss_fn(y_batch_train, logits)
+    })
+
+    # Use the gradient tape to automatically retrieve
+    # the gradients of the trainable variables with respect to the loss.
+    gradients <- tape$gradient(loss_value, model$trainable_weights)
+
+    # Run one step of gradient descent by updating
+    # the value of the variables to minimize the loss.
+    optimizer$apply(gradients, model$trainable_weights)
+
+    # Log every 200 batches.
+    if (step %% 200 == 0) {
+      cat(sprintf("Training loss (for one batch) at step %d: %.4f\n",
+                  step, loss_value))
+      cat(sprintf("Seen so far: %d samples\n", (step * batch_size)))
+    }
+  }
+}
+
## Start of epoch  1
+## Training loss (for one batch) at step 200: 1.1675
+## Seen so far: 12800 samples
+## Training loss (for one batch) at step 400: 1.6450
+## Seen so far: 25600 samples
+## Training loss (for one batch) at step 600: 0.6477
+## Seen so far: 38400 samples
+## Start of epoch  2
+## Training loss (for one batch) at step 200: 0.6421
+## Seen so far: 12800 samples
+## Training loss (for one batch) at step 400: 0.1827
+## Seen so far: 25600 samples
+## Training loss (for one batch) at step 600: 0.4249
+## Seen so far: 38400 samples
+## Start of epoch  3
+## Training loss (for one batch) at step 200: 0.2715
+## Seen so far: 12800 samples
+## Training loss (for one batch) at step 400: 0.3106
+## Seen so far: 25600 samples
+## Training loss (for one batch) at step 600: 0.2905
+## Seen so far: 38400 samples
+
+
+

Low-level handling of metrics +

+

Let’s add metrics monitoring to this basic loop.

+

You can readily reuse the built-in metrics (or custom ones you wrote) +in such training loops written from scratch. Here’s the flow:

+
    +
  • Instantiate the metric at the start of the loop
  • +
  • Call metric$update_state() after each batch
  • +
  • Call metric$result() when you need to display the +current value of the metric
  • +
  • Call metric$reset_state() when you need to clear the +state of the metric (typically at the end of an epoch)
  • +
+

Let’s use this knowledge to compute +SparseCategoricalAccuracy on validation data at the end of +each epoch:

+
+# Get a fresh model
+model <- get_model()
+
+# Instantiate an optimizer to train the model.
+optimizer <- optimizer_adam(learning_rate = 1e-3)
+# Instantiate a loss function.
+loss_fn <- loss_sparse_categorical_crossentropy(from_logits = TRUE)
+
+# Prepare the metrics.
+train_acc_metric <- metric_sparse_categorical_accuracy()
+val_acc_metric <- metric_sparse_categorical_accuracy()
+

Here’s our training & evaluation loop:

+
+epochs <- 2
+time <- Sys.time()
+for (epoch in seq_len(epochs)) {
+  cat("Start of epoch ", epoch, "\n")
+   # Iterate over the batches of the dataset.
+  step <- 0
+  train_dataset_iterator <- as_iterator(train_dataset)
+  while (!is.null(train_batch <- iter_next(train_dataset_iterator))) {
+    c(x_batch_train, y_batch_train) %<-% train_batch
+    step <- step + 1
+
+    with(tf$GradientTape() %as% tape, {
+      logits <- model(x_batch_train, training = TRUE)
+      loss_value <- loss_fn(y_batch_train, logits)
+    })
+    gradients <- tape$gradient(loss_value, model$trainable_weights)
+    optimizer$apply(gradients, model$trainable_weights)
+
+    # Update training metric.
+    train_acc_metric$update_state(y_batch_train, logits)
+    if (step %% 200 == 0) {
+      cat(sprintf(
+        "Training loss (for one batch) at step %d: %.4f\n", step, loss_value))
+      cat(sprintf("Seen so far: %d samples \n", (step * batch_size)))
+    }
+  }
+
+  # Display metrics at the end of each epoch.
+  train_acc <- train_acc_metric$result()
+  cat(sprintf("Training acc over epoch: %.4f\n", train_acc))
+
+  # Reset training metrics at the end of each epoch
+  train_acc_metric$reset_state()
+
+  # Run a validation loop at the end of each epoch.
+  iterate(val_dataset, function(val_batch) {
+    c(x_batch_val, y_batch_val) %<-% val_batch
+    val_logits <- model(x_batch_val, training = FALSE)
+    val_acc_metric$update_state(y_batch_val, val_logits)
+  })
+  val_acc <- val_acc_metric$result()
+  val_acc_metric$reset_state()
+  cat(sprintf("Validation acc: %.4f\n", val_acc))
+}
+
## Start of epoch  1
+## Training loss (for one batch) at step 200: 1.6268
+## Seen so far: 12800 samples
+## Training loss (for one batch) at step 400: 1.2241
+## Seen so far: 25600 samples
+## Training loss (for one batch) at step 600: 0.4987
+## Seen so far: 38400 samples
+## Training acc over epoch: 0.7844
+## Validation acc: 0.8680
+## Start of epoch  2
+## Training loss (for one batch) at step 200: 0.4626
+## Seen so far: 12800 samples
+## Training loss (for one batch) at step 400: 0.4654
+## Seen so far: 25600 samples
+## Training loss (for one batch) at step 600: 0.5022
+## Seen so far: 38400 samples
+## Training acc over epoch: 0.8837
+## Validation acc: 0.9031
+
+Sys.time() - time
+
## Time difference of 40.01661 secs
+
+
+

Speeding-up your training step with tf_function() +

+

The default runtime in TensorFlow 2 is eager execution. As +such, our training loop above executes eagerly.

+

This is great for debugging, but graph compilation has a definite +performance advantage. Describing your computation as a static graph +enables the framework to apply global performance optimizations. This is +impossible when the framework is constrained to greedily execute one +operation after another, with no knowledge of what comes next.

+

You can compile into a static graph any function that takes tensors as input. In R, just wrap it with tf_function(), like this:

+
+train_step <- tf_function(function(x, y) {
+  with(tf$GradientTape() %as% tape, {
+    logits <- model(x, training = TRUE)
+    loss_value <- loss_fn(y, logits)
+  })
+  gradients <- tape$gradient(loss_value, model$trainable_weights)
+  optimizer$apply(gradients, model$trainable_weights)
+  train_acc_metric$update_state(y, logits)
+  loss_value # return the loss so the loop below can log it
+})
+

Let’s do the same with the evaluation step:

+
+test_step <- tf_function(function(x, y) {
+  val_logits <- model(x, training=FALSE)
+  val_acc_metric$update_state(y, val_logits)
+  invisible(NULL) # return nothing
+})
+

Now, let’s re-run our training loop with this compiled training +step:

+
+epochs <- 2
+time <- Sys.time()
+for (epoch in seq_len(epochs)) {
+  cat("Start of epoch ", epoch, "\n")
+   # Iterate over the batches of the dataset.
+  step <- 0
+  iterator <- as_iterator(train_dataset)
+  while (!is.null(batch <- iter_next(iterator))) {
+    c(x_batch_train, y_batch_train) %<-% batch
+    step <- step + 1
+    loss_value <- train_step(x_batch_train, y_batch_train)
+
+    if (step %% 200 == 0) {
+      cat(sprintf(
+        "Training loss (for one batch) at step %d: %.4f\n", step, loss_value
+      ))
+      cat(sprintf("Seen so far: %d samples \n", (step * batch_size)))
+    }
+  }
+
+  # Display metrics at the end of each epoch.
+  train_acc <- train_acc_metric$result()
+  cat(sprintf("Training acc over epoch: %.4f\n", train_acc))
+
+   # Reset training metrics at the end of each epoch
+  train_acc_metric$reset_state()
+
+  # Run a validation loop at the end of each epoch.
+   iterate(val_dataset, function(val_batch) {
+    c(x_batch_val, y_batch_val) %<-% val_batch
+    test_step(x_batch_val, y_batch_val)
+  })
+
+  val_acc <- val_acc_metric$result()
+  val_acc_metric$reset_state()
+  cat(sprintf("Validation acc: %.4f\n", val_acc))
+}
+
## Start of epoch  1
+## Training acc over epoch: 0.0000
+## Validation acc: 0.9031
+## Start of epoch  2
+## Training acc over epoch: 0.0000
+## Validation acc: 0.9031
+
+Sys.time() - time
+
## Time difference of 0.3914127 secs
+

Much faster, isn’t it?

+
+
+

Low-level handling of losses tracked by the model +

+

Layers and models recursively track any losses created during the forward pass by layers that call self$add_loss(value). The resulting list of scalar loss values is available via the property model$losses at the end of the forward pass.

+

If you want to be using these loss components, you should sum them +and add them to the main loss in your training step.

+

Consider this layer, which creates an activity regularization loss:

+
+layer_activity_regularization <- Layer(
+  "ActivityRegularizationLayer",
+  call = function(inputs) {
+    self$add_loss(0.1 * op_mean(inputs))
+    inputs
+  }
+)
+

Let’s build a really simple model that uses it:

+
+inputs <- keras_input(shape = 784, name="digits")
+outputs <- inputs |>
+  layer_dense(units = 64, activation = "relu") |>
+  layer_activity_regularization() |>
+  layer_dense(units = 64, activation = "relu") |>
+  layer_dense(units = 10, name = "predictions")
+model <- keras_model(inputs = inputs, outputs = outputs)
+

Here’s what our training step should look like now:

+
+train_step <- tf_function(function(x, y) {
+  with(tf$GradientTape() %as% tape, {
+    logits <- model(x, training = TRUE)
+    loss_value <- Reduce(`+`, c(loss_fn(y, logits),
+                                model$losses))
+  })
+  gradients <- tape$gradient(loss_value, model$trainable_weights)
+  optimizer$apply(gradients, model$trainable_weights)
+  train_acc_metric$update_state(y, logits)
+  invisible(NULL)
+})
+
+
+

Summary +

+

Now you know everything there is to know about using built-in +training loops and writing your own from scratch.

+

To conclude, here’s a simple end-to-end example that ties together +everything you’ve learned in this guide: a DCGAN trained on MNIST +digits.

+
+
+

End-to-end example: a GAN training loop from scratch +

+

You may be familiar with Generative Adversarial Networks (GANs). GANs +can generate new images that look almost real, by learning the latent +distribution of a training dataset of images (the “latent space” of the +images).

+

A GAN is made of two parts: a “generator” model that maps points in the latent space to points in image space, and a “discriminator” model, a classifier that can tell the difference between real images (from the training dataset) and fake images (the output of the generator network).

+

A GAN training loop looks like this:

+
  1. Train the discriminator:
  • Sample a batch of random points in the latent space.
  • Turn the points into fake images via the “generator” model.
  • Get a batch of real images and combine them with the generated images.
  • Train the “discriminator” model to classify generated vs. real images.
  2. Train the generator:
  • Sample random points in the latent space.
  • Turn the points into fake images via the “generator” network.
  • Get a batch of real images and combine them with the generated images.
  • Train the “generator” model to “fool” the discriminator and classify the fake images as real.
+

For a much more detailed overview of how GANs work, see Deep Learning with Python.

+

Let’s implement this training loop. First, create the discriminator +meant to classify fake vs real digits:

+
+# Create the discriminator
+discriminator <-
+  keras_model_sequential(name = "discriminator",
+                         input_shape = c(28, 28, 1)) |>
+  layer_conv_2d(filters = 64, kernel_size = c(3, 3),
+                strides = c(2, 2), padding = "same") |>
+  layer_activation_leaky_relu(negative_slope = 0.2) |>
+  layer_conv_2d(filters = 128, kernel_size = c(3, 3),
+                strides = c(2, 2), padding = "same") |>
+  layer_activation_leaky_relu(negative_slope = 0.2) |>
+  layer_global_max_pooling_2d() |>
+  layer_dense(units = 1)
+
+summary(discriminator)
+
## Model: "discriminator"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ conv2d (Conv2D)                 │ (None, 14, 14, 64)     │           640
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ leaky_re_lu (LeakyReLU)         │ (None, 14, 14, 64)     │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_1 (Conv2D)               │ (None, 7, 7, 128)      │        73,856
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ leaky_re_lu_1 (LeakyReLU)       │ (None, 7, 7, 128)      │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ global_max_pooling2d            │ (None, 128)            │             0
+## │ (GlobalMaxPooling2D)            │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dense_6 (Dense)                 │ (None, 1)              │           129
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 74,625 (291.50 KB)
+##  Trainable params: 74,625 (291.50 KB)
+##  Non-trainable params: 0 (0.00 B)
+

Then let’s create a generator network, which turns latent vectors into outputs of shape (28, 28, 1) (representing MNIST digits):

+
+latent_dim <- 128L
+
+generator <-
+  keras_model_sequential(name = "generator",
+                         input_shape = latent_dim) |>
+  layer_dense(7 * 7 * 128) |>
+  layer_activation_leaky_relu(negative_slope = 0.2) |>
+  layer_reshape(target_shape = c(7, 7, 128)) |>
+  layer_conv_2d_transpose(filters = 128, kernel_size = c(4, 4),
+                          strides = c(2, 2), padding = "same") |>
+  layer_activation_leaky_relu(negative_slope = 0.2) |>
+  layer_conv_2d_transpose(filters = 128, kernel_size = c(4, 4),
+                          strides = c(2, 2), padding = "same") |>
+  layer_activation_leaky_relu(negative_slope = 0.2) |>
+  layer_conv_2d(filters = 1, kernel_size = c(7, 7), padding = "same",
+                activation = "sigmoid")
+
+summary(generator)
+
## Model: "generator"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ dense_7 (Dense)                 │ (None, 6272)           │       809,088
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ leaky_re_lu_2 (LeakyReLU)       │ (None, 6272)           │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ reshape (Reshape)               │ (None, 7, 7, 128)      │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_transpose                │ (None, 14, 14, 128)    │       262,272
+## │ (Conv2DTranspose)               │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ leaky_re_lu_3 (LeakyReLU)       │ (None, 14, 14, 128)    │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_transpose_1              │ (None, 28, 28, 128)    │       262,272
+## │ (Conv2DTranspose)               │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ leaky_re_lu_4 (LeakyReLU)       │ (None, 28, 28, 128)    │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_2 (Conv2D)               │ (None, 28, 28, 1)      │         6,273
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 1,339,905 (5.11 MB)
+##  Trainable params: 1,339,905 (5.11 MB)
+##  Non-trainable params: 0 (0.00 B)
+

Here’s the key bit: the training loop. As you can see it is quite +straightforward. The training step function only takes 17 lines.

+
+# Instantiate one optimizer for the discriminator and another for the generator.
+d_optimizer <- optimizer_adam(learning_rate = 0.0003)
+g_optimizer <- optimizer_adam(learning_rate = 0.0004)
+
+# Instantiate a loss function.
+loss_fn <- loss_binary_crossentropy(from_logits = TRUE)
+
+train_step <- tf_function(function(real_images) {
+  # Sample random points in the latent space
+  c(batch_size, ...) %<-% op_shape(real_images)
+  random_latent_vectors <-
+    tf$random$normal(shape(batch_size, latent_dim))
+
+  # Decode them to fake images
+  generated_images <- generator(random_latent_vectors)
+
+  # Combine them with real images
+  combined_images <- tf$concat(list(generated_images, real_images),
+                               axis = 0L)
+
+  # Assemble labels discriminating real from fake images
+  labels <- tf$concat(list(tf$ones(shape(batch_size, 1)),
+                           tf$zeros(shape(batch_size, 1))),
+                      axis = 0L)
+
+  # Add random noise to the labels - important trick!
+  labels %<>% `+`(tf$random$uniform(tf$shape(.), maxval = 0.05))
+
+  # Train the discriminator
+  with(tf$GradientTape() %as% tape, {
+    predictions <- discriminator(combined_images)
+    d_loss <- loss_fn(labels, predictions)
+  })
+  grads <- tape$gradient(d_loss, discriminator$trainable_weights)
+  d_optimizer$apply(grads, discriminator$trainable_weights)
+
+  # Sample random points in the latent space
+  random_latent_vectors <-
+    tf$random$normal(shape(batch_size, latent_dim))
+
+  # Assemble labels that say "all real images"
+  misleading_labels <- tf$zeros(shape(batch_size, 1))
+
+  # Train the generator (note that we should *not* update the weights
+  # of the discriminator)!
+  with(tf$GradientTape() %as% tape, {
+    predictions <- discriminator(generator(random_latent_vectors))
+    g_loss <- loss_fn(misleading_labels, predictions)
+  })
+
+  grads <- tape$gradient(g_loss, generator$trainable_weights)
+  g_optimizer$apply(grads, generator$trainable_weights)
+
+  list(d_loss, g_loss, generated_images)
+})
+

Let’s train our GAN, by repeatedly calling train_step on +batches of images.

+

Since our discriminator and generator are convnets, you’re going to +want to run this code on a GPU.

+
+# Prepare the dataset. We use both the training & test MNIST digits.
+batch_size <- 64
+c(c(x_train, .), c(x_test, .)) %<-% dataset_mnist()
+all_digits <- op_concatenate(list(x_train, x_test))
+all_digits <- op_reshape(all_digits, c(-1, 28, 28, 1))
+dataset <- all_digits |>
+  tensor_slices_dataset() |>
+  dataset_map(\(x) op_cast(x, "float32") / 255) |>
+  dataset_shuffle(buffer_size = 1024) |>
+  dataset_batch(batch_size = batch_size)
+
+epochs <- 1 # In practice you need at least 20 epochs to generate nice digits.
+save_dir <- "./"
+
+for (epoch in seq_len(epochs)) {
+  cat("Start epoch: ", epoch, "\n")
+  step <- 0
+  train_iterator <- as_iterator(dataset)
+  while (!is.null(real_images <- iter_next(train_iterator))) {
+    step <- step + 1
+    # Train the discriminator & generator on one batch of real images.
+    c(d_loss, g_loss, generated_images) %<-% train_step(real_images)
+
+    # Logging.
+    if (step %% 200 == 0) {
+      # Print metrics
+      cat(sprintf("discriminator loss at step %d: %.2f\n", step, d_loss))
+      cat(sprintf("adversarial loss at step %d: %.2f\n", step, g_loss))
+    }
+
+    # To limit execution time we stop after 10 steps.
+    # Remove the lines below to actually train the model!
+    if (step > 10)
+      break
+  }
+}
+
## Start epoch:  1
+

That’s it! You’ll get nice-looking fake MNIST digits after just ~30s +of training on a GPU.

+
+
+
+
diff --git a/docs/articles/writing_a_custom_training_loop_in_torch.html b/docs/articles/writing_a_custom_training_loop_in_torch.html
new file mode 100644
index 0000000000..bfe7bf4990
--- /dev/null
+++ b/docs/articles/writing_a_custom_training_loop_in_torch.html
@@ -0,0 +1,474 @@
+Writing a training loop from scratch in PyTorch • keras3
+
+

Setup +

+
import os
+
+# This guide can only be run with the torch backend.
+os.environ["KERAS_BACKEND"] = "torch"
+
+import torch
+import keras
+import numpy as np
+
+
+

Introduction +

+

Keras provides default training and evaluation loops, +fit() and evaluate(). Their usage is covered +in the guide Training +& evaluation with the built-in methods.

+

If you want to customize the learning algorithm of your model while +still leveraging the convenience of fit() (for instance, to +train a GAN using fit()), you can subclass the +Model class and implement your own +train_step() method, which is called repeatedly during +fit().

+

Now, if you want very low-level control over training & +evaluation, you should write your own training & evaluation loops +from scratch. This is what this guide is about.

+
+
+

A first end-to-end example +

+

To write a custom training loop, we need the following +ingredients:

+
    +
  • A model to train, of course.
  • +
  • An optimizer. You could either use a keras.optimizers +optimizer, or a native PyTorch optimizer from +torch.optim.
  • +
  • A loss function. You could either use a keras.losses +loss, or a native PyTorch loss from torch.nn.
  • +
  • A dataset. You could use any format: a tf.data.Dataset, +a PyTorch DataLoader, a Python generator, etc.
  • +
+

Let’s line them up. We’ll use torch-native objects in each case – +except, of course, for the Keras model.

+

First, let’s get the model and the MNIST dataset:

+
# Let's consider a simple MNIST model
+def get_model():
+    inputs = keras.Input(shape=(784,), name="digits")
+    x1 = keras.layers.Dense(64, activation="relu")(inputs)
+    x2 = keras.layers.Dense(64, activation="relu")(x1)
+    outputs = keras.layers.Dense(10, name="predictions")(x2)
+    model = keras.Model(inputs=inputs, outputs=outputs)
+    return model
+
+
+# Load the MNIST dataset and put it in a torch DataLoader
+# Prepare the training dataset.
+batch_size = 32
+(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
+x_train = np.reshape(x_train, (-1, 784)).astype("float32")
+x_test = np.reshape(x_test, (-1, 784)).astype("float32")
+y_train = keras.utils.to_categorical(y_train)
+y_test = keras.utils.to_categorical(y_test)
+
+# Reserve 10,000 samples for validation.
+x_val = x_train[-10000:]
+y_val = y_train[-10000:]
+x_train = x_train[:-10000]
+y_train = y_train[:-10000]
+
+# Create torch Datasets
+train_dataset = torch.utils.data.TensorDataset(
+    torch.from_numpy(x_train), torch.from_numpy(y_train)
+)
+val_dataset = torch.utils.data.TensorDataset(
+    torch.from_numpy(x_val), torch.from_numpy(y_val)
+)
+
+# Create DataLoaders for the Datasets
+train_dataloader = torch.utils.data.DataLoader(
+    train_dataset, batch_size=batch_size, shuffle=True
+)
+val_dataloader = torch.utils.data.DataLoader(
+    val_dataset, batch_size=batch_size, shuffle=False
+)
+

Next, here’s our PyTorch optimizer and our PyTorch loss function:

+
# Instantiate a torch optimizer
+model = get_model()
+optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
+
+# Instantiate a torch loss function
+loss_fn = torch.nn.CrossEntropyLoss()
+

Let’s train our model using mini-batch gradient descent with a custom training loop.

+

Calling loss.backward() on a loss tensor triggers backpropagation. Once that’s done, your optimizer is magically aware of the gradients for each variable and can update its variables, which is done via optimizer.step(). Tensors, variables, and optimizers are all interconnected to one another via hidden global state. Also, don’t forget to call model.zero_grad() before loss.backward(), or you won’t get the right gradients for your variables.

+

Here’s our training loop, step by step:

+
  • We open a for loop that iterates over epochs
  • For each epoch, we open a for loop that iterates over the dataset, in batches
  • For each batch, we call the model on the input data to retrieve the predictions, then we use them to compute a loss value
  • We call loss.backward() to compute the gradients of the model’s weights with respect to the loss
  • Once backward() has run, we retrieve the gradients of the weights of the model with regard to the loss
  • Finally, we use the optimizer to update the weights of the model based on the gradients
+
epochs = 3
+for epoch in range(epochs):
+    for step, (inputs, targets) in enumerate(train_dataloader):
+        # Forward pass
+        logits = model(inputs)
+        loss = loss_fn(logits, targets)
+
+        # Backward pass
+        model.zero_grad()
+        loss.backward()
+
+        # Optimizer variable updates
+        optimizer.step()
+
+        # Log every 100 batches.
+        if step % 100 == 0:
+            print(
+                f"Training loss (for 1 batch) at step {step}: {loss.detach().numpy():.4f}"
+            )
+            print(f"Seen so far: {(step + 1) * batch_size} samples")
+

As an alternative, let’s look at what the loop looks like when using a Keras optimizer and a Keras loss function.

+

Important differences:

+
  • You retrieve the gradients for the variables via v.value.grad, called on each trainable variable.
  • You update your variables via optimizer.apply(), which must be called in a torch.no_grad() scope.
+

Also, a big gotcha: while all NumPy/TensorFlow/JAX/Keras APIs as well as Python unittest APIs use the argument order convention fn(y_true, y_pred) (reference values first, predicted values second), PyTorch actually uses fn(y_pred, y_true) for its losses. So make sure to invert the order of logits and targets.

+
model = get_model()
+optimizer = keras.optimizers.Adam(learning_rate=1e-3)
+loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True)
+
+for epoch in range(epochs):
+    print(f"\nStart of epoch {epoch}")
+    for step, (inputs, targets) in enumerate(train_dataloader):
+        # Forward pass
+        logits = model(inputs)
+        loss = loss_fn(targets, logits)
+
+        # Backward pass
+        model.zero_grad()
+        trainable_weights = [v for v in model.trainable_weights]
+
+        # Call torch.Tensor.backward() on the loss to compute gradients
+        # for the weights.
+        loss.backward()
+        gradients = [v.value.grad for v in trainable_weights]
+
+        # Update weights
+        with torch.no_grad():
+            optimizer.apply(gradients, trainable_weights)
+
+        # Log every 100 batches.
+        if step % 100 == 0:
+            print(
+                f"Training loss (for 1 batch) at step {step}: {loss.detach().numpy():.4f}"
+            )
+            print(f"Seen so far: {(step + 1) * batch_size} samples")
+
+
+

Low-level handling of metrics +

+

Let’s add metrics monitoring to this basic training loop.

+

You can readily reuse built-in Keras metrics (or custom ones you wrote) in such training loops written from scratch. Here’s the flow:

+
  • Instantiate the metric at the start of the loop
  • Call metric.update_state() after each batch
  • Call metric.result() when you need to display the current value of the metric
  • Call metric.reset_state() when you need to clear the state of the metric (typically at the end of an epoch)
+

Let’s use this knowledge to compute CategoricalAccuracy on training and validation data at the end of each epoch:

+
# Get a fresh model
+model = get_model()
+
+# Instantiate an optimizer to train the model.
+optimizer = keras.optimizers.Adam(learning_rate=1e-3)
+# Instantiate a loss function.
+loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True)
+
+# Prepare the metrics.
+train_acc_metric = keras.metrics.CategoricalAccuracy()
+val_acc_metric = keras.metrics.CategoricalAccuracy()
+

Here’s our training & evaluation loop:

+
for epoch in range(epochs):
+    print(f"\nStart of epoch {epoch}")
+    for step, (inputs, targets) in enumerate(train_dataloader):
+        # Forward pass
+        logits = model(inputs)
+        loss = loss_fn(targets, logits)
+
+        # Backward pass
+        model.zero_grad()
+        trainable_weights = [v for v in model.trainable_weights]
+
+        # Call torch.Tensor.backward() on the loss to compute gradients
+        # for the weights.
+        loss.backward()
+        gradients = [v.value.grad for v in trainable_weights]
+
+        # Update weights
+        with torch.no_grad():
+            optimizer.apply(gradients, trainable_weights)
+
+        # Update training metric.
+        train_acc_metric.update_state(targets, logits)
+
+        # Log every 100 batches.
+        if step % 100 == 0:
+            print(
+                f"Training loss (for 1 batch) at step {step}: {loss.detach().numpy():.4f}"
+            )
+            print(f"Seen so far: {(step + 1) * batch_size} samples")
+
+    # Display metrics at the end of each epoch.
+    train_acc = train_acc_metric.result()
+    print(f"Training acc over epoch: {float(train_acc):.4f}")
+
+    # Reset training metrics at the end of each epoch
+    train_acc_metric.reset_state()
+
+    # Run a validation loop at the end of each epoch.
+    for x_batch_val, y_batch_val in val_dataloader:
+        val_logits = model(x_batch_val, training=False)
+        # Update val metrics
+        val_acc_metric.update_state(y_batch_val, val_logits)
+    val_acc = val_acc_metric.result()
+    val_acc_metric.reset_state()
+    print(f"Validation acc: {float(val_acc):.4f}")
+
+
+

Low-level handling of losses tracked by the model +

+

Layers & models recursively track any losses created during the forward pass by layers that call self.add_loss(value). The resulting list of scalar loss values is available via the property model.losses at the end of the forward pass.

+

If you want to use these loss components, you should sum them and add them to the main loss in your training step.

+

Consider this layer, which creates an activity regularization loss:

+
class ActivityRegularizationLayer(keras.layers.Layer):
+    def call(self, inputs):
+        self.add_loss(1e-2 * torch.sum(inputs))
+        return inputs
+

Let’s build a really simple model that uses it:

+
inputs = keras.Input(shape=(784,), name="digits")
+x = keras.layers.Dense(64, activation="relu")(inputs)
+# Insert activity regularization as a layer
+x = ActivityRegularizationLayer()(x)
+x = keras.layers.Dense(64, activation="relu")(x)
+outputs = keras.layers.Dense(10, name="predictions")(x)
+
+model = keras.Model(inputs=inputs, outputs=outputs)
+

Here’s what our training loop should look like now:

+
# Use the model with the activity regularization layer defined above
+
+# Instantiate an optimizer to train the model.
+optimizer = keras.optimizers.Adam(learning_rate=1e-3)
+# Instantiate a loss function.
+loss_fn = keras.losses.CategoricalCrossentropy(from_logits=True)
+
+# Prepare the metrics.
+train_acc_metric = keras.metrics.CategoricalAccuracy()
+val_acc_metric = keras.metrics.CategoricalAccuracy()
+
+for epoch in range(epochs):
+    print(f"\nStart of epoch {epoch}")
+    for step, (inputs, targets) in enumerate(train_dataloader):
+        # Forward pass
+        logits = model(inputs)
+        loss = loss_fn(targets, logits)
+        if model.losses:
+            loss = loss + sum(model.losses)
+
+        # Backward pass
+        model.zero_grad()
+        trainable_weights = [v for v in model.trainable_weights]
+
+        # Call torch.Tensor.backward() on the loss to compute gradients
+        # for the weights.
+        loss.backward()
+        gradients = [v.value.grad for v in trainable_weights]
+
+        # Update weights
+        with torch.no_grad():
+            optimizer.apply(gradients, trainable_weights)
+
+        # Update training metric.
+        train_acc_metric.update_state(targets, logits)
+
+        # Log every 100 batches.
+        if step % 100 == 0:
+            print(
+                f"Training loss (for 1 batch) at step {step}: {loss.detach().numpy():.4f}"
+            )
+            print(f"Seen so far: {(step + 1) * batch_size} samples")
+
+    # Display metrics at the end of each epoch.
+    train_acc = train_acc_metric.result()
+    print(f"Training acc over epoch: {float(train_acc):.4f}")
+
+    # Reset training metrics at the end of each epoch
+    train_acc_metric.reset_state()
+
+    # Run a validation loop at the end of each epoch.
+    for x_batch_val, y_batch_val in val_dataloader:
+        val_logits = model(x_batch_val, training=False)
+        # Update val metrics
+        val_acc_metric.update_state(y_batch_val, val_logits)
+    val_acc = val_acc_metric.result()
+    val_acc_metric.reset_state()
+    print(f"Validation acc: {float(val_acc):.4f}")
+

That’s it!

diff --git a/docs/articles/writing_a_training_loop_from_scratch.html b/docs/articles/writing_a_training_loop_from_scratch.html
new file mode 100644
index 0000000000..58142c3bf9
--- /dev/null
+++ b/docs/articles/writing_a_training_loop_from_scratch.html
@@ -0,0 +1,741 @@
+Writing a training loop from scratch in TensorFlow • keras3

Setup +

+
+library(tensorflow)
+library(tfdatasets)
+library(keras3, warn.conflicts = FALSE)
+
+# This guide can only be run with the TensorFlow backend.
+use_backend("tensorflow")
+
+
+

Introduction +

+

Keras provides default training and evaluation loops, fit() and evaluate(). Their usage is covered in the guide Training & evaluation with the built-in methods.

+

If you want to customize the learning algorithm of your model while still leveraging the convenience of fit() (for instance, to train a GAN using fit()), you can subclass the Model class and implement your own train_step() method, which is called repeatedly during fit(). This is covered in the guide Customizing what happens in fit().

+

Now, if you want very low-level control over training & evaluation, you should write your own training & evaluation loops from scratch. This is what this guide is about.

+
+
+

Using the GradientTape: a first end-to-end example +

+

Calling a model inside a GradientTape scope enables you to retrieve the gradients of the trainable weights of the layer with respect to a loss value. Using an optimizer instance, you can use these gradients to update these variables (which you can retrieve using model$trainable_weights).

+
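To see the tape mechanics in isolation, here is a minimal sketch on a single scalar variable (a hypothetical toy example, not part of the guide’s model):

+w <- tf$Variable(3)
+with(tf$GradientTape() %as% tape, {
+  # Operations on `w` inside this scope are recorded on the tape.
+  loss <- w * w
+})
+# The gradient of w^2 with respect to w is 2 * w, i.e. 6 here.
+tape$gradient(loss, w)
+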

Let’s consider a simple MNIST model:

+
+get_model <- function() {
+  inputs <- keras_input(shape = 784, name = "digits")
+  outputs <- inputs |>
+    layer_dense(units = 64, activation = "relu") |>
+    layer_dense(units = 64, activation = "relu") |>
+    layer_dense(units = 10, name = "predictions")
+  keras_model(inputs = inputs, outputs = outputs)
+}
+
+model <- get_model()
+

Let’s train it using mini-batch gradient descent with a custom training loop.

+

First, we’re going to need an optimizer, a loss function, and a dataset:

+
+# Instantiate an optimizer.
+optimizer <- optimizer_adam(learning_rate = 1e-3)
+# Instantiate a loss function.
+loss_fn <- loss_sparse_categorical_crossentropy(from_logits = TRUE)
+
+# Prepare the training dataset.
+batch_size <- 64
+c(c(x_train, y_train), c(x_test, y_test)) %<-% dataset_mnist()
+x_train <- array_reshape(x_train, c(-1, 784))
+x_test <- array_reshape(x_test, c(-1, 784))
+
+# Reserve 10,000 samples for validation.
+x_val <- x_train[1:10000, ]
+y_val <- y_train[1:10000]
+x_train <- x_train[-c(1:10000), ]
+y_train <- y_train[-c(1:10000)]
+
+# Prepare the training dataset.
+train_dataset <- list(x_train, y_train) |>
+  tensor_slices_dataset() |>
+  dataset_shuffle(buffer_size = 1024) |>
+  dataset_batch(batch_size)
+
+# Prepare the validation dataset.
+val_dataset <- list(x_val, y_val) |>
+  tensor_slices_dataset() |>
+  dataset_batch(batch_size)
+

Here’s our training loop:

+
  • We open a for loop that iterates over epochs
  • For each epoch, we iterate over the dataset, in batches
  • For each batch, we open a GradientTape() scope
  • Inside this scope, we call the model (forward pass) and compute the loss
  • Outside the scope, we retrieve the gradients of the weights of the model with regard to the loss
  • Finally, we use the optimizer to update the weights of the model based on the gradients
+
+epochs <- 3
+for (epoch in seq_len(epochs)) {
+  cat("\nStart of epoch ", epoch, "\n")
+   # Iterate over the batches of the dataset.
+  step <- 0
+  iterator <- as_iterator(train_dataset)
+  while (!is.null(batch <- iter_next(iterator))) {
+    c(x_batch_train, y_batch_train) %<-% batch
+    step <- step + 1
+    # Open a GradientTape to record the operations run
+    # during the forward pass, which enables auto-differentiation.
+    with(tf$GradientTape() %as% tape, {
+      # Run the forward pass of the layer.
+      # The operations that the layer applies
+      # to its inputs are going to be recorded
+      # on the GradientTape.
+      logits <- model(x_batch_train, training = TRUE) # Logits for this minibatch
+
+      # Compute the loss value for this minibatch.
+      loss_value <- loss_fn(y_batch_train, logits)
+    })
+
+    # Use the gradient tape to automatically retrieve
+    # the gradients of the trainable variables with respect to the loss.
+    gradients <- tape$gradient(loss_value, model$trainable_weights)
+
+    # Run one step of gradient descent by updating
+    # the value of the variables to minimize the loss.
+    optimizer$apply(gradients, model$trainable_weights)
+
+    # Log every 200 batches.
+    if (step %% 200 == 0) {
+      cat(sprintf("Training loss (for one batch) at step %d: %.4f\n",
+                  step, loss_value))
+      cat(sprintf("Seen so far: %d samples\n", (step * batch_size)))
+    }
+  }
+}
+
##
+## Start of epoch  1
+## Training loss (for one batch) at step 200: 1.9342
+## Seen so far: 12800 samples
+## Training loss (for one batch) at step 400: 0.8244
+## Seen so far: 25600 samples
+## Training loss (for one batch) at step 600: 0.8011
+## Seen so far: 38400 samples
+##
+## Start of epoch  2
+## Training loss (for one batch) at step 200: 0.3505
+## Seen so far: 12800 samples
+## Training loss (for one batch) at step 400: 0.6878
+## Seen so far: 25600 samples
+## Training loss (for one batch) at step 600: 0.3928
+## Seen so far: 38400 samples
+##
+## Start of epoch  3
+## Training loss (for one batch) at step 200: 0.3366
+## Seen so far: 12800 samples
+## Training loss (for one batch) at step 400: 0.1102
+## Seen so far: 25600 samples
+## Training loss (for one batch) at step 600: 0.3629
+## Seen so far: 38400 samples
+
+
+

Low-level handling of metrics +

+

Let’s add metrics monitoring to this basic loop.

+

You can readily reuse the built-in metrics (or custom ones you wrote) in such training loops written from scratch. Here’s the flow:

+
  • Instantiate the metric at the start of the loop
  • Call metric$update_state() after each batch
  • Call metric$result() when you need to display the current value of the metric
  • Call metric$reset_state() when you need to clear the state of the metric (typically at the end of an epoch)
+

Let’s use this knowledge to compute SparseCategoricalAccuracy on validation data at the end of each epoch:

+
+# Get model
+model <- get_model()
+
+# Instantiate an optimizer to train the model.
+optimizer <- optimizer_adam(learning_rate = 1e-3)
+# Instantiate a loss function.
+loss_fn <- loss_sparse_categorical_crossentropy(from_logits = TRUE)
+
+# Prepare the metrics.
+train_acc_metric <- metric_sparse_categorical_accuracy()
+val_acc_metric <- metric_sparse_categorical_accuracy()
+

Here’s our training & evaluation loop:

+
+epochs <- 2
+time <- Sys.time()
+for (epoch in seq_len(epochs)) {
+  cat("Start of epoch ", epoch, "\n")
+   # Iterate over the batches of the dataset.
+  step <- 0
+  iterator <- as_iterator(train_dataset)
+  while (!is.null(batch <- iter_next(iterator))) {
+    c(x_batch_train, y_batch_train) %<-% batch
+    step <- step + 1
+
+    with(tf$GradientTape() %as% tape, {
+      logits <- model(x_batch_train, training = TRUE)
+      loss_value <- loss_fn(y_batch_train, logits)
+    })
+    gradients <- tape$gradient(loss_value, model$trainable_weights)
+    optimizer$apply(gradients, model$trainable_weights)
+
+    # Update training metric.
+    train_acc_metric$update_state(y_batch_train, logits)
+    if (step %% 200 == 0) {
+      cat(sprintf(
+        "Training loss (for one batch) at step %d: %.4f\n", step, loss_value))
+      cat(sprintf("Seen so far: %d samples \n", (step * batch_size)))
+    }
+  }
+
+  # Display metrics at the end of each epoch.
+  train_acc <- train_acc_metric$result()
+  cat(sprintf("Training acc over epoch: %.4f\n", train_acc))
+
+  # Reset training metrics at the end of each epoch
+  train_acc_metric$reset_state()
+
+  # Run a validation loop at the end of each epoch.
+  iterate(val_dataset, function(val_batch) {
+    c(x_batch_val, y_batch_val) %<-% val_batch
+    val_logits <- model(x_batch_val, training = FALSE)
+    val_acc_metric$update_state(y_batch_val, val_logits)
+  })
+  val_acc <- val_acc_metric$result()
+  val_acc_metric$reset_state()
+  cat(sprintf("Validation acc: %.4f\n", val_acc))
+}
+
## Start of epoch  1
+## Training loss (for one batch) at step 200: 2.4070
+## Seen so far: 12800 samples
+## Training loss (for one batch) at step 400: 0.8216
+## Seen so far: 25600 samples
+## Training loss (for one batch) at step 600: 1.2056
+## Seen so far: 38400 samples
+## Training acc over epoch: 0.8094
+## Validation acc: 0.8687
+## Start of epoch  2
+## Training loss (for one batch) at step 200: 0.1857
+## Seen so far: 12800 samples
+## Training loss (for one batch) at step 400: 0.3803
+## Seen so far: 25600 samples
+## Training loss (for one batch) at step 600: 0.3399
+## Seen so far: 38400 samples
+## Training acc over epoch: 0.8859
+## Validation acc: 0.9002
+
+Sys.time() - time
+
## Time difference of 44.61786 secs
+
+
+

Speeding-up your training step with tf_function() +

+

The default runtime in TensorFlow 2 is eager execution. As such, our training loop above executes eagerly.

+

This is great for debugging, but graph compilation has a definite performance advantage. Describing your computation as a static graph enables the framework to apply global performance optimizations. This is impossible when the framework is constrained to greedily execute one operation after another, with no knowledge of what comes next.

+

You can compile into a static graph any function that takes tensors as input. Just wrap it in tf_function(), like this:

+
+train_step <- tf_function(function(x, y) {
+  with(tf$GradientTape() %as% tape, {
+    logits <- model(x, training = TRUE)
+    loss_value <- loss_fn(y, logits)
+  })
+  gradients <- tape$gradient(loss_value, model$trainable_weights)
+  optimizer$apply(gradients, model$trainable_weights)
+  train_acc_metric$update_state(y, logits)
+  loss_value # return the loss so the training loop can log it
+})
+

Let’s do the same with the evaluation step:

+
+test_step <- tf_function(function(x, y) {
+  val_logits <- model(x, training=FALSE)
+  val_acc_metric$update_state(y, val_logits)
+  invisible(NULL) # return nothing
+})
+

Now, let’s re-run our training loop with this compiled training step:

+
+epochs <- 2
+time <- Sys.time()
+for (epoch in seq_len(epochs)) {
+  cat("Start of epoch ", epoch, "\n")
+   # Iterate over the batches of the dataset.
+  step <- 0
+  iterator <- as_iterator(train_dataset)
+  while (!is.null(batch <- iter_next(iterator))) {
+    c(x_batch_train, y_batch_train) %<-% batch
+    step <- step + 1
+    loss_value <- train_step(x_batch_train, y_batch_train)
+
+    if (step %% 200 == 0) {
+      cat(sprintf(
+        "Training loss (for one batch) at step %d: %.4f\n", step, loss_value
+      ))
+      cat(sprintf("Seen so far: %d samples \n", (step * batch_size)))
+    }
+  }
+
+  # Display metrics at the end of each epoch.
+  train_acc <- train_acc_metric$result()
+  cat(sprintf("Training acc over epoch: %.4f\n", train_acc))
+
+   # Reset training metrics at the end of each epoch
+  train_acc_metric$reset_state()
+
+  # Run a validation loop at the end of each epoch.
+   iterate(val_dataset, function(val_batch) {
+    c(x_batch_val, y_batch_val) %<-% val_batch
+    test_step(x_batch_val, y_batch_val)
+  })
+
+  val_acc <- val_acc_metric$result()
+  val_acc_metric$reset_state()
+  cat(sprintf("Validation acc: %.4f\n", val_acc))
+}
+
## Start of epoch  1
+## Training acc over epoch: 0.0000
+## Validation acc: 0.9002
+## Start of epoch  2
+## Training acc over epoch: 0.0000
+## Validation acc: 0.9002
+
+Sys.time() - time
+
## Time difference of 0.5360565 secs
+

Much faster, isn’t it?

+
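The speed-up comes from tracing: tf_function() runs the R function once to build a graph, and later calls with the same input signature reuse that graph without executing the R code again. A minimal sketch illustrating this (a toy example, not from the original guide):

+traced_fn <- tf_function(function(x) {
+  cat("Tracing!\n")  # R side effects run only while the graph is traced
+  x * 2
+})
+traced_fn(tf$constant(1))  # first call traces the graph and prints "Tracing!"
+traced_fn(tf$constant(2))  # same signature: reuses the graph, prints nothing
+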
+
+

Low-level handling of losses tracked by the model +

+

Layers and models recursively track any losses created during the forward pass by layers that call self$add_loss(value). The resulting list of scalar loss values is available via the property model$losses at the end of the forward pass.

+

If you want to use these loss components, you should sum them and add them to the main loss in your training step.

+

Consider this layer, which creates an activity regularization loss:

+
+layer_activity_regularization <- Layer(
+  "ActivityRegularizationLayer",
+  call = function(inputs) {
+    self$add_loss(0.1 * op_mean(inputs))
+    inputs
+  }
+)
+

Let’s build a really simple model that uses it:

+
+inputs <- keras_input(shape = 784, name="digits")
+outputs <- inputs |>
+  layer_dense(units = 64, activation = "relu") |>
+  layer_activity_regularization() |>
+  layer_dense(units = 64, activation = "relu") |>
+  layer_dense(units = 10, name = "predictions")
+model <- keras_model(inputs = inputs, outputs = outputs)
+

Here’s what our training step should look like now:

+
+train_step <- tf_function(function(x, y) {
+  with(tf$GradientTape() %as% tape, {
+    logits <- model(x, training = TRUE)
+    loss_value <- Reduce(`+`, c(loss_fn(y, logits),
+                                model$losses))
+  })
+  gradients <- tape$gradient(loss_value, model$trainable_weights)
+  optimizer$apply(gradients, model$trainable_weights)
+  train_acc_metric$update_state(y, logits)
+  invisible(NULL)
+})
+
+
+

Summary +

+

Now you know everything there is to know about using built-in training loops and writing your own from scratch.

+

To conclude, here’s a simple end-to-end example that ties together everything you’ve learned in this guide: a DCGAN trained on MNIST digits.

+
+
+

End-to-end example: a GAN training loop from scratch +

+

You may be familiar with Generative Adversarial Networks (GANs). GANs can generate new images that look almost real, by learning the latent distribution of a training dataset of images (the “latent space” of the images).

+

A GAN is made of two parts: a “generator” model that maps points in the latent space to points in image space, and a “discriminator” model, a classifier that can tell the difference between real images (from the training dataset) and fake images (the output of the generator network).

+

A GAN training loop looks like this:

+
  1. Train the discriminator.
     • Sample a batch of random points in the latent space.
     • Turn the points into fake images via the “generator” model.
     • Get a batch of real images and combine them with the generated images.
     • Train the “discriminator” model to classify generated vs. real images.
  2. Train the generator.
     • Sample random points in the latent space.
     • Turn the points into fake images via the “generator” network.
     • Get a batch of real images and combine them with the generated images.
     • Train the “generator” model to “fool” the discriminator and classify the fake images as real.
+

For a much more detailed overview of how GANs work, see Deep Learning with Python.

+

Let’s implement this training loop. First, create the discriminator meant to classify fake vs. real digits:

+
+# Create the discriminator
+discriminator <-
+  keras_model_sequential(name = "discriminator",
+                         input_shape = c(28, 28, 1)) |>
+  layer_conv_2d(filters = 64, kernel_size = c(3, 3),
+                strides = c(2, 2), padding = "same") |>
+  layer_activation_leaky_relu(negative_slope = 0.2) |>
+  layer_conv_2d(filters = 128, kernel_size = c(3, 3),
+                strides = c(2, 2), padding = "same") |>
+  layer_activation_leaky_relu(negative_slope = 0.2) |>
+  layer_global_max_pooling_2d() |>
+  layer_dense(units = 1)
+
+summary(discriminator)
+
## Model: "discriminator"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ conv2d_1 (Conv2D)               │ (None, 14, 14, 64)     │           640
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ leaky_re_lu_1 (LeakyReLU)       │ (None, 14, 14, 64)     │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d (Conv2D)                 │ (None, 7, 7, 128)      │        73,856
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ leaky_re_lu (LeakyReLU)         │ (None, 7, 7, 128)      │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ global_max_pooling2d            │ (None, 128)            │             0
+## │ (GlobalMaxPooling2D)            │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ dense_6 (Dense)                 │ (None, 1)              │           129
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 74,625 (291.50 KB)
+##  Trainable params: 74,625 (291.50 KB)
+##  Non-trainable params: 0 (0.00 B)
+

Then let’s create a generator network that turns latent vectors into outputs of shape (28, 28, 1) (representing MNIST digits):

+
+latent_dim <- 128L
+
+generator <-
+  keras_model_sequential(name = "generator",
+                         input_shape = latent_dim) |>
+  layer_dense(7 * 7 * 128) |>
+  layer_activation_leaky_relu(negative_slope = 0.2) |>
+  layer_reshape(target_shape = c(7, 7, 128)) |>
+  layer_conv_2d_transpose(filters = 128, kernel_size = c(4, 4),
+                          strides = c(2, 2), padding = "same") |>
+  layer_activation_leaky_relu(negative_slope = 0.2) |>
+  layer_conv_2d_transpose(filters = 128, kernel_size = c(4, 4),
+                          strides = c(2, 2), padding = "same") |>
+  layer_activation_leaky_relu(negative_slope = 0.2) |>
+  layer_conv_2d(filters = 1, kernel_size = c(7, 7), padding = "same",
+                activation = "sigmoid")
+
+summary(generator)
+
## Model: "generator"
+## ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
+## ┃ Layer (type)                     Output Shape                  Param # 
+## ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
+## │ dense_7 (Dense)                 │ (None, 6272)           │       809,088
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ leaky_re_lu_4 (LeakyReLU)       │ (None, 6272)           │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ reshape (Reshape)               │ (None, 7, 7, 128)      │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_transpose_1              │ (None, 14, 14, 128)    │       262,272
+## │ (Conv2DTranspose)               │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ leaky_re_lu_3 (LeakyReLU)       │ (None, 14, 14, 128)    │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_transpose                │ (None, 28, 28, 128)    │       262,272
+## │ (Conv2DTranspose)               │                        │               │
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ leaky_re_lu_2 (LeakyReLU)       │ (None, 28, 28, 128)    │             0
+## ├─────────────────────────────────┼────────────────────────┼───────────────┤
+## │ conv2d_2 (Conv2D)               │ (None, 28, 28, 1)      │         6,273
+## └─────────────────────────────────┴────────────────────────┴───────────────┘
+##  Total params: 1,339,905 (5.11 MB)
+##  Trainable params: 1,339,905 (5.11 MB)
+##  Non-trainable params: 0 (0.00 B)
+

Here’s the key bit: the training loop. As you can see, it is quite straightforward. The training step function only takes 17 lines.

+
+# Instantiate one optimizer for the discriminator and another for the generator.
+d_optimizer <- optimizer_adam(learning_rate = 0.0003)
+g_optimizer <- optimizer_adam(learning_rate = 0.0004)
+
+# Instantiate a loss function.
+loss_fn <- loss_binary_crossentropy(from_logits = TRUE)
+
+train_step <- tf_function(function(real_images) {
+  # Sample random points in the latent space
+  c(batch_size, ...) %<-% op_shape(real_images)
+  random_latent_vectors <-
+    tf$random$normal(shape(batch_size, latent_dim))
+
+  # Decode them to fake images
+  generated_images <- generator(random_latent_vectors)
+
+  # Combine them with real images
+  combined_images <- tf$concat(list(generated_images, real_images),
+                               axis = 0L)
+
+  # Assemble labels discriminating real from fake images
+  labels <- tf$concat(list(tf$ones(shape(batch_size, 1)),
+                           tf$zeros(shape(batch_size, 1))),
+                      axis = 0L)
+
+  # Add random noise to the labels - important trick!
+  labels %<>% `+`(tf$random$uniform(tf$shape(.), maxval = 0.05))
+
+  # Train the discriminator
+  with(tf$GradientTape() %as% tape, {
+    predictions <- discriminator(combined_images)
+    d_loss <- loss_fn(labels, predictions)
+  })
+  grads <- tape$gradient(d_loss, discriminator$trainable_weights)
+  d_optimizer$apply(grads, discriminator$trainable_weights)
+
+  # Sample random points in the latent space
+  random_latent_vectors <-
+    tf$random$normal(shape(batch_size, latent_dim))
+
+  # Assemble labels that say "all real images"
+  misleading_labels <- tf$zeros(shape(batch_size, 1))
+
+  # Train the generator (note that we should *not* update the weights
+  # of the discriminator)!
+  with(tf$GradientTape() %as% tape, {
+    predictions <- discriminator(generator(random_latent_vectors))
+    g_loss <- loss_fn(misleading_labels, predictions)
+  })
+
+  grads <- tape$gradient(g_loss, generator$trainable_weights)
+  g_optimizer$apply(grads, generator$trainable_weights)
+
+  list(d_loss, g_loss, generated_images)
+})
+

Let’s train our GAN by repeatedly calling train_step on batches of images.

+

Since our discriminator and generator are convnets, you’re going to want to run this code on a GPU.

+
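An optional sanity check before kicking off training (not part of the original example) is to ask TensorFlow which GPUs it can see:

+# An empty list here means training will fall back to the CPU.
+tf$config$list_physical_devices("GPU")
+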
+# Prepare the dataset. We use both the training & test MNIST digits.
+batch_size <- 64
+c(c(x_train, .), c(x_test, .)) %<-% dataset_mnist()
+all_digits <- op_concatenate(list(x_train, x_test))
+all_digits <- op_reshape(all_digits, c(-1, 28, 28, 1))
+dataset <- all_digits |>
+  tfdatasets::tensor_slices_dataset() |>
+  tfdatasets::dataset_map(function(x) tf$cast(x, "float") / 255) |>
+  tfdatasets::dataset_shuffle(buffer_size = 1024) |>
+  tfdatasets::dataset_batch(batch_size = batch_size)
+
+epochs <- 1 # In practice you need at least 20 epochs to generate nice digits.
+save_dir <- "./"
+
+for (epoch in seq_len(epochs)) {
+  cat("Start epoch: ", epoch, "\n")
+  step <- 0
+  train_iterator <- as_iterator(dataset)
+  while (!is.null(real_images <- iter_next(train_iterator))) {
+    step <- step + 1
+    # Train the discriminator & generator on one batch of real images.
+    c(d_loss, g_loss, generated_images) %<-% train_step(real_images)
+
+    # Logging.
+    if (step %% 200 == 0) {
+      # Print metrics
+      cat(sprintf("discriminator loss at step %d: %.2f\n", step, d_loss))
+      cat(sprintf("adversarial loss at step %d: %.2f\n", step, g_loss))
+    }
+
+    # To limit execution time we stop after 10 steps.
+    # Remove the lines below to actually train the model!
+    if (step > 10)
+      break
+  }
+}
+
## Start epoch:  1
+

That’s it! You’ll get nice-looking fake MNIST digits after just ~30s of training on a GPU.

diff --git a/docs/articles/writing_your_own_callbacks.html b/docs/articles/writing_your_own_callbacks.html
new file mode 100644
index 0000000000..5e4ace42f9
--- /dev/null
+++ b/docs/articles/writing_your_own_callbacks.html
@@ -0,0 +1,1117 @@
+Writing your own callbacks • keras3

Introduction +

+

A callback is a powerful tool to customize the behavior of a Keras model during training, evaluation, or inference. Examples include callback_tensorboard() to visualize training progress and results with TensorBoard, or callback_model_checkpoint() to periodically save your model during training.

+
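For instance, attaching the built-in callbacks to fit() looks like this (a brief sketch; x_train, y_train, and the file paths are placeholders):

+model |> fit(
+  x_train, y_train,
+  epochs = 10,
+  callbacks = list(
+    callback_tensorboard(log_dir = "logs"),              # TensorBoard logging
+    callback_model_checkpoint(filepath = "model.keras")  # periodic saving
+  )
+)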

In this guide, you will learn what a Keras callback is, what it can do, and how you can build your own. We provide a few demos of simple callback applications to get you started.

+
+
+

Setup +

+library(keras3)
+
+

Keras callbacks overview +

+

All callbacks subclass the keras.callbacks.Callback class, and override a set of methods called at various stages of training, testing, and predicting. Callbacks are useful to get a view on internal states and statistics of the model during training.

+

You can pass a list of callbacks (as the keyword argument callbacks) to the following model methods:

  • fit()
  • evaluate()
  • predict()
+
+

An overview of callback methods +

+
+

Global methods +

+
+

+on_(train|test|predict)_begin(logs = NULL) +

+

Called at the beginning of fit/evaluate/predict.

+
+
+

+on_(train|test|predict)_end(logs = NULL) +

+

Called at the end of fit/evaluate/predict.

+
+
+
+

Batch-level methods for training/testing/predicting +

+
+

+on_(train|test|predict)_batch_begin(batch, logs = NULL) +

+

Called right before processing a batch during training/testing/predicting.

+
+
+

+on_(train|test|predict)_batch_end(batch, logs = NULL) +

+

Called at the end of training/testing/predicting a batch. Within this method, logs is a named list containing the metrics results.

+
+
+
+

Epoch-level methods (training only) +

+
+

+on_epoch_begin(epoch, logs = NULL) +

+

Called at the beginning of an epoch during training.

+
+
+

+on_epoch_end(epoch, logs = NULL) +

+

Called at the end of an epoch during training.

+
+
+
+
+

A basic example +

+

Let’s take a look at a concrete example. To get started, let’s define a simple sequential Keras model:

+
+# Define the Keras model to add callbacks to
+get_model <- function() {
+  model <- keras_model_sequential()
+  model |> layer_dense(units = 1)
+  model |> compile(
+    optimizer = optimizer_rmsprop(learning_rate = 0.1),
+    loss = "mean_squared_error",
+    metrics = "mean_absolute_error"
+  )
+  model
+}
+

Then, load the MNIST data for training and testing from the Keras datasets API:

+
+# Load example MNIST data and pre-process it
+mnist <- dataset_mnist()
+
+flatten_and_rescale <- function(x) {
+  x <- array_reshape(x, c(-1, 784))
+  x <- x / 255
+  x
+}
+
+mnist$train$x <- flatten_and_rescale(mnist$train$x)
+mnist$test$x  <- flatten_and_rescale(mnist$test$x)
+
+# limit to 1000 samples
+n <- 1000
+mnist$train$x <- mnist$train$x[1:n,]
+mnist$train$y <- mnist$train$y[1:n]
+mnist$test$x  <- mnist$test$x[1:n,]
+mnist$test$y  <- mnist$test$y[1:n]
+

Now, define a simple custom callback that logs:

+
  • When fit/evaluate/predict starts & ends
  • When each epoch starts & ends
  • When each training batch starts & ends
  • When each evaluation (test) batch starts & ends
  • When each inference (prediction) batch starts & ends
+
+show <- function(msg, logs) {
+  cat(glue::glue(msg, .envir = parent.frame()),
+      "got logs: ", sep = "; ")
+  str(logs); cat("\n")
+}
+
+callback_custom <- Callback(
+  "CustomCallback",
+  on_train_begin         = \(logs = NULL)        show("Starting training", logs),
+  on_epoch_begin         = \(epoch, logs = NULL) show("Start epoch {epoch} of training", logs),
+  on_train_batch_begin   = \(batch, logs = NULL) show("...Training: start of batch {batch}", logs),
+  on_train_batch_end     = \(batch, logs = NULL) show("...Training: end of batch {batch}",  logs),
+  on_epoch_end           = \(epoch, logs = NULL) show("End epoch {epoch} of training", logs),
+  on_train_end           = \(logs = NULL)        show("Stop training", logs),
+
+
+  on_test_begin          = \(logs = NULL)        show("Start testing", logs),
+  on_test_batch_begin    = \(batch, logs = NULL) show("...Evaluating: start of batch {batch}", logs),
+  on_test_batch_end      = \(batch, logs = NULL) show("...Evaluating: end of batch {batch}", logs),
+  on_test_end            = \(logs = NULL)        show("Stop testing", logs),
+
+  on_predict_begin       = \(logs = NULL)        show("Start predicting", logs),
+  on_predict_end         = \(logs = NULL)        show("Stop predicting", logs),
+  on_predict_batch_begin = \(batch, logs = NULL) show("...Predicting: start of batch {batch}", logs),
+  on_predict_batch_end   = \(batch, logs = NULL) show("...Predicting: end of batch {batch}", logs),
+)
+

Let’s try it out:

+
+model <- get_model()
+model |> fit(
+  mnist$train$x, mnist$train$y,
+  batch_size = 128,
+  epochs = 2,
+  verbose = 0,
+  validation_split = 0.5,
+  callbacks = list(callback_custom())
+)
+
## Starting training; got logs:  Named list()
+##
+## Start epoch 1 of training; got logs:  Named list()
+##
+## ...Training: start of batch 1; got logs:  Named list()
+##
+## ...Training: end of batch 1; got logs: List of 2
+##  $ loss               : num 25.9
+##  $ mean_absolute_error: num 4.19
+##
+## ...Training: start of batch 2; got logs:  Named list()
+##
+## ...Training: end of batch 2; got logs: List of 2
+##  $ loss               : num 433
+##  $ mean_absolute_error: num 15.5
+##
+## ...Training: start of batch 3; got logs:  Named list()
+##
+## ...Training: end of batch 3; got logs: List of 2
+##  $ loss               : num 297
+##  $ mean_absolute_error: num 11.8
+##
+## ...Training: start of batch 4; got logs:  Named list()
+##
+## ...Training: end of batch 4; got logs: List of 2
+##  $ loss               : num 231
+##  $ mean_absolute_error: num 9.68
+##
+## Start testing; got logs:  Named list()
+##
+## ...Evaluating: start of batch 1; got logs:  Named list()
+##
+## ...Evaluating: end of batch 1; got logs: List of 2
+##  $ loss               : num 8.1
+##  $ mean_absolute_error: num 2.3
+##
+## ...Evaluating: start of batch 2; got logs:  Named list()
+##
+## ...Evaluating: end of batch 2; got logs: List of 2
+##  $ loss               : num 7.58
+##  $ mean_absolute_error: num 2.23
+##
+## ...Evaluating: start of batch 3; got logs:  Named list()
+##
+## ...Evaluating: end of batch 3; got logs: List of 2
+##  $ loss               : num 7.38
+##  $ mean_absolute_error: num 2.21
+##
+## ...Evaluating: start of batch 4; got logs:  Named list()
+##
+## ...Evaluating: end of batch 4; got logs: List of 2
+##  $ loss               : num 7.3
+##  $ mean_absolute_error: num 2.21
+##
+## Stop testing; got logs: List of 2
+##  $ loss               : num 7.3
+##  $ mean_absolute_error: num 2.21
+##
+## End epoch 1 of training; got logs: List of 4
+##  $ loss                   : num 231
+##  $ mean_absolute_error    : num 9.68
+##  $ val_loss               : num 7.3
+##  $ val_mean_absolute_error: num 2.21
+##
+## Start epoch 2 of training; got logs:  Named list()
+##
+## ...Training: start of batch 1; got logs:  Named list()
+##
+## ...Training: end of batch 1; got logs: List of 2
+##  $ loss               : num 7.44
+##  $ mean_absolute_error: num 2.27
+##
+## ...Training: start of batch 2; got logs:  Named list()
+##
+## ...Training: end of batch 2; got logs: List of 2
+##  $ loss               : num 6.81
+##  $ mean_absolute_error: num 2.16
+##
+## ...Training: start of batch 3; got logs:  Named list()
+##
+## ...Training: end of batch 3; got logs: List of 2
+##  $ loss               : num 6.12
+##  $ mean_absolute_error: num 2.06
+##
+## ...Training: start of batch 4; got logs:  Named list()
+##
+## ...Training: end of batch 4; got logs: List of 2
+##  $ loss               : num 6.08
+##  $ mean_absolute_error: num 2.04
+##
+## Start testing; got logs:  Named list()
+##
+## ...Evaluating: start of batch 1; got logs:  Named list()
+##
+## ...Evaluating: end of batch 1; got logs: List of 2
+##  $ loss               : num 5.54
+##  $ mean_absolute_error: num 1.92
+##
+## ...Evaluating: start of batch 2; got logs:  Named list()
+##
+## ...Evaluating: end of batch 2; got logs: List of 2
+##  $ loss               : num 5.31
+##  $ mean_absolute_error: num 1.87
+##
+## ...Evaluating: start of batch 3; got logs:  Named list()
+##
+## ...Evaluating: end of batch 3; got logs: List of 2
+##  $ loss               : num 5.11
+##  $ mean_absolute_error: num 1.8
+##
+## ...Evaluating: start of batch 4; got logs:  Named list()
+##
+## ...Evaluating: end of batch 4; got logs: List of 2
+##  $ loss               : num 5.15
+##  $ mean_absolute_error: num 1.82
+##
+## Stop testing; got logs: List of 2
+##  $ loss               : num 5.15
+##  $ mean_absolute_error: num 1.82
+##
+## End epoch 2 of training; got logs: List of 4
+##  $ loss                   : num 6.08
+##  $ mean_absolute_error    : num 2.04
+##  $ val_loss               : num 5.15
+##  $ val_mean_absolute_error: num 1.82
+##
+## Stop training; got logs: List of 4
+##  $ loss                   : num 6.08
+##  $ mean_absolute_error    : num 2.04
+##  $ val_loss               : num 5.15
+##  $ val_mean_absolute_error: num 1.82
+
+res <- model |> evaluate(
+  mnist$test$x, mnist$test$y,
+  batch_size = 128, verbose = 0,
+  callbacks = list(callback_custom())
+)
+
## Start testing; got logs:  Named list()
+##
+## ...Evaluating: start of batch 1; got logs:  Named list()
+##
+## ...Evaluating: end of batch 1; got logs: List of 2
+##  $ loss               : num 5.2
+##  $ mean_absolute_error: num 1.84
+##
+## ...Evaluating: start of batch 2; got logs:  Named list()
+##
+## ...Evaluating: end of batch 2; got logs: List of 2
+##  $ loss               : num 4.62
+##  $ mean_absolute_error: num 1.73
+##
+## ...Evaluating: start of batch 3; got logs:  Named list()
+##
+## ...Evaluating: end of batch 3; got logs: List of 2
+##  $ loss               : num 4.61
+##  $ mean_absolute_error: num 1.74
+##
+## ...Evaluating: start of batch 4; got logs:  Named list()
+##
+## ...Evaluating: end of batch 4; got logs: List of 2
+##  $ loss               : num 4.65
+##  $ mean_absolute_error: num 1.75
+##
+## ...Evaluating: start of batch 5; got logs:  Named list()
+##
+## ...Evaluating: end of batch 5; got logs: List of 2
+##  $ loss               : num 4.84
+##  $ mean_absolute_error: num 1.77
+##
+## ...Evaluating: start of batch 6; got logs:  Named list()
+##
+## ...Evaluating: end of batch 6; got logs: List of 2
+##  $ loss               : num 4.76
+##  $ mean_absolute_error: num 1.76
+##
+## ...Evaluating: start of batch 7; got logs:  Named list()
+##
+## ...Evaluating: end of batch 7; got logs: List of 2
+##  $ loss               : num 4.74
+##  $ mean_absolute_error: num 1.76
+##
+## ...Evaluating: start of batch 8; got logs:  Named list()
+##
+## ...Evaluating: end of batch 8; got logs: List of 2
+##  $ loss               : num 4.67
+##  $ mean_absolute_error: num 1.75
+##
+## Stop testing; got logs: List of 2
+##  $ loss               : num 4.67
+##  $ mean_absolute_error: num 1.75
+
+res <- model |> predict(
+  mnist$test$x,
+  batch_size = 128, verbose = 0,
+  callbacks = list(callback_custom())
+)
+
## Start predicting; got logs:  Named list()
+##
+## ...Predicting: start of batch 1; got logs:  Named list()
+##
+## ...Predicting: end of batch 1; got logs: List of 1
+##  $ outputs:<tf.Tensor: shape=(128, 1), dtype=float32, numpy=…>
+##
+## ...Predicting: start of batch 2; got logs:  Named list()
+##
+## ...Predicting: end of batch 2; got logs: List of 1
+##  $ outputs:<tf.Tensor: shape=(128, 1), dtype=float32, numpy=…>
+##
+## ...Predicting: start of batch 3; got logs:  Named list()
+##
+## ...Predicting: end of batch 3; got logs: List of 1
+##  $ outputs:<tf.Tensor: shape=(128, 1), dtype=float32, numpy=…>
+##
+## ...Predicting: start of batch 4; got logs:  Named list()
+##
+## ...Predicting: end of batch 4; got logs: List of 1
+##  $ outputs:<tf.Tensor: shape=(128, 1), dtype=float32, numpy=…>
+##
+## ...Predicting: start of batch 5; got logs:  Named list()
+##
+## ...Predicting: end of batch 5; got logs: List of 1
+##  $ outputs:<tf.Tensor: shape=(128, 1), dtype=float32, numpy=…>
+##
+## ...Predicting: start of batch 6; got logs:  Named list()
+##
+## ...Predicting: end of batch 6; got logs: List of 1
+##  $ outputs:<tf.Tensor: shape=(128, 1), dtype=float32, numpy=…>
+##
+## ...Predicting: start of batch 7; got logs:  Named list()
+##
+## ...Predicting: end of batch 7; got logs: List of 1
+##  $ outputs:<tf.Tensor: shape=(128, 1), dtype=float32, numpy=…>
+##
+## ...Predicting: start of batch 8; got logs:  Named list()
+##
+## ...Predicting: end of batch 8; got logs: List of 1
+##  $ outputs:<tf.Tensor: shape=(104, 1), dtype=float32, numpy=…>
+##
+## Stop predicting; got logs:  Named list()
+
+

Usage of logs list +

+

The logs named list contains the loss value, and all the metrics at the end of a batch or epoch. Examples include the loss and mean absolute error.

+
+callback_print_loss_and_mae <- Callback(
+  "LossAndErrorPrintingCallback",
+
+  on_train_batch_end = function(batch, logs = NULL)
+    cat(sprintf("Up to batch %i, the average loss is %7.2f.\n",
+                batch,  logs$loss)),
+
+  on_test_batch_end = function(batch, logs = NULL)
+    cat(sprintf("Up to batch %i, the average loss is %7.2f.\n",
+                batch, logs$loss)),
+
+  on_epoch_end = function(epoch, logs = NULL)
+    cat(sprintf(
+      "The average loss for epoch %2i is %9.2f and mean absolute error is %7.2f.\n",
+      epoch, logs$loss, logs$mean_absolute_error
+    ))
+)
+
+
+model <- get_model()
+model |> fit(
+  mnist$train$x, mnist$train$y,
+  epochs = 2, verbose = 0, batch_size = 128,
+  callbacks = list(callback_print_loss_and_mae())
+)
+
## Up to batch 1, the average loss is   25.12.
+## Up to batch 2, the average loss is  398.92.
+## Up to batch 3, the average loss is  274.04.
+## Up to batch 4, the average loss is  208.32.
+## Up to batch 5, the average loss is  168.15.
+## Up to batch 6, the average loss is  141.31.
+## Up to batch 7, the average loss is  122.19.
+## Up to batch 8, the average loss is  110.05.
+## The average loss for epoch  1 is    110.05 and mean absolute error is    5.79.
+## Up to batch 1, the average loss is    4.71.
+## Up to batch 2, the average loss is    4.74.
+## Up to batch 3, the average loss is    4.81.
+## Up to batch 4, the average loss is    5.07.
+## Up to batch 5, the average loss is    5.08.
+## Up to batch 6, the average loss is    5.09.
+## Up to batch 7, the average loss is    5.19.
+## Up to batch 8, the average loss is    5.51.
+## The average loss for epoch  2 is      5.51 and mean absolute error is    1.90.
+
+res = model |> evaluate(
+  mnist$test$x, mnist$test$y,
+  verbose = 0, batch_size = 128,
+  callbacks = list(callback_print_loss_and_mae())
+)
+
## Up to batch 1, the average loss is   15.86.
+## Up to batch 2, the average loss is   16.13.
+## Up to batch 3, the average loss is   16.02.
+## Up to batch 4, the average loss is   16.11.
+## Up to batch 5, the average loss is   16.23.
+## Up to batch 6, the average loss is   16.68.
+## Up to batch 7, the average loss is   16.61.
+## Up to batch 8, the average loss is   16.54.
+

For more information about callbacks, you can check out the Keras callback API documentation.

+
+
+
+

Usage of self$model attribute +

+

In addition to receiving log information when one of their methods is called, callbacks have access to the model associated with the current round of training/evaluation/inference: self$model.

+

Here are a few of the things you can do with self$model in a callback:

+
  • Set self$model$stop_training <- TRUE to immediately interrupt training.
  • Mutate hyperparameters of the optimizer (available as self$model$optimizer), such as self$model$optimizer$learning_rate.
  • Save the model at periodic intervals.
  • Record the output of model |> predict() on a few test samples at the end of each epoch, to use as a sanity check during training.
  • Extract visualizations of intermediate features at the end of each epoch, to monitor what the model is learning over time.
  • etc.
+

Let’s see this in action in a couple of examples.

+
+
+

Examples of Keras callback applications +

+
+

Early stopping at minimum loss +

+

This first example shows the creation of a Callback that stops training when the minimum of loss has been reached, by setting the attribute self$model$stop_training (boolean). Optionally, you can provide an argument patience to specify how many epochs we should wait before stopping after having reached a local minimum.

+

callback_early_stopping() provides a more complete and general implementation.

+
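For comparison, the built-in callback can be attached in one line (a sketch using callback_early_stopping()’s standard arguments):

+model |> fit(
+  mnist$train$x, mnist$train$y,
+  epochs = 30, verbose = 0,
+  callbacks = list(callback_early_stopping(
+    monitor = "loss",            # watch the training loss, as below
+    patience = 2,                # epochs to wait after the last improvement
+    restore_best_weights = TRUE  # roll back to the best weights seen
+  ))
+)
+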
+callback_early_stopping_at_min_loss <- Callback(
+  "EarlyStoppingAtMinLoss",
+  `__doc__` =
+    "Stop training when the loss is at its min, i.e. the loss stops decreasing.
+
+    Arguments:
+        patience: Number of epochs to wait after min has been hit. After this
+        number of epochs with no improvement, training stops.
+    ",
+
+  initialize = function(patience = 0) {
+    super$initialize()
+    self$patience <- patience
+    # best_weights to store the weights at which the minimum loss occurs.
+    self$best_weights <- NULL
+  },
+
+  on_train_begin = function(logs = NULL) {
+    # The number of epochs it has waited when loss is no longer minimum.
+    self$wait <- 0
+    # The epoch the training stops at.
+    self$stopped_epoch <- 0
+    # Initialize the best as infinity.
+    self$best <- Inf
+  },
+
+  on_epoch_end = function(epoch, logs = NULL) {
+    current <- logs$loss
+    if (current < self$best) {
+      self$best <- current
+      self$wait <- 0L
+      # Record the best weights if the current result is better (smaller).
+      self$best_weights <- get_weights(self$model)
+    } else {
+      self$wait <- self$wait + 1L
+      if (self$wait >= self$patience) {
+        self$stopped_epoch <- epoch
+        self$model$stop_training <- TRUE
+        cat("Restoring model weights from the end of the best epoch.\n")
+        self$model$set_weights(self$best_weights)
+      }
+    }
+  },
+
+  on_train_end = function(logs = NULL)
+    if (self$stopped_epoch > 0)
+      cat(sprintf("Epoch %05d: early stopping\n", self$stopped_epoch + 1))
+)
+
+
+model <- get_model()
+model |> fit(
+  mnist$train$x,
+  mnist$train$y,
+  epochs = 30,
+  batch_size = 64,
+  verbose = 0,
+  callbacks = list(callback_print_loss_and_mae(),
+                   callback_early_stopping_at_min_loss())
+)
+
## Up to batch 1, the average loss is   30.54.
+## Up to batch 2, the average loss is  513.27.
+## Up to batch 3, the average loss is  352.60.
+## Up to batch 4, the average loss is  266.37.
+## Up to batch 5, the average loss is  214.68.
+## Up to batch 6, the average loss is  179.97.
+## Up to batch 7, the average loss is  155.06.
+## Up to batch 8, the average loss is  136.59.
+## Up to batch 9, the average loss is  121.96.
+## Up to batch 10, the average loss is  110.28.
+## Up to batch 11, the average loss is  100.72.
+## Up to batch 12, the average loss is   92.71.
+## Up to batch 13, the average loss is   85.95.
+## Up to batch 14, the average loss is   80.21.
+## Up to batch 15, the average loss is   75.17.
+## Up to batch 16, the average loss is   72.48.
+## The average loss for epoch  1 is     72.48 and mean absolute error is    4.08.
+## Up to batch 1, the average loss is    7.98.
+## Up to batch 2, the average loss is    9.92.
+## Up to batch 3, the average loss is   12.88.
+## Up to batch 4, the average loss is   16.61.
+## Up to batch 5, the average loss is   20.49.
+## Up to batch 6, the average loss is   26.14.
+## Up to batch 7, the average loss is   30.44.
+## Up to batch 8, the average loss is   33.76.
+## Up to batch 9, the average loss is   36.32.
+## Up to batch 10, the average loss is   35.26.
+## Up to batch 11, the average loss is   34.22.
+## Up to batch 12, the average loss is   33.53.
+## Up to batch 13, the average loss is   32.84.
+## Up to batch 14, the average loss is   31.80.
+## Up to batch 15, the average loss is   31.39.
+## Up to batch 16, the average loss is   31.45.
+## The average loss for epoch  2 is     31.45 and mean absolute error is    4.82.
+## Up to batch 1, the average loss is   39.60.
+## Up to batch 2, the average loss is   41.95.
+## Up to batch 3, the average loss is   41.29.
+## Up to batch 4, the average loss is   36.77.
+## Up to batch 5, the average loss is   32.08.
+## Up to batch 6, the average loss is   28.17.
+## Up to batch 7, the average loss is   25.33.
+## Up to batch 8, the average loss is   23.56.
+## Up to batch 9, the average loss is   22.28.
+## Up to batch 10, the average loss is   21.22.
+## Up to batch 11, the average loss is   20.87.
+## Up to batch 12, the average loss is   22.25.
+## Up to batch 13, the average loss is   25.08.
+## Up to batch 14, the average loss is   27.87.
+## Up to batch 15, the average loss is   31.72.
+## Up to batch 16, the average loss is   33.21.
+## The average loss for epoch  3 is     33.21 and mean absolute error is    4.79.
+## Restoring model weights from the end of the best epoch.
+## Epoch 00004: early stopping
+
+
+

Learning rate scheduling +

+

In this example, we show how a custom Callback can be used to dynamically change the learning rate of the optimizer during the course of training.

+

See keras$callbacks$LearningRateScheduler for a more general implementation (in RStudio, press F1 while the cursor is over LearningRateScheduler and a browser will open to this page).

+
+callback_custom_learning_rate_scheduler <- Callback(
+  "CustomLearningRateScheduler",
+  `__doc__` =
+  "Learning rate scheduler which sets the learning rate according to schedule.
+
+    Arguments:
+        schedule: a function that takes an epoch index
+            (integer, indexed from 0) and current learning rate
+            as inputs and returns a new learning rate as output (float).
+    ",
+
+  initialize = function(schedule) {
+    super$initialize()
+    self$schedule <- schedule
+  },
+
+  on_epoch_begin = function(epoch, logs = NULL) {
+    ## When in doubt about what types of objects are in scope (e.g., self$model)
+    ## use a debugger to interact with the actual objects at the console!
+    # browser()
+
+    if (!"learning_rate" %in% names(self$model$optimizer))
+      stop('Optimizer must have a "learning_rate" attribute.')
+
+    # Get the current learning rate from the model's optimizer;
+    # use as.numeric() to convert the Keras variable to an R numeric.
+    lr <- as.numeric(self$model$optimizer$learning_rate)
+    # Call the schedule function to get the scheduled learning rate.
+    scheduled_lr <- self$schedule(epoch, lr)
+    # Set the value back on the optimizer before this epoch starts.
+    optimizer <- self$model$optimizer
+    optimizer$learning_rate <- scheduled_lr
+    cat(sprintf("\nEpoch %03d: Learning rate is %6.4f.\n", epoch, scheduled_lr))
+  }
+)
+
+LR_SCHEDULE <- tibble::tribble(
+  ~start_epoch, ~learning_rate,
+             0,            0.1,
+             3,           0.05,
+             6,           0.01,
+             9,          0.005,
+            12,          0.001,
+  )
+
+last <- function(x) x[length(x)]
+lr_schedule <- function(epoch, learning_rate) {
+  "Helper function to retrieve the scheduled learning rate based on epoch."
+  with(LR_SCHEDULE, learning_rate[last(which(epoch >= start_epoch))])
+}
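+# Illustrative lookups, following from LR_SCHEDULE above (the
+# current-learning-rate argument is accepted but unused by this lookup):
+#   lr_schedule(0, 0.1)   # 0.1   (only start_epoch 0 applies)
+#   lr_schedule(5, 0.1)   # 0.05  (last start_epoch <= 5 is 3)
+#   lr_schedule(12, 0.1)  # 0.001 (final schedule entry)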
+
+model <- get_model()
+model |> fit(
+  mnist$train$x,
+  mnist$train$y,
+  epochs = 14,
+  batch_size = 64,
+  verbose = 0,
+  callbacks = list(
+    callback_print_loss_and_mae(),
+    callback_custom_learning_rate_scheduler(lr_schedule)
+  )
+)
+
##
+## Epoch 001: Learning rate is 0.1000.
+## Up to batch 1, the average loss is   29.36.
+## Up to batch 2, the average loss is  513.95.
+## Up to batch 3, the average loss is  352.70.
+## Up to batch 4, the average loss is  266.46.
+## Up to batch 5, the average loss is  214.73.
+## Up to batch 6, the average loss is  180.00.
+## Up to batch 7, the average loss is  155.05.
+## Up to batch 8, the average loss is  136.64.
+## Up to batch 9, the average loss is  121.97.
+## Up to batch 10, the average loss is  110.30.
+## Up to batch 11, the average loss is  100.76.
+## Up to batch 12, the average loss is   92.74.
+## Up to batch 13, the average loss is   85.95.
+## Up to batch 14, the average loss is   80.18.
+## Up to batch 15, the average loss is   75.11.
+## Up to batch 16, the average loss is   72.38.
+## The average loss for epoch  1 is     72.38 and mean absolute error is    4.04.
+##
+## Epoch 002: Learning rate is 0.1000.
+## Up to batch 1, the average loss is    6.95.
+## Up to batch 2, the average loss is    8.71.
+## Up to batch 3, the average loss is   11.42.
+## Up to batch 4, the average loss is   15.15.
+## Up to batch 5, the average loss is   19.28.
+## Up to batch 6, the average loss is   25.54.
+## Up to batch 7, the average loss is   30.38.
+## Up to batch 8, the average loss is   33.95.
+## Up to batch 9, the average loss is   36.58.
+## Up to batch 10, the average loss is   35.46.
+## Up to batch 11, the average loss is   34.34.
+## Up to batch 12, the average loss is   33.51.
+## Up to batch 13, the average loss is   32.67.
+## Up to batch 14, the average loss is   31.54.
+## Up to batch 15, the average loss is   31.05.
+## Up to batch 16, the average loss is   31.09.
+## The average loss for epoch  2 is     31.09 and mean absolute error is    4.77.
+##
+## Epoch 003: Learning rate is 0.0500.
+## Up to batch 1, the average loss is   40.40.
+## Up to batch 2, the average loss is   22.33.
+## Up to batch 3, the average loss is   16.18.
+## Up to batch 4, the average loss is   13.09.
+## Up to batch 5, the average loss is   11.48.
+## Up to batch 6, the average loss is   10.21.
+## Up to batch 7, the average loss is    9.22.
+## Up to batch 8, the average loss is    8.70.
+## Up to batch 9, the average loss is    8.16.
+## Up to batch 10, the average loss is    7.80.
+## Up to batch 11, the average loss is    7.50.
+## Up to batch 12, the average loss is    7.17.
+## Up to batch 13, the average loss is    6.89.
+## Up to batch 14, the average loss is    6.70.
+## Up to batch 15, the average loss is    6.52.
+## Up to batch 16, the average loss is    6.54.
+## The average loss for epoch  3 is      6.54 and mean absolute error is    1.93.
+##
+## Epoch 004: Learning rate is 0.0500.
+## Up to batch 1, the average loss is    8.74.
+## Up to batch 2, the average loss is    8.34.
+## Up to batch 3, the average loss is    9.09.
+## Up to batch 4, the average loss is    9.72.
+## Up to batch 5, the average loss is   10.48.
+## Up to batch 6, the average loss is   11.69.
+## Up to batch 7, the average loss is   11.83.
+## Up to batch 8, the average loss is   11.56.
+## Up to batch 9, the average loss is   11.24.
+## Up to batch 10, the average loss is   10.84.
+## Up to batch 11, the average loss is   10.66.
+## Up to batch 12, the average loss is   10.44.
+## Up to batch 13, the average loss is   10.21.
+## Up to batch 14, the average loss is   10.06.
+## Up to batch 15, the average loss is   10.00.
+## Up to batch 16, the average loss is   10.20.
+## The average loss for epoch  4 is     10.20 and mean absolute error is    2.71.
+##
+## Epoch 005: Learning rate is 0.0500.
+## Up to batch 1, the average loss is   17.26.
+## Up to batch 2, the average loss is   14.09.
+## Up to batch 3, the average loss is   12.67.
+## Up to batch 4, the average loss is   11.44.
+## Up to batch 5, the average loss is   10.54.
+## Up to batch 6, the average loss is   10.10.
+## Up to batch 7, the average loss is    9.53.
+## Up to batch 8, the average loss is    9.17.
+## Up to batch 9, the average loss is    8.78.
+## Up to batch 10, the average loss is    8.49.
+## Up to batch 11, the average loss is    8.50.
+## Up to batch 12, the average loss is    8.59.
+## Up to batch 13, the average loss is    8.68.
+## Up to batch 14, the average loss is    8.86.
+## Up to batch 15, the average loss is    9.17.
+## Up to batch 16, the average loss is    9.53.
+## The average loss for epoch  5 is      9.53 and mean absolute error is    2.58.
+##
+## Epoch 006: Learning rate is 0.0100.
+## Up to batch 1, the average loss is   17.04.
+## Up to batch 2, the average loss is   14.85.
+## Up to batch 3, the average loss is   11.53.
+## Up to batch 4, the average loss is    9.65.
+## Up to batch 5, the average loss is    8.44.
+## Up to batch 6, the average loss is    7.50.
+## Up to batch 7, the average loss is    6.74.
+## Up to batch 8, the average loss is    6.56.
+## Up to batch 9, the average loss is    6.18.
+## Up to batch 10, the average loss is    5.87.
+## Up to batch 11, the average loss is    5.63.
+## Up to batch 12, the average loss is    5.45.
+## Up to batch 13, the average loss is    5.23.
+## Up to batch 14, the average loss is    5.12.
+## Up to batch 15, the average loss is    4.96.
+## Up to batch 16, the average loss is    4.91.
+## The average loss for epoch  6 is      4.91 and mean absolute error is    1.67.
+##
+## Epoch 007: Learning rate is 0.0100.
+## Up to batch 1, the average loss is    3.65.
+## Up to batch 2, the average loss is    3.04.
+## Up to batch 3, the average loss is    2.88.
+## Up to batch 4, the average loss is    2.85.
+## Up to batch 5, the average loss is    2.88.
+## Up to batch 6, the average loss is    2.81.
+## Up to batch 7, the average loss is    2.70.
+## Up to batch 8, the average loss is    2.96.
+## Up to batch 9, the average loss is    2.96.
+## Up to batch 10, the average loss is    2.93.
+## Up to batch 11, the average loss is    2.95.
+## Up to batch 12, the average loss is    2.98.
+## Up to batch 13, the average loss is    2.97.
+## Up to batch 14, the average loss is    3.01.
+## Up to batch 15, the average loss is    3.00.
+## Up to batch 16, the average loss is    3.05.
+## The average loss for epoch  7 is      3.05 and mean absolute error is    1.34.
+##
+## Epoch 008: Learning rate is 0.0100.
+## Up to batch 1, the average loss is    3.69.
+## Up to batch 2, the average loss is    3.21.
+## Up to batch 3, the average loss is    3.00.
+## Up to batch 4, the average loss is    2.91.
+## Up to batch 5, the average loss is    2.94.
+## Up to batch 6, the average loss is    2.85.
+## Up to batch 7, the average loss is    2.72.
+## Up to batch 8, the average loss is    2.95.
+## Up to batch 9, the average loss is    2.97.
+## Up to batch 10, the average loss is    2.93.
+## Up to batch 11, the average loss is    2.96.
+## Up to batch 12, the average loss is    2.98.
+## Up to batch 13, the average loss is    2.99.
+## Up to batch 14, the average loss is    3.05.
+## Up to batch 15, the average loss is    3.08.
+## Up to batch 16, the average loss is    3.14.
+## The average loss for epoch  8 is      3.14 and mean absolute error is    1.36.
+##
+## Epoch 009: Learning rate is 0.0050.
+## Up to batch 1, the average loss is    3.71.
+## Up to batch 2, the average loss is    2.93.
+## Up to batch 3, the average loss is    2.76.
+## Up to batch 4, the average loss is    2.70.
+## Up to batch 5, the average loss is    2.76.
+## Up to batch 6, the average loss is    2.69.
+## Up to batch 7, the average loss is    2.57.
+## Up to batch 8, the average loss is    2.79.
+## Up to batch 9, the average loss is    2.80.
+## Up to batch 10, the average loss is    2.77.
+## Up to batch 11, the average loss is    2.79.
+## Up to batch 12, the average loss is    2.80.
+## Up to batch 13, the average loss is    2.78.
+## Up to batch 14, the average loss is    2.81.
+## Up to batch 15, the average loss is    2.80.
+## Up to batch 16, the average loss is    2.83.
+## The average loss for epoch  9 is      2.83 and mean absolute error is    1.28.
+##
+## Epoch 010: Learning rate is 0.0050.
+## Up to batch 1, the average loss is    3.02.
+## Up to batch 2, the average loss is    2.69.
+## Up to batch 3, the average loss is    2.58.
+## Up to batch 4, the average loss is    2.57.
+## Up to batch 5, the average loss is    2.65.
+## Up to batch 6, the average loss is    2.60.
+## Up to batch 7, the average loss is    2.48.
+## Up to batch 8, the average loss is    2.72.
+## Up to batch 9, the average loss is    2.74.
+## Up to batch 10, the average loss is    2.71.
+## Up to batch 11, the average loss is    2.74.
+## Up to batch 12, the average loss is    2.75.
+## Up to batch 13, the average loss is    2.74.
+## Up to batch 14, the average loss is    2.77.
+## Up to batch 15, the average loss is    2.77.
+## Up to batch 16, the average loss is    2.80.
+## The average loss for epoch 10 is      2.80 and mean absolute error is    1.27.
+##
+## Epoch 011: Learning rate is 0.0050.
+## Up to batch 1, the average loss is    3.01.
+## Up to batch 2, the average loss is    2.69.
+## Up to batch 3, the average loss is    2.58.
+## Up to batch 4, the average loss is    2.56.
+## Up to batch 5, the average loss is    2.63.
+## Up to batch 6, the average loss is    2.58.
+## Up to batch 7, the average loss is    2.47.
+## Up to batch 8, the average loss is    2.70.
+## Up to batch 9, the average loss is    2.72.
+## Up to batch 10, the average loss is    2.69.
+## Up to batch 11, the average loss is    2.71.
+## Up to batch 12, the average loss is    2.72.
+## Up to batch 13, the average loss is    2.71.
+## Up to batch 14, the average loss is    2.75.
+## Up to batch 15, the average loss is    2.74.
+## Up to batch 16, the average loss is    2.77.
+## The average loss for epoch 11 is      2.77 and mean absolute error is    1.27.
+##
+## Epoch 012: Learning rate is 0.0010.
+## Up to batch 1, the average loss is    2.96.
+## Up to batch 2, the average loss is    2.53.
+## Up to batch 3, the average loss is    2.47.
+## Up to batch 4, the average loss is    2.46.
+## Up to batch 5, the average loss is    2.54.
+## Up to batch 6, the average loss is    2.48.
+## Up to batch 7, the average loss is    2.39.
+## Up to batch 8, the average loss is    2.60.
+## Up to batch 9, the average loss is    2.62.
+## Up to batch 10, the average loss is    2.59.
+## Up to batch 11, the average loss is    2.61.
+## Up to batch 12, the average loss is    2.62.
+## Up to batch 13, the average loss is    2.60.
+## Up to batch 14, the average loss is    2.64.
+## Up to batch 15, the average loss is    2.62.
+## Up to batch 16, the average loss is    2.64.
+## The average loss for epoch 12 is      2.64 and mean absolute error is    1.24.
+##
+## Epoch 013: Learning rate is 0.0010.
+## Up to batch 1, the average loss is    2.82.
+## Up to batch 2, the average loss is    2.46.
+## Up to batch 3, the average loss is    2.42.
+## Up to batch 4, the average loss is    2.42.
+## Up to batch 5, the average loss is    2.50.
+## Up to batch 6, the average loss is    2.45.
+## Up to batch 7, the average loss is    2.36.
+## Up to batch 8, the average loss is    2.57.
+## Up to batch 9, the average loss is    2.59.
+## Up to batch 10, the average loss is    2.57.
+## Up to batch 11, the average loss is    2.59.
+## Up to batch 12, the average loss is    2.60.
+## Up to batch 13, the average loss is    2.59.
+## Up to batch 14, the average loss is    2.62.
+## Up to batch 15, the average loss is    2.61.
+## Up to batch 16, the average loss is    2.63.
+## The average loss for epoch 13 is      2.63 and mean absolute error is    1.23.
+##
+## Epoch 014: Learning rate is 0.0010.
+## Up to batch 1, the average loss is    2.79.
+## Up to batch 2, the average loss is    2.44.
+## Up to batch 3, the average loss is    2.40.
+## Up to batch 4, the average loss is    2.41.
+## Up to batch 5, the average loss is    2.49.
+## Up to batch 6, the average loss is    2.44.
+## Up to batch 7, the average loss is    2.34.
+## Up to batch 8, the average loss is    2.56.
+## Up to batch 9, the average loss is    2.58.
+## Up to batch 10, the average loss is    2.56.
+## Up to batch 11, the average loss is    2.58.
+## Up to batch 12, the average loss is    2.59.
+## Up to batch 13, the average loss is    2.58.
+## Up to batch 14, the average loss is    2.61.
+## Up to batch 15, the average loss is    2.60.
+## Up to batch 16, the average loss is    2.62.
+## The average loss for epoch 14 is      2.62 and mean absolute error is    1.23.
+
+
+

+Built-in Keras callbacks
+
+Be sure to check out the existing Keras callbacks by reading the API
+docs. Applications include logging to CSV, saving the model,
+visualizing metrics in TensorBoard, and a lot more!
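+
+As a minimal sketch (assuming the standard keras3 wrappers; the file
+paths are illustrative), several built-in callbacks can be combined in
+a single fit() call:
+
+callbacks <- list(
+  # append per-epoch metrics to a CSV file
+  callback_csv_logger("training_log.csv"),
+  # keep the best model seen so far, judged by validation loss
+  callback_model_checkpoint("best_model.keras", save_best_only = TRUE),
+  # write logs that TensorBoard can visualize
+  callback_tensorboard(log_dir = "logs")
+)
+# model |> fit(x, y, epochs = 10, validation_split = 0.2,
+#              callbacks = callbacks)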

diff --git a/docs/authors.html b/docs/authors.html
new file mode 100644
index 0000000000..f87d6d7ae1
--- /dev/null
+++ b/docs/authors.html
@@ -0,0 +1,151 @@
+Authors and Citation • keras3
+
+Authors
+
+  • Tomasz Kalinowski. Author, copyright holder, maintainer.
+  • Daniel Falbel. Contributor, copyright holder.
+  • JJ Allaire. Author, copyright holder.
+  • François Chollet. Author, copyright holder.
+  • Posit Software, PBC. Copyright holder, funder.
+  • Google. Copyright holder, funder.
+  • Yuan Tang. Contributor, copyright holder.
+  • Wouter Van Der Bijl. Contributor, copyright holder.
+  • Martin Studer. Contributor, copyright holder.
+  • Sigrid Keydana. Contributor.
+

+Citation
+
+Source: DESCRIPTION
+
+Kalinowski T, Allaire J, Chollet F (2024). keras3: R Interface to
+'Keras'. R package version 0.2.0.9000,
+https://github.com/rstudio/keras, https://keras.posit.co/.

+
@Manual{,
+  title = {keras3: R Interface to 'Keras'},
+  author = {Tomasz Kalinowski and JJ Allaire and François Chollet},
+  year = {2024},
+  note = {R package version 0.2.0.9000, https://github.com/rstudio/keras},
+  url = {https://keras.posit.co/},
+}
+
diff --git a/docs/deps/Fira_Mono-0.4.8/font.css b/docs/deps/Fira_Mono-0.4.8/font.css
new file mode 100644
(vendored Fira Mono @font-face declarations; two binary .woff files also added under docs/deps/Fira_Mono-0.4.8/)
diff --git a/docs/deps/Fira_Mono-0.4.9/font.css b/docs/deps/Fira_Mono-0.4.9/font.css
new file mode 100644
(vendored Fira Mono @font-face declarations with per-script unicode-range subsets; twelve binary .woff2 files also added under docs/deps/Fira_Mono-0.4.9/)
diff --git a/docs/deps/bootstrap-5.3.1/bootstrap.bundle.min.js b/docs/deps/bootstrap-5.3.1/bootstrap.bundle.min.js
new file mode 100644
(vendored minified Bootstrap v5.3.1 bundle)
this._element}_configAfterMerge(t){return t.rootElement=r(t.rootElement),t}_append(){if(this._isAppended)return;const t=this._getElement();this._config.rootElement.append(t),N.on(t,Qi,(()=>{g(this._config.clickCallback)})),this._isAppended=!0}_emulateAnimation(t){_(t,this._getElement(),this._config.isAnimated)}}const Gi=".bs.focustrap",Ji=`focusin${Gi}`,Zi=`keydown.tab${Gi}`,tn="backward",en={autofocus:!0,trapElement:null},nn={autofocus:"boolean",trapElement:"element"};class sn extends H{constructor(t){super(),this._config=this._getConfig(t),this._isActive=!1,this._lastTabNavDirection=null}static get Default(){return en}static get DefaultType(){return nn}static get NAME(){return"focustrap"}activate(){this._isActive||(this._config.autofocus&&this._config.trapElement.focus(),N.off(document,Gi),N.on(document,Ji,(t=>this._handleFocusin(t))),N.on(document,Zi,(t=>this._handleKeydown(t))),this._isActive=!0)}deactivate(){this._isActive&&(this._isActive=!1,N.off(document,Gi))}_handleFocusin(t){const{trapElement:e}=this._config;if(t.target===document||t.target===e||e.contains(t.target))return;const i=z.focusableChildren(e);0===i.length?e.focus():this._lastTabNavDirection===tn?i[i.length-1].focus():i[0].focus()}_handleKeydown(t){"Tab"===t.key&&(this._lastTabNavDirection=t.shiftKey?tn:"forward")}}const on=".fixed-top, .fixed-bottom, .is-fixed, .sticky-top",rn=".sticky-top",an="padding-right",ln="margin-right";class cn{constructor(){this._element=document.body}getWidth(){const t=document.documentElement.clientWidth;return Math.abs(window.innerWidth-t)}hide(){const t=this.getWidth();this._disableOverFlow(),this._setElementAttributes(this._element,an,(e=>e+t)),this._setElementAttributes(on,an,(e=>e+t)),this._setElementAttributes(rn,ln,(e=>e-t))}reset(){this._resetElementAttributes(this._element,"overflow"),this._resetElementAttributes(this._element,an),this._resetElementAttributes(on,an),this._resetElementAttributes(rn,ln)}isOverflowing(){return this.getWidth()>0}_disableOverFlow(){this._saveInitialAttribute(this._element,"overflow"),this._element.style.overflow="hidden"}_setElementAttributes(t,e,i){const n=this.getWidth();this._applyManipulationCallback(t,(t=>{if(t!==this._element&&window.innerWidth>t.clientWidth+n)return;this._saveInitialAttribute(t,e);const s=window.getComputedStyle(t).getPropertyValue(e);t.style.setProperty(e,`${i(Number.parseFloat(s))}px`)}))}_saveInitialAttribute(t,e){const i=t.style.getPropertyValue(e);i&&F.setDataAttribute(t,e,i)}_resetElementAttributes(t,e){this._applyManipulationCallback(t,(t=>{const i=F.getDataAttribute(t,e);null!==i?(F.removeDataAttribute(t,e),t.style.setProperty(e,i)):t.style.removeProperty(e)}))}_applyManipulationCallback(t,e){if(o(t))e(t);else for(const i of z.find(t,this._element))e(i)}}const hn=".bs.modal",dn=`hide${hn}`,un=`hidePrevented${hn}`,fn=`hidden${hn}`,pn=`show${hn}`,mn=`shown${hn}`,gn=`resize${hn}`,_n=`click.dismiss${hn}`,bn=`mousedown.dismiss${hn}`,vn=`keydown.dismiss${hn}`,yn=`click${hn}.data-api`,wn="modal-open",An="show",En="modal-static",Tn={backdrop:!0,focus:!0,keyboard:!0},Cn={backdrop:"(boolean|string)",focus:"boolean",keyboard:"boolean"};class On extends W{constructor(t,e){super(t,e),this._dialog=z.findOne(".modal-dialog",this._element),this._backdrop=this._initializeBackDrop(),this._focustrap=this._initializeFocusTrap(),this._isShown=!1,this._isTransitioning=!1,this._scrollBar=new cn,this._addEventListeners()}static get Default(){return Tn}static get DefaultType(){return Cn}static get NAME(){return"modal"}toggle(t){return 
this._isShown?this.hide():this.show(t)}show(t){this._isShown||this._isTransitioning||N.trigger(this._element,pn,{relatedTarget:t}).defaultPrevented||(this._isShown=!0,this._isTransitioning=!0,this._scrollBar.hide(),document.body.classList.add(wn),this._adjustDialog(),this._backdrop.show((()=>this._showElement(t))))}hide(){this._isShown&&!this._isTransitioning&&(N.trigger(this._element,dn).defaultPrevented||(this._isShown=!1,this._isTransitioning=!0,this._focustrap.deactivate(),this._element.classList.remove(An),this._queueCallback((()=>this._hideModal()),this._element,this._isAnimated())))}dispose(){N.off(window,hn),N.off(this._dialog,hn),this._backdrop.dispose(),this._focustrap.deactivate(),super.dispose()}handleUpdate(){this._adjustDialog()}_initializeBackDrop(){return new Ui({isVisible:Boolean(this._config.backdrop),isAnimated:this._isAnimated()})}_initializeFocusTrap(){return new sn({trapElement:this._element})}_showElement(t){document.body.contains(this._element)||document.body.append(this._element),this._element.style.display="block",this._element.removeAttribute("aria-hidden"),this._element.setAttribute("aria-modal",!0),this._element.setAttribute("role","dialog"),this._element.scrollTop=0;const e=z.findOne(".modal-body",this._dialog);e&&(e.scrollTop=0),d(this._element),this._element.classList.add(An),this._queueCallback((()=>{this._config.focus&&this._focustrap.activate(),this._isTransitioning=!1,N.trigger(this._element,mn,{relatedTarget:t})}),this._dialog,this._isAnimated())}_addEventListeners(){N.on(this._element,vn,(t=>{"Escape"===t.key&&(this._config.keyboard?this.hide():this._triggerBackdropTransition())})),N.on(window,gn,(()=>{this._isShown&&!this._isTransitioning&&this._adjustDialog()})),N.on(this._element,bn,(t=>{N.one(this._element,_n,(e=>{this._element===t.target&&this._element===e.target&&("static"!==this._config.backdrop?this._config.backdrop&&this.hide():this._triggerBackdropTransition())}))}))}_hideModal(){this._element.style.display="none",this._element.setAttribute("aria-hidden",!0),this._element.removeAttribute("aria-modal"),this._element.removeAttribute("role"),this._isTransitioning=!1,this._backdrop.hide((()=>{document.body.classList.remove(wn),this._resetAdjustments(),this._scrollBar.reset(),N.trigger(this._element,fn)}))}_isAnimated(){return this._element.classList.contains("fade")}_triggerBackdropTransition(){if(N.trigger(this._element,un).defaultPrevented)return;const t=this._element.scrollHeight>document.documentElement.clientHeight,e=this._element.style.overflowY;"hidden"===e||this._element.classList.contains(En)||(t||(this._element.style.overflowY="hidden"),this._element.classList.add(En),this._queueCallback((()=>{this._element.classList.remove(En),this._queueCallback((()=>{this._element.style.overflowY=e}),this._dialog)}),this._dialog),this._element.focus())}_adjustDialog(){const t=this._element.scrollHeight>document.documentElement.clientHeight,e=this._scrollBar.getWidth(),i=e>0;if(i&&!t){const t=p()?"paddingLeft":"paddingRight";this._element.style[t]=`${e}px`}if(!i&&t){const t=p()?"paddingRight":"paddingLeft";this._element.style[t]=`${e}px`}}_resetAdjustments(){this._element.style.paddingLeft="",this._element.style.paddingRight=""}static jQueryInterface(t,e){return this.each((function(){const i=On.getOrCreateInstance(this,t);if("string"==typeof t){if(void 0===i[t])throw new TypeError(`No method named "${t}"`);i[t](e)}}))}}N.on(document,yn,'[data-bs-toggle="modal"]',(function(t){const 
e=z.getElementFromSelector(this);["A","AREA"].includes(this.tagName)&&t.preventDefault(),N.one(e,pn,(t=>{t.defaultPrevented||N.one(e,fn,(()=>{a(this)&&this.focus()}))}));const i=z.findOne(".modal.show");i&&On.getInstance(i).hide(),On.getOrCreateInstance(e).toggle(this)})),R(On),m(On);const xn=".bs.offcanvas",kn=".data-api",Ln=`load${xn}${kn}`,Sn="show",Dn="showing",$n="hiding",In=".offcanvas.show",Nn=`show${xn}`,Pn=`shown${xn}`,Mn=`hide${xn}`,jn=`hidePrevented${xn}`,Fn=`hidden${xn}`,Hn=`resize${xn}`,Wn=`click${xn}${kn}`,Bn=`keydown.dismiss${xn}`,zn={backdrop:!0,keyboard:!0,scroll:!1},Rn={backdrop:"(boolean|string)",keyboard:"boolean",scroll:"boolean"};class qn extends W{constructor(t,e){super(t,e),this._isShown=!1,this._backdrop=this._initializeBackDrop(),this._focustrap=this._initializeFocusTrap(),this._addEventListeners()}static get Default(){return zn}static get DefaultType(){return Rn}static get NAME(){return"offcanvas"}toggle(t){return this._isShown?this.hide():this.show(t)}show(t){this._isShown||N.trigger(this._element,Nn,{relatedTarget:t}).defaultPrevented||(this._isShown=!0,this._backdrop.show(),this._config.scroll||(new cn).hide(),this._element.setAttribute("aria-modal",!0),this._element.setAttribute("role","dialog"),this._element.classList.add(Dn),this._queueCallback((()=>{this._config.scroll&&!this._config.backdrop||this._focustrap.activate(),this._element.classList.add(Sn),this._element.classList.remove(Dn),N.trigger(this._element,Pn,{relatedTarget:t})}),this._element,!0))}hide(){this._isShown&&(N.trigger(this._element,Mn).defaultPrevented||(this._focustrap.deactivate(),this._element.blur(),this._isShown=!1,this._element.classList.add($n),this._backdrop.hide(),this._queueCallback((()=>{this._element.classList.remove(Sn,$n),this._element.removeAttribute("aria-modal"),this._element.removeAttribute("role"),this._config.scroll||(new cn).reset(),N.trigger(this._element,Fn)}),this._element,!0)))}dispose(){this._backdrop.dispose(),this._focustrap.deactivate(),super.dispose()}_initializeBackDrop(){const t=Boolean(this._config.backdrop);return new Ui({className:"offcanvas-backdrop",isVisible:t,isAnimated:!0,rootElement:this._element.parentNode,clickCallback:t?()=>{"static"!==this._config.backdrop?this.hide():N.trigger(this._element,jn)}:null})}_initializeFocusTrap(){return new sn({trapElement:this._element})}_addEventListeners(){N.on(this._element,Bn,(t=>{"Escape"===t.key&&(this._config.keyboard?this.hide():N.trigger(this._element,jn))}))}static jQueryInterface(t){return this.each((function(){const e=qn.getOrCreateInstance(this,t);if("string"==typeof t){if(void 0===e[t]||t.startsWith("_")||"constructor"===t)throw new TypeError(`No method named "${t}"`);e[t](this)}}))}}N.on(document,Wn,'[data-bs-toggle="offcanvas"]',(function(t){const e=z.getElementFromSelector(this);if(["A","AREA"].includes(this.tagName)&&t.preventDefault(),l(this))return;N.one(e,Fn,(()=>{a(this)&&this.focus()}));const i=z.findOne(In);i&&i!==e&&qn.getInstance(i).hide(),qn.getOrCreateInstance(e).toggle(this)})),N.on(window,Ln,(()=>{for(const t of z.find(In))qn.getOrCreateInstance(t).show()})),N.on(window,Hn,(()=>{for(const t of z.find("[aria-modal][class*=show][class*=offcanvas-]"))"fixed"!==getComputedStyle(t).position&&qn.getOrCreateInstance(t).hide()})),R(qn),m(qn);const 
Vn={"*":["class","dir","id","lang","role",/^aria-[\w-]*$/i],a:["target","href","title","rel"],area:[],b:[],br:[],col:[],code:[],div:[],em:[],hr:[],h1:[],h2:[],h3:[],h4:[],h5:[],h6:[],i:[],img:["src","srcset","alt","title","width","height"],li:[],ol:[],p:[],pre:[],s:[],small:[],span:[],sub:[],sup:[],strong:[],u:[],ul:[]},Kn=new Set(["background","cite","href","itemtype","longdesc","poster","src","xlink:href"]),Qn=/^(?!javascript:)(?:[a-z0-9+.-]+:|[^&:/?#]*(?:[/?#]|$))/i,Xn=(t,e)=>{const i=t.nodeName.toLowerCase();return e.includes(i)?!Kn.has(i)||Boolean(Qn.test(t.nodeValue)):e.filter((t=>t instanceof RegExp)).some((t=>t.test(i)))},Yn={allowList:Vn,content:{},extraClass:"",html:!1,sanitize:!0,sanitizeFn:null,template:"
"},Un={allowList:"object",content:"object",extraClass:"(string|function)",html:"boolean",sanitize:"boolean",sanitizeFn:"(null|function)",template:"string"},Gn={entry:"(string|element|function|null)",selector:"(string|element)"};class Jn extends H{constructor(t){super(),this._config=this._getConfig(t)}static get Default(){return Yn}static get DefaultType(){return Un}static get NAME(){return"TemplateFactory"}getContent(){return Object.values(this._config.content).map((t=>this._resolvePossibleFunction(t))).filter(Boolean)}hasContent(){return this.getContent().length>0}changeContent(t){return this._checkContent(t),this._config.content={...this._config.content,...t},this}toHtml(){const t=document.createElement("div");t.innerHTML=this._maybeSanitize(this._config.template);for(const[e,i]of Object.entries(this._config.content))this._setContent(t,i,e);const e=t.children[0],i=this._resolvePossibleFunction(this._config.extraClass);return i&&e.classList.add(...i.split(" ")),e}_typeCheckConfig(t){super._typeCheckConfig(t),this._checkContent(t.content)}_checkContent(t){for(const[e,i]of Object.entries(t))super._typeCheckConfig({selector:e,entry:i},Gn)}_setContent(t,e,i){const n=z.findOne(i,t);n&&((e=this._resolvePossibleFunction(e))?o(e)?this._putElementInTemplate(r(e),n):this._config.html?n.innerHTML=this._maybeSanitize(e):n.textContent=e:n.remove())}_maybeSanitize(t){return this._config.sanitize?function(t,e,i){if(!t.length)return t;if(i&&"function"==typeof i)return i(t);const n=(new window.DOMParser).parseFromString(t,"text/html"),s=[].concat(...n.body.querySelectorAll("*"));for(const t of s){const i=t.nodeName.toLowerCase();if(!Object.keys(e).includes(i)){t.remove();continue}const n=[].concat(...t.attributes),s=[].concat(e["*"]||[],e[i]||[]);for(const e of n)Xn(e,s)||t.removeAttribute(e.nodeName)}return n.body.innerHTML}(t,this._config.allowList,this._config.sanitizeFn):t}_resolvePossibleFunction(t){return g(t,[this])}_putElementInTemplate(t,e){if(this._config.html)return e.innerHTML="",void e.append(t);e.textContent=t.textContent}}const Zn=new Set(["sanitize","allowList","sanitizeFn"]),ts="fade",es="show",is=".modal",ns="hide.bs.modal",ss="hover",os="focus",rs={AUTO:"auto",TOP:"top",RIGHT:p()?"left":"right",BOTTOM:"bottom",LEFT:p()?"right":"left"},as={allowList:Vn,animation:!0,boundary:"clippingParents",container:!1,customClass:"",delay:0,fallbackPlacements:["top","right","bottom","left"],html:!1,offset:[0,6],placement:"top",popperConfig:null,sanitize:!0,sanitizeFn:null,selector:!1,template:'',title:"",trigger:"hover focus"},ls={allowList:"object",animation:"boolean",boundary:"(string|element)",container:"(string|element|boolean)",customClass:"(string|function)",delay:"(number|object)",fallbackPlacements:"array",html:"boolean",offset:"(array|string|function)",placement:"(string|function)",popperConfig:"(null|object|function)",sanitize:"boolean",sanitizeFn:"(null|function)",selector:"(string|boolean)",template:"string",title:"(string|element|function)",trigger:"string"};class cs extends W{constructor(t,e){if(void 0===vi)throw new TypeError("Bootstrap's tooltips require Popper (https://popper.js.org)");super(t,e),this._isEnabled=!0,this._timeout=0,this._isHovered=null,this._activeTrigger={},this._popper=null,this._templateFactory=null,this._newContent=null,this.tip=null,this._setListeners(),this._config.selector||this._fixTitle()}static get Default(){return as}static get DefaultType(){return ls}static get 
NAME(){return"tooltip"}enable(){this._isEnabled=!0}disable(){this._isEnabled=!1}toggleEnabled(){this._isEnabled=!this._isEnabled}toggle(){this._isEnabled&&(this._activeTrigger.click=!this._activeTrigger.click,this._isShown()?this._leave():this._enter())}dispose(){clearTimeout(this._timeout),N.off(this._element.closest(is),ns,this._hideModalHandler),this._element.getAttribute("data-bs-original-title")&&this._element.setAttribute("title",this._element.getAttribute("data-bs-original-title")),this._disposePopper(),super.dispose()}show(){if("none"===this._element.style.display)throw new Error("Please use show on visible elements");if(!this._isWithContent()||!this._isEnabled)return;const t=N.trigger(this._element,this.constructor.eventName("show")),e=(c(this._element)||this._element.ownerDocument.documentElement).contains(this._element);if(t.defaultPrevented||!e)return;this._disposePopper();const i=this._getTipElement();this._element.setAttribute("aria-describedby",i.getAttribute("id"));const{container:n}=this._config;if(this._element.ownerDocument.documentElement.contains(this.tip)||(n.append(i),N.trigger(this._element,this.constructor.eventName("inserted"))),this._popper=this._createPopper(i),i.classList.add(es),"ontouchstart"in document.documentElement)for(const t of[].concat(...document.body.children))N.on(t,"mouseover",h);this._queueCallback((()=>{N.trigger(this._element,this.constructor.eventName("shown")),!1===this._isHovered&&this._leave(),this._isHovered=!1}),this.tip,this._isAnimated())}hide(){if(this._isShown()&&!N.trigger(this._element,this.constructor.eventName("hide")).defaultPrevented){if(this._getTipElement().classList.remove(es),"ontouchstart"in document.documentElement)for(const t of[].concat(...document.body.children))N.off(t,"mouseover",h);this._activeTrigger.click=!1,this._activeTrigger[os]=!1,this._activeTrigger[ss]=!1,this._isHovered=null,this._queueCallback((()=>{this._isWithActiveTrigger()||(this._isHovered||this._disposePopper(),this._element.removeAttribute("aria-describedby"),N.trigger(this._element,this.constructor.eventName("hidden")))}),this.tip,this._isAnimated())}}update(){this._popper&&this._popper.update()}_isWithContent(){return Boolean(this._getTitle())}_getTipElement(){return this.tip||(this.tip=this._createTipElement(this._newContent||this._getContentForTemplate())),this.tip}_createTipElement(t){const e=this._getTemplateFactory(t).toHtml();if(!e)return null;e.classList.remove(ts,es),e.classList.add(`bs-${this.constructor.NAME}-auto`);const i=(t=>{do{t+=Math.floor(1e6*Math.random())}while(document.getElementById(t));return t})(this.constructor.NAME).toString();return e.setAttribute("id",i),this._isAnimated()&&e.classList.add(ts),e}setContent(t){this._newContent=t,this._isShown()&&(this._disposePopper(),this.show())}_getTemplateFactory(t){return this._templateFactory?this._templateFactory.changeContent(t):this._templateFactory=new Jn({...this._config,content:t,extraClass:this._resolvePossibleFunction(this._config.customClass)}),this._templateFactory}_getContentForTemplate(){return{".tooltip-inner":this._getTitle()}}_getTitle(){return this._resolvePossibleFunction(this._config.title)||this._element.getAttribute("data-bs-original-title")}_initializeOnDelegatedTarget(t){return this.constructor.getOrCreateInstance(t.delegateTarget,this._getDelegateConfig())}_isAnimated(){return this._config.animation||this.tip&&this.tip.classList.contains(ts)}_isShown(){return this.tip&&this.tip.classList.contains(es)}_createPopper(t){const 
e=g(this._config.placement,[this,t,this._element]),i=rs[e.toUpperCase()];return bi(this._element,t,this._getPopperConfig(i))}_getOffset(){const{offset:t}=this._config;return"string"==typeof t?t.split(",").map((t=>Number.parseInt(t,10))):"function"==typeof t?e=>t(e,this._element):t}_resolvePossibleFunction(t){return g(t,[this._element])}_getPopperConfig(t){const e={placement:t,modifiers:[{name:"flip",options:{fallbackPlacements:this._config.fallbackPlacements}},{name:"offset",options:{offset:this._getOffset()}},{name:"preventOverflow",options:{boundary:this._config.boundary}},{name:"arrow",options:{element:`.${this.constructor.NAME}-arrow`}},{name:"preSetPlacement",enabled:!0,phase:"beforeMain",fn:t=>{this._getTipElement().setAttribute("data-popper-placement",t.state.placement)}}]};return{...e,...g(this._config.popperConfig,[e])}}_setListeners(){const t=this._config.trigger.split(" ");for(const e of t)if("click"===e)N.on(this._element,this.constructor.eventName("click"),this._config.selector,(t=>{this._initializeOnDelegatedTarget(t).toggle()}));else if("manual"!==e){const t=e===ss?this.constructor.eventName("mouseenter"):this.constructor.eventName("focusin"),i=e===ss?this.constructor.eventName("mouseleave"):this.constructor.eventName("focusout");N.on(this._element,t,this._config.selector,(t=>{const e=this._initializeOnDelegatedTarget(t);e._activeTrigger["focusin"===t.type?os:ss]=!0,e._enter()})),N.on(this._element,i,this._config.selector,(t=>{const e=this._initializeOnDelegatedTarget(t);e._activeTrigger["focusout"===t.type?os:ss]=e._element.contains(t.relatedTarget),e._leave()}))}this._hideModalHandler=()=>{this._element&&this.hide()},N.on(this._element.closest(is),ns,this._hideModalHandler)}_fixTitle(){const t=this._element.getAttribute("title");t&&(this._element.getAttribute("aria-label")||this._element.textContent.trim()||this._element.setAttribute("aria-label",t),this._element.setAttribute("data-bs-original-title",t),this._element.removeAttribute("title"))}_enter(){this._isShown()||this._isHovered?this._isHovered=!0:(this._isHovered=!0,this._setTimeout((()=>{this._isHovered&&this.show()}),this._config.delay.show))}_leave(){this._isWithActiveTrigger()||(this._isHovered=!1,this._setTimeout((()=>{this._isHovered||this.hide()}),this._config.delay.hide))}_setTimeout(t,e){clearTimeout(this._timeout),this._timeout=setTimeout(t,e)}_isWithActiveTrigger(){return Object.values(this._activeTrigger).includes(!0)}_getConfig(t){const e=F.getDataAttributes(this._element);for(const t of Object.keys(e))Zn.has(t)&&delete e[t];return t={...e,..."object"==typeof t&&t?t:{}},t=this._mergeConfigObj(t),t=this._configAfterMerge(t),this._typeCheckConfig(t),t}_configAfterMerge(t){return t.container=!1===t.container?document.body:r(t.container),"number"==typeof t.delay&&(t.delay={show:t.delay,hide:t.delay}),"number"==typeof t.title&&(t.title=t.title.toString()),"number"==typeof t.content&&(t.content=t.content.toString()),t}_getDelegateConfig(){const t={};for(const[e,i]of Object.entries(this._config))this.constructor.Default[e]!==i&&(t[e]=i);return t.selector=!1,t.trigger="manual",t}_disposePopper(){this._popper&&(this._popper.destroy(),this._popper=null),this.tip&&(this.tip.remove(),this.tip=null)}static jQueryInterface(t){return this.each((function(){const e=cs.getOrCreateInstance(this,t);if("string"==typeof t){if(void 0===e[t])throw new TypeError(`No method named "${t}"`);e[t]()}}))}}m(cs);const 
hs={...cs.Default,content:"",offset:[0,8],placement:"right",template:'',trigger:"click"},ds={...cs.DefaultType,content:"(null|string|element|function)"};class us extends cs{static get Default(){return hs}static get DefaultType(){return ds}static get NAME(){return"popover"}_isWithContent(){return this._getTitle()||this._getContent()}_getContentForTemplate(){return{".popover-header":this._getTitle(),".popover-body":this._getContent()}}_getContent(){return this._resolvePossibleFunction(this._config.content)}static jQueryInterface(t){return this.each((function(){const e=us.getOrCreateInstance(this,t);if("string"==typeof t){if(void 0===e[t])throw new TypeError(`No method named "${t}"`);e[t]()}}))}}m(us);const fs=".bs.scrollspy",ps=`activate${fs}`,ms=`click${fs}`,gs=`load${fs}.data-api`,_s="active",bs="[href]",vs=".nav-link",ys=`${vs}, .nav-item > ${vs}, .list-group-item`,ws={offset:null,rootMargin:"0px 0px -25%",smoothScroll:!1,target:null,threshold:[.1,.5,1]},As={offset:"(number|null)",rootMargin:"string",smoothScroll:"boolean",target:"element",threshold:"array"};class Es extends W{constructor(t,e){super(t,e),this._targetLinks=new Map,this._observableSections=new Map,this._rootElement="visible"===getComputedStyle(this._element).overflowY?null:this._element,this._activeTarget=null,this._observer=null,this._previousScrollData={visibleEntryTop:0,parentScrollTop:0},this.refresh()}static get Default(){return ws}static get DefaultType(){return As}static get NAME(){return"scrollspy"}refresh(){this._initializeTargetsAndObservables(),this._maybeEnableSmoothScroll(),this._observer?this._observer.disconnect():this._observer=this._getNewObserver();for(const t of this._observableSections.values())this._observer.observe(t)}dispose(){this._observer.disconnect(),super.dispose()}_configAfterMerge(t){return t.target=r(t.target)||document.body,t.rootMargin=t.offset?`${t.offset}px 0px -30%`:t.rootMargin,"string"==typeof t.threshold&&(t.threshold=t.threshold.split(",").map((t=>Number.parseFloat(t)))),t}_maybeEnableSmoothScroll(){this._config.smoothScroll&&(N.off(this._config.target,ms),N.on(this._config.target,ms,bs,(t=>{const e=this._observableSections.get(t.target.hash);if(e){t.preventDefault();const i=this._rootElement||window,n=e.offsetTop-this._element.offsetTop;if(i.scrollTo)return void i.scrollTo({top:n,behavior:"smooth"});i.scrollTop=n}})))}_getNewObserver(){const t={root:this._rootElement,threshold:this._config.threshold,rootMargin:this._config.rootMargin};return new IntersectionObserver((t=>this._observerCallback(t)),t)}_observerCallback(t){const e=t=>this._targetLinks.get(`#${t.target.id}`),i=t=>{this._previousScrollData.visibleEntryTop=t.target.offsetTop,this._process(e(t))},n=(this._rootElement||document.documentElement).scrollTop,s=n>=this._previousScrollData.parentScrollTop;this._previousScrollData.parentScrollTop=n;for(const o of t){if(!o.isIntersecting){this._activeTarget=null,this._clearActiveClass(e(o));continue}const t=o.target.offsetTop>=this._previousScrollData.visibleEntryTop;if(s&&t){if(i(o),!n)return}else s||t||i(o)}}_initializeTargetsAndObservables(){this._targetLinks=new Map,this._observableSections=new Map;const t=z.find(bs,this._config.target);for(const e of t){if(!e.hash||l(e))continue;const 
t=z.findOne(decodeURI(e.hash),this._element);a(t)&&(this._targetLinks.set(decodeURI(e.hash),e),this._observableSections.set(e.hash,t))}}_process(t){this._activeTarget!==t&&(this._clearActiveClass(this._config.target),this._activeTarget=t,t.classList.add(_s),this._activateParents(t),N.trigger(this._element,ps,{relatedTarget:t}))}_activateParents(t){if(t.classList.contains("dropdown-item"))z.findOne(".dropdown-toggle",t.closest(".dropdown")).classList.add(_s);else for(const e of z.parents(t,".nav, .list-group"))for(const t of z.prev(e,ys))t.classList.add(_s)}_clearActiveClass(t){t.classList.remove(_s);const e=z.find(`${bs}.${_s}`,t);for(const t of e)t.classList.remove(_s)}static jQueryInterface(t){return this.each((function(){const e=Es.getOrCreateInstance(this,t);if("string"==typeof t){if(void 0===e[t]||t.startsWith("_")||"constructor"===t)throw new TypeError(`No method named "${t}"`);e[t]()}}))}}N.on(window,gs,(()=>{for(const t of z.find('[data-bs-spy="scroll"]'))Es.getOrCreateInstance(t)})),m(Es);const Ts=".bs.tab",Cs=`hide${Ts}`,Os=`hidden${Ts}`,xs=`show${Ts}`,ks=`shown${Ts}`,Ls=`click${Ts}`,Ss=`keydown${Ts}`,Ds=`load${Ts}`,$s="ArrowLeft",Is="ArrowRight",Ns="ArrowUp",Ps="ArrowDown",Ms="Home",js="End",Fs="active",Hs="fade",Ws="show",Bs=":not(.dropdown-toggle)",zs='[data-bs-toggle="tab"], [data-bs-toggle="pill"], [data-bs-toggle="list"]',Rs=`.nav-link${Bs}, .list-group-item${Bs}, [role="tab"]${Bs}, ${zs}`,qs=`.${Fs}[data-bs-toggle="tab"], .${Fs}[data-bs-toggle="pill"], .${Fs}[data-bs-toggle="list"]`;class Vs extends W{constructor(t){super(t),this._parent=this._element.closest('.list-group, .nav, [role="tablist"]'),this._parent&&(this._setInitialAttributes(this._parent,this._getChildren()),N.on(this._element,Ss,(t=>this._keydown(t))))}static get NAME(){return"tab"}show(){const t=this._element;if(this._elemIsActive(t))return;const e=this._getActiveElem(),i=e?N.trigger(e,Cs,{relatedTarget:t}):null;N.trigger(t,xs,{relatedTarget:e}).defaultPrevented||i&&i.defaultPrevented||(this._deactivate(e,t),this._activate(t,e))}_activate(t,e){t&&(t.classList.add(Fs),this._activate(z.getElementFromSelector(t)),this._queueCallback((()=>{"tab"===t.getAttribute("role")?(t.removeAttribute("tabindex"),t.setAttribute("aria-selected",!0),this._toggleDropDown(t,!0),N.trigger(t,ks,{relatedTarget:e})):t.classList.add(Ws)}),t,t.classList.contains(Hs)))}_deactivate(t,e){t&&(t.classList.remove(Fs),t.blur(),this._deactivate(z.getElementFromSelector(t)),this._queueCallback((()=>{"tab"===t.getAttribute("role")?(t.setAttribute("aria-selected",!1),t.setAttribute("tabindex","-1"),this._toggleDropDown(t,!1),N.trigger(t,Os,{relatedTarget:e})):t.classList.remove(Ws)}),t,t.classList.contains(Hs)))}_keydown(t){if(![$s,Is,Ns,Ps,Ms,js].includes(t.key))return;t.stopPropagation(),t.preventDefault();const e=this._getChildren().filter((t=>!l(t)));let i;if([Ms,js].includes(t.key))i=e[t.key===Ms?0:e.length-1];else{const n=[Is,Ps].includes(t.key);i=b(e,t.target,n,!0)}i&&(i.focus({preventScroll:!0}),Vs.getOrCreateInstance(i).show())}_getChildren(){return z.find(Rs,this._parent)}_getActiveElem(){return this._getChildren().find((t=>this._elemIsActive(t)))||null}_setInitialAttributes(t,e){this._setAttributeIfNotExists(t,"role","tablist");for(const t of e)this._setInitialAttributesOnChild(t)}_setInitialAttributesOnChild(t){t=this._getInnerElement(t);const 
e=this._elemIsActive(t),i=this._getOuterElement(t);t.setAttribute("aria-selected",e),i!==t&&this._setAttributeIfNotExists(i,"role","presentation"),e||t.setAttribute("tabindex","-1"),this._setAttributeIfNotExists(t,"role","tab"),this._setInitialAttributesOnTargetPanel(t)}_setInitialAttributesOnTargetPanel(t){const e=z.getElementFromSelector(t);e&&(this._setAttributeIfNotExists(e,"role","tabpanel"),t.id&&this._setAttributeIfNotExists(e,"aria-labelledby",`${t.id}`))}_toggleDropDown(t,e){const i=this._getOuterElement(t);if(!i.classList.contains("dropdown"))return;const n=(t,n)=>{const s=z.findOne(t,i);s&&s.classList.toggle(n,e)};n(".dropdown-toggle",Fs),n(".dropdown-menu",Ws),i.setAttribute("aria-expanded",e)}_setAttributeIfNotExists(t,e,i){t.hasAttribute(e)||t.setAttribute(e,i)}_elemIsActive(t){return t.classList.contains(Fs)}_getInnerElement(t){return t.matches(Rs)?t:z.findOne(Rs,t)}_getOuterElement(t){return t.closest(".nav-item, .list-group-item")||t}static jQueryInterface(t){return this.each((function(){const e=Vs.getOrCreateInstance(this);if("string"==typeof t){if(void 0===e[t]||t.startsWith("_")||"constructor"===t)throw new TypeError(`No method named "${t}"`);e[t]()}}))}}N.on(document,Ls,zs,(function(t){["A","AREA"].includes(this.tagName)&&t.preventDefault(),l(this)||Vs.getOrCreateInstance(this).show()})),N.on(window,Ds,(()=>{for(const t of z.find(qs))Vs.getOrCreateInstance(t)})),m(Vs);const Ks=".bs.toast",Qs=`mouseover${Ks}`,Xs=`mouseout${Ks}`,Ys=`focusin${Ks}`,Us=`focusout${Ks}`,Gs=`hide${Ks}`,Js=`hidden${Ks}`,Zs=`show${Ks}`,to=`shown${Ks}`,eo="hide",io="show",no="showing",so={animation:"boolean",autohide:"boolean",delay:"number"},oo={animation:!0,autohide:!0,delay:5e3};class ro extends W{constructor(t,e){super(t,e),this._timeout=null,this._hasMouseInteraction=!1,this._hasKeyboardInteraction=!1,this._setListeners()}static get Default(){return oo}static get DefaultType(){return so}static get NAME(){return"toast"}show(){N.trigger(this._element,Zs).defaultPrevented||(this._clearTimeout(),this._config.animation&&this._element.classList.add("fade"),this._element.classList.remove(eo),d(this._element),this._element.classList.add(io,no),this._queueCallback((()=>{this._element.classList.remove(no),N.trigger(this._element,to),this._maybeScheduleHide()}),this._element,this._config.animation))}hide(){this.isShown()&&(N.trigger(this._element,Gs).defaultPrevented||(this._element.classList.add(no),this._queueCallback((()=>{this._element.classList.add(eo),this._element.classList.remove(no,io),N.trigger(this._element,Js)}),this._element,this._config.animation)))}dispose(){this._clearTimeout(),this.isShown()&&this._element.classList.remove(io),super.dispose()}isShown(){return this._element.classList.contains(io)}_maybeScheduleHide(){this._config.autohide&&(this._hasMouseInteraction||this._hasKeyboardInteraction||(this._timeout=setTimeout((()=>{this.hide()}),this._config.delay)))}_onInteraction(t,e){switch(t.type){case"mouseover":case"mouseout":this._hasMouseInteraction=e;break;case"focusin":case"focusout":this._hasKeyboardInteraction=e}if(e)return void this._clearTimeout();const i=t.relatedTarget;this._element===i||this._element.contains(i)||this._maybeScheduleHide()}_setListeners(){N.on(this._element,Qs,(t=>this._onInteraction(t,!0))),N.on(this._element,Xs,(t=>this._onInteraction(t,!1))),N.on(this._element,Ys,(t=>this._onInteraction(t,!0))),N.on(this._element,Us,(t=>this._onInteraction(t,!1)))}_clearTimeout(){clearTimeout(this._timeout),this._timeout=null}static jQueryInterface(t){return 
+//# sourceMappingURL=bootstrap.bundle.min.js.map
\ No newline at end of file
diff --git a/docs/deps/bootstrap-5.3.1/bootstrap.bundle.min.js.map b/docs/deps/bootstrap-5.3.1/bootstrap.bundle.min.js.map
new file mode 100644
index 0000000000..3863da8b7f
--- /dev/null
+++ b/docs/deps/bootstrap-5.3.1/bootstrap.bundle.min.js.map
@@ -0,0 +1 @@
"_mergeConfigObj","_configAfterMerge","_typeCheckConfig","jsonConfig","constructor","configTypes","property","expectedTypes","valueType","prototype","RegExp","test","TypeError","toUpperCase","BaseComponent","super","_element","_config","DATA_KEY","dispose","EVENT_KEY","propertyName","getOwnPropertyNames","_queueCallback","isAnimated","getInstance","getOrCreateInstance","VERSION","eventName","getSelector","hrefAttribute","trim","SelectorEngine","concat","Element","findOne","children","child","matches","parents","ancestor","prev","previous","previousElementSibling","next","nextElementSibling","focusableChildren","focusables","map","join","el","getSelectorFromElement","getElementFromSelector","getMultipleElementsFromSelector","enableDismissTrigger","component","method","clickEvent","tagName","EVENT_CLOSE","EVENT_CLOSED","Alert","close","_destroyElement","each","data","undefined","SELECTOR_DATA_TOGGLE","Button","toggle","button","EVENT_TOUCHSTART","EVENT_TOUCHMOVE","EVENT_TOUCHEND","EVENT_POINTERDOWN","EVENT_POINTERUP","endCallback","leftCallback","rightCallback","Swipe","isSupported","_deltaX","_supportPointerEvents","PointerEvent","_initEvents","_start","_eventIsPointerPenTouch","clientX","touches","_end","_handleSwipe","_move","absDeltaX","abs","direction","add","pointerType","navigator","maxTouchPoints","DATA_API_KEY","ORDER_NEXT","ORDER_PREV","DIRECTION_LEFT","DIRECTION_RIGHT","EVENT_SLIDE","EVENT_SLID","EVENT_KEYDOWN","EVENT_MOUSEENTER","EVENT_MOUSELEAVE","EVENT_DRAG_START","EVENT_LOAD_DATA_API","EVENT_CLICK_DATA_API","CLASS_NAME_CAROUSEL","CLASS_NAME_ACTIVE","SELECTOR_ACTIVE","SELECTOR_ITEM","SELECTOR_ACTIVE_ITEM","KEY_TO_DIRECTION","ArrowLeft","ArrowRight","interval","keyboard","pause","ride","touch","wrap","Carousel","_interval","_activeElement","_isSliding","touchTimeout","_swipeHelper","_indicatorsElement","_addEventListeners","cycle","_slide","nextWhenVisible","hidden","_clearInterval","_updateInterval","setInterval","_maybeEnableCycle","to","items","_getItems","activeIndex","_getItemIndex","_getActive","order","defaultInterval","_keydown","_addTouchEventListeners","img","swipeConfig","_directionToOrder","endCallBack","clearTimeout","_setActiveIndicatorElement","activeIndicator","newActiveIndicator","elementInterval","parseInt","isNext","nextElement","nextElementIndex","triggerEvent","_orderToDirection","isCycling","directionalClassName","orderClassName","completeCallBack","_isAnimated","clearInterval","carousel","slideIndex","carousels","EVENT_SHOW","EVENT_SHOWN","EVENT_HIDE","EVENT_HIDDEN","CLASS_NAME_SHOW","CLASS_NAME_COLLAPSE","CLASS_NAME_COLLAPSING","CLASS_NAME_DEEPER_CHILDREN","parent","Collapse","_isTransitioning","_triggerArray","toggleList","elem","filterElement","foundElement","_initializeChildren","_addAriaAndCollapsedClass","_isShown","hide","show","activeChildren","_getFirstLevelChildren","activeInstance","dimension","_getDimension","style","scrollSize","complete","getBoundingClientRect","selected","triggerArray","isOpen","top","bottom","right","left","auto","basePlacements","start","end","clippingParents","viewport","popper","reference","variationPlacements","reduce","acc","placement","placements","beforeRead","read","afterRead","beforeMain","main","afterMain","beforeWrite","write","afterWrite","modifierPhases","getNodeName","nodeName","getWindow","node","ownerDocument","defaultView","isHTMLElement","HTMLElement","isShadowRoot","applyStyles$1","enabled","phase","_ref","state","elements","forEach","styles","assign","effect","_ref2","initialStyles","position","options","
strategy","margin","arrow","hasOwnProperty","attribute","requires","getBasePlacement","round","getUAString","uaData","userAgentData","brands","isArray","item","brand","version","userAgent","isLayoutViewport","includeScale","isFixedStrategy","clientRect","scaleX","scaleY","offsetWidth","width","height","visualViewport","addVisualOffsets","x","offsetLeft","y","offsetTop","getLayoutRect","rootNode","isSameNode","host","isTableElement","getDocumentElement","getParentNode","assignedSlot","getTrueOffsetParent","offsetParent","getOffsetParent","isFirefox","currentNode","css","transform","perspective","contain","willChange","getContainingBlock","getMainAxisFromPlacement","within","mathMax","mathMin","mergePaddingObject","paddingObject","expandToHashMap","hashMap","arrow$1","_state$modifiersData$","arrowElement","popperOffsets","modifiersData","basePlacement","axis","len","padding","rects","toPaddingObject","arrowRect","minProp","maxProp","endDiff","startDiff","arrowOffsetParent","clientSize","clientHeight","clientWidth","centerToReference","center","offset","axisProp","centerOffset","_options$element","requiresIfExists","getVariation","unsetSides","mapToStyles","_Object$assign2","popperRect","variation","offsets","gpuAcceleration","adaptive","roundOffsets","isFixed","_offsets$x","_offsets$y","_ref3","hasX","hasY","sideX","sideY","win","heightProp","widthProp","_Object$assign","commonStyles","_ref4","dpr","devicePixelRatio","roundOffsetsByDPR","computeStyles$1","_ref5","_options$gpuAccelerat","_options$adaptive","_options$roundOffsets","passive","eventListeners","_options$scroll","scroll","_options$resize","resize","scrollParents","scrollParent","update","hash","getOppositePlacement","matched","getOppositeVariationPlacement","getWindowScroll","scrollLeft","pageXOffset","scrollTop","pageYOffset","getWindowScrollBarX","isScrollParent","_getComputedStyle","overflow","overflowX","overflowY","getScrollParent","listScrollParents","_element$ownerDocumen","isBody","updatedList","rectToClientRect","rect","getClientRectFromMixedType","clippingParent","html","layoutViewport","getViewportRect","clientTop","clientLeft","getInnerBoundingClientRect","winScroll","scrollWidth","scrollHeight","getDocumentRect","computeOffsets","commonX","commonY","mainAxis","detectOverflow","_options","_options$placement","_options$strategy","_options$boundary","boundary","_options$rootBoundary","rootBoundary","_options$elementConte","elementContext","_options$altBoundary","altBoundary","_options$padding","altContext","clippingClientRect","mainClippingParents","clipperElement","getClippingParents","firstClippingParent","clippingRect","accRect","getClippingRect","contextElement","referenceClientRect","popperClientRect","elementClientRect","overflowOffsets","offsetData","multiply","computeAutoPlacement","flipVariations","_options$allowedAutoP","allowedAutoPlacements","allPlacements","allowedPlacements","overflows","sort","a","b","flip$1","_skip","_options$mainAxis","checkMainAxis","_options$altAxis","altAxis","checkAltAxis","specifiedFallbackPlacements","fallbackPlacements","_options$flipVariatio","preferredPlacement","oppositePlacement","getExpandedFallbackPlacements","referenceRect","checksMap","makeFallbackChecks","firstFittingPlacement","i","_basePlacement","isStartVariation","isVertical","mainVariationSide","altVariationSide","checks","every","check","_loop","_i","fittingPlacement","reset","getSideOffsets","preventedOffsets","isAnySideFullyClipped","some","side","hide$1","preventOverflow","referenceOverflow","popperAltOverflow","r
eferenceClippingOffsets","popperEscapeOffsets","isReferenceHidden","hasPopperEscaped","offset$1","_options$offset","invertDistance","skidding","distance","distanceAndSkiddingToXY","_data$state$placement","popperOffsets$1","preventOverflow$1","_options$tether","tether","_options$tetherOffset","tetherOffset","isBasePlacement","tetherOffsetValue","normalizedTetherOffsetValue","offsetModifierState","_offsetModifierState$","mainSide","altSide","additive","minLen","maxLen","arrowPaddingObject","arrowPaddingMin","arrowPaddingMax","arrowLen","minOffset","maxOffset","clientOffset","offsetModifierValue","tetherMax","preventedOffset","_offsetModifierState$2","_mainSide","_altSide","_offset","_len","_min","_max","isOriginSide","_offsetModifierValue","_tetherMin","_tetherMax","_preventedOffset","v","withinMaxClamp","getCompositeRect","elementOrVirtualElement","isOffsetParentAnElement","offsetParentIsScaled","isElementScaled","modifiers","visited","result","modifier","dep","depModifier","DEFAULT_OPTIONS","areValidElements","arguments","_key","popperGenerator","generatorOptions","_generatorOptions","_generatorOptions$def","defaultModifiers","_generatorOptions$def2","defaultOptions","pending","orderedModifiers","effectCleanupFns","isDestroyed","setOptions","setOptionsAction","cleanupModifierEffects","merged","orderModifiers","current","existing","m","_ref$options","cleanupFn","forceUpdate","_state$elements","_state$orderedModifie","_state$orderedModifie2","Promise","resolve","then","destroy","onFirstUpdate","createPopper","computeStyles","applyStyles","flip","ARROW_UP_KEY","ARROW_DOWN_KEY","EVENT_KEYDOWN_DATA_API","EVENT_KEYUP_DATA_API","SELECTOR_DATA_TOGGLE_SHOWN","SELECTOR_MENU","PLACEMENT_TOP","PLACEMENT_TOPEND","PLACEMENT_BOTTOM","PLACEMENT_BOTTOMEND","PLACEMENT_RIGHT","PLACEMENT_LEFT","autoClose","display","popperConfig","Dropdown","_popper","_parent","_menu","_inNavbar","_detectNavbar","_createPopper","focus","_completeHide","Popper","referenceElement","_getPopperConfig","_getPlacement","parentDropdown","isEnd","_getOffset","popperData","defaultBsPopperConfig","_selectMenuItem","clearMenus","openToggles","context","composedPath","isMenuTarget","dataApiKeydownHandler","isInput","isEscapeEvent","isUpOrDownEvent","getToggleButton","stopPropagation","EVENT_MOUSEDOWN","className","clickCallback","rootElement","Backdrop","_isAppended","_append","_getElement","_emulateAnimation","backdrop","createElement","append","EVENT_FOCUSIN","EVENT_KEYDOWN_TAB","TAB_NAV_BACKWARD","autofocus","trapElement","FocusTrap","_isActive","_lastTabNavDirection","activate","_handleFocusin","_handleKeydown","deactivate","shiftKey","SELECTOR_FIXED_CONTENT","SELECTOR_STICKY_CONTENT","PROPERTY_PADDING","PROPERTY_MARGIN","ScrollBarHelper","getWidth","documentWidth","innerWidth","_disableOverFlow","_setElementAttributes","calculatedValue","_resetElementAttributes","isOverflowing","_saveInitialAttribute","styleProperty","scrollbarWidth","_applyManipulationCallback","setProperty","actualValue","removeProperty","callBack","sel","EVENT_HIDE_PREVENTED","EVENT_RESIZE","EVENT_CLICK_DISMISS","EVENT_MOUSEDOWN_DISMISS","EVENT_KEYDOWN_DISMISS","CLASS_NAME_OPEN","CLASS_NAME_STATIC","Modal","_dialog","_backdrop","_initializeBackDrop","_focustrap","_initializeFocusTrap","_scrollBar","_adjustDialog","_showElement","_hideModal","handleUpdate","modalBody","transitionComplete","_triggerBackdropTransition","event2","_resetAdjustments","isModalOverflowing","initialOverflowY","isBodyOverflowing","paddingLeft","paddingRight","showEvent","alreadyOpen","CLASS
_NAME_SHOWING","CLASS_NAME_HIDING","OPEN_SELECTOR","Offcanvas","blur","completeCallback","DefaultAllowlist","area","br","col","code","div","em","hr","h1","h2","h3","h4","h5","h6","li","ol","p","pre","s","small","span","sub","sup","strong","u","ul","uriAttributes","SAFE_URL_PATTERN","allowedAttribute","allowedAttributeList","attributeName","nodeValue","attributeRegex","regex","allowList","content","extraClass","sanitize","sanitizeFn","template","DefaultContentType","entry","TemplateFactory","getContent","_resolvePossibleFunction","hasContent","changeContent","_checkContent","toHtml","templateWrapper","innerHTML","_maybeSanitize","text","_setContent","arg","templateElement","_putElementInTemplate","textContent","unsafeHtml","sanitizeFunction","createdDocument","DOMParser","parseFromString","elementName","attributeList","allowedAttributes","sanitizeHtml","DISALLOWED_ATTRIBUTES","CLASS_NAME_FADE","SELECTOR_MODAL","EVENT_MODAL_HIDE","TRIGGER_HOVER","TRIGGER_FOCUS","AttachmentMap","AUTO","TOP","RIGHT","BOTTOM","LEFT","animation","container","customClass","delay","title","Tooltip","_isEnabled","_timeout","_isHovered","_activeTrigger","_templateFactory","_newContent","tip","_setListeners","_fixTitle","enable","disable","toggleEnabled","click","_leave","_enter","_hideModalHandler","_disposePopper","_isWithContent","isInTheDom","_getTipElement","_isWithActiveTrigger","_getTitle","_createTipElement","_getContentForTemplate","_getTemplateFactory","tipId","prefix","floor","random","getElementById","getUID","setContent","_initializeOnDelegatedTarget","_getDelegateConfig","attachment","triggers","eventIn","eventOut","_setTimeout","timeout","dataAttributes","dataAttribute","Popover","_getContent","EVENT_ACTIVATE","EVENT_CLICK","SELECTOR_TARGET_LINKS","SELECTOR_NAV_LINKS","SELECTOR_LINK_ITEMS","rootMargin","smoothScroll","threshold","ScrollSpy","_targetLinks","_observableSections","_rootElement","_activeTarget","_observer","_previousScrollData","visibleEntryTop","parentScrollTop","refresh","_initializeTargetsAndObservables","_maybeEnableSmoothScroll","disconnect","_getNewObserver","section","observe","observableSection","scrollTo","behavior","IntersectionObserver","_observerCallback","targetElement","_process","userScrollsDown","isIntersecting","_clearActiveClass","entryIsLowerThanPrevious","targetLinks","anchor","decodeURI","_activateParents","listGroup","activeNodes","spy","ARROW_LEFT_KEY","ARROW_RIGHT_KEY","HOME_KEY","END_KEY","NOT_SELECTOR_DROPDOWN_TOGGLE","SELECTOR_INNER_ELEM","SELECTOR_DATA_TOGGLE_ACTIVE","Tab","_setInitialAttributes","_getChildren","innerElem","_elemIsActive","active","_getActiveElem","hideEvent","_deactivate","_activate","relatedElem","_toggleDropDown","nextActiveElement","preventScroll","_setAttributeIfNotExists","_setInitialAttributesOnChild","_getInnerElement","isActive","outerElem","_getOuterElement","_setInitialAttributesOnTargetPanel","open","EVENT_MOUSEOVER","EVENT_MOUSEOUT","EVENT_FOCUSOUT","CLASS_NAME_HIDE","autohide","Toast","_hasMouseInteraction","_hasKeyboardInteraction","_clearTimeout","_maybeScheduleHide","isShown","_onInteraction","isInteracting"],"sources":["../../js/src/dom/data.js","../../js/src/util/index.js","../../js/src/dom/event-handler.js","../../js/src/dom/manipulator.js","../../js/src/util/config.js","../../js/src/base-component.js","../../js/src/dom/selector-engine.js","../../js/src/util/component-functions.js","../../js/src/alert.js","../../js/src/button.js","../../js/src/util/swipe.js","../../js/src/carousel.js","../../js/src/collapse.js","../../node_mod
ules/@popperjs/core/lib/enums.js","../../node_modules/@popperjs/core/lib/dom-utils/getNodeName.js","../../node_modules/@popperjs/core/lib/dom-utils/getWindow.js","../../node_modules/@popperjs/core/lib/dom-utils/instanceOf.js","../../node_modules/@popperjs/core/lib/modifiers/applyStyles.js","../../node_modules/@popperjs/core/lib/utils/getBasePlacement.js","../../node_modules/@popperjs/core/lib/utils/math.js","../../node_modules/@popperjs/core/lib/utils/userAgent.js","../../node_modules/@popperjs/core/lib/dom-utils/isLayoutViewport.js","../../node_modules/@popperjs/core/lib/dom-utils/getBoundingClientRect.js","../../node_modules/@popperjs/core/lib/dom-utils/getLayoutRect.js","../../node_modules/@popperjs/core/lib/dom-utils/contains.js","../../node_modules/@popperjs/core/lib/dom-utils/getComputedStyle.js","../../node_modules/@popperjs/core/lib/dom-utils/isTableElement.js","../../node_modules/@popperjs/core/lib/dom-utils/getDocumentElement.js","../../node_modules/@popperjs/core/lib/dom-utils/getParentNode.js","../../node_modules/@popperjs/core/lib/dom-utils/getOffsetParent.js","../../node_modules/@popperjs/core/lib/utils/getMainAxisFromPlacement.js","../../node_modules/@popperjs/core/lib/utils/within.js","../../node_modules/@popperjs/core/lib/utils/mergePaddingObject.js","../../node_modules/@popperjs/core/lib/utils/getFreshSideObject.js","../../node_modules/@popperjs/core/lib/utils/expandToHashMap.js","../../node_modules/@popperjs/core/lib/modifiers/arrow.js","../../node_modules/@popperjs/core/lib/utils/getVariation.js","../../node_modules/@popperjs/core/lib/modifiers/computeStyles.js","../../node_modules/@popperjs/core/lib/modifiers/eventListeners.js","../../node_modules/@popperjs/core/lib/utils/getOppositePlacement.js","../../node_modules/@popperjs/core/lib/utils/getOppositeVariationPlacement.js","../../node_modules/@popperjs/core/lib/dom-utils/getWindowScroll.js","../../node_modules/@popperjs/core/lib/dom-utils/getWindowScrollBarX.js","../../node_modules/@popperjs/core/lib/dom-utils/isScrollParent.js","../../node_modules/@popperjs/core/lib/dom-utils/getScrollParent.js","../../node_modules/@popperjs/core/lib/dom-utils/listScrollParents.js","../../node_modules/@popperjs/core/lib/utils/rectToClientRect.js","../../node_modules/@popperjs/core/lib/dom-utils/getClippingRect.js","../../node_modules/@popperjs/core/lib/dom-utils/getViewportRect.js","../../node_modules/@popperjs/core/lib/dom-utils/getDocumentRect.js","../../node_modules/@popperjs/core/lib/utils/computeOffsets.js","../../node_modules/@popperjs/core/lib/utils/detectOverflow.js","../../node_modules/@popperjs/core/lib/utils/computeAutoPlacement.js","../../node_modules/@popperjs/core/lib/modifiers/flip.js","../../node_modules/@popperjs/core/lib/modifiers/hide.js","../../node_modules/@popperjs/core/lib/modifiers/offset.js","../../node_modules/@popperjs/core/lib/modifiers/popperOffsets.js","../../node_modules/@popperjs/core/lib/modifiers/preventOverflow.js","../../node_modules/@popperjs/core/lib/utils/getAltAxis.js","../../node_modules/@popperjs/core/lib/dom-utils/getCompositeRect.js","../../node_modules/@popperjs/core/lib/dom-utils/getNodeScroll.js","../../node_modules/@popperjs/core/lib/dom-utils/getHTMLElementScroll.js","../../node_modules/@popperjs/core/lib/utils/orderModifiers.js","../../node_modules/@popperjs/core/lib/createPopper.js","../../node_modules/@popperjs/core/lib/utils/debounce.js","../../node_modules/@popperjs/core/lib/utils/mergeByName.js","../../node_modules/@popperjs/core/lib/popper-lite.js","../../node_modules/@popperjs/c
ore/lib/popper.js","../../js/src/dropdown.js","../../js/src/util/backdrop.js","../../js/src/util/focustrap.js","../../js/src/util/scrollbar.js","../../js/src/modal.js","../../js/src/offcanvas.js","../../js/src/util/sanitizer.js","../../js/src/util/template-factory.js","../../js/src/tooltip.js","../../js/src/popover.js","../../js/src/scrollspy.js","../../js/src/tab.js","../../js/src/toast.js","../../js/index.umd.js"],"sourcesContent":["/**\n * --------------------------------------------------------------------------\n * Bootstrap dom/data.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\n/**\n * Constants\n */\n\nconst elementMap = new Map()\n\nexport default {\n set(element, key, instance) {\n if (!elementMap.has(element)) {\n elementMap.set(element, new Map())\n }\n\n const instanceMap = elementMap.get(element)\n\n // make it clear we only want one instance per element\n // can be removed later when multiple key/instances are fine to be used\n if (!instanceMap.has(key) && instanceMap.size !== 0) {\n // eslint-disable-next-line no-console\n console.error(`Bootstrap doesn't allow more than one instance per element. Bound instance: ${Array.from(instanceMap.keys())[0]}.`)\n return\n }\n\n instanceMap.set(key, instance)\n },\n\n get(element, key) {\n if (elementMap.has(element)) {\n return elementMap.get(element).get(key) || null\n }\n\n return null\n },\n\n remove(element, key) {\n if (!elementMap.has(element)) {\n return\n }\n\n const instanceMap = elementMap.get(element)\n\n instanceMap.delete(key)\n\n // free up element references if there are no instances left for an element\n if (instanceMap.size === 0) {\n elementMap.delete(element)\n }\n }\n}\n","/**\n * --------------------------------------------------------------------------\n * Bootstrap util/index.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\nconst MAX_UID = 1_000_000\nconst MILLISECONDS_MULTIPLIER = 1000\nconst TRANSITION_END = 'transitionend'\n\n/**\n * Properly escape IDs selectors to handle weird IDs\n * @param {string} selector\n * @returns {string}\n */\nconst parseSelector = selector => {\n if (selector && window.CSS && window.CSS.escape) {\n // document.querySelector needs escaping to handle IDs (html5+) containing for instance /\n selector = selector.replace(/#([^\\s\"#']+)/g, (match, id) => `#${CSS.escape(id)}`)\n }\n\n return selector\n}\n\n// Shout-out Angus Croll (https://goo.gl/pxwQGp)\nconst toType = object => {\n if (object === null || object === undefined) {\n return `${object}`\n }\n\n return Object.prototype.toString.call(object).match(/\\s([a-z]+)/i)[1].toLowerCase()\n}\n\n/**\n * Public Util API\n */\n\nconst getUID = prefix => {\n do {\n prefix += Math.floor(Math.random() * MAX_UID)\n } while (document.getElementById(prefix))\n\n return prefix\n}\n\nconst getTransitionDurationFromElement = element => {\n if (!element) {\n return 0\n }\n\n // Get transition-duration of the element\n let { transitionDuration, transitionDelay } = window.getComputedStyle(element)\n\n const floatTransitionDuration = Number.parseFloat(transitionDuration)\n const floatTransitionDelay = Number.parseFloat(transitionDelay)\n\n // Return 0 if element or transition duration is not found\n if (!floatTransitionDuration && !floatTransitionDelay) {\n return 0\n }\n\n // If multiple durations are defined, 
take the first\n transitionDuration = transitionDuration.split(',')[0]\n transitionDelay = transitionDelay.split(',')[0]\n\n return (Number.parseFloat(transitionDuration) + Number.parseFloat(transitionDelay)) * MILLISECONDS_MULTIPLIER\n}\n\nconst triggerTransitionEnd = element => {\n element.dispatchEvent(new Event(TRANSITION_END))\n}\n\nconst isElement = object => {\n if (!object || typeof object !== 'object') {\n return false\n }\n\n if (typeof object.jquery !== 'undefined') {\n object = object[0]\n }\n\n return typeof object.nodeType !== 'undefined'\n}\n\nconst getElement = object => {\n // it's a jQuery object or a node element\n if (isElement(object)) {\n return object.jquery ? object[0] : object\n }\n\n if (typeof object === 'string' && object.length > 0) {\n return document.querySelector(parseSelector(object))\n }\n\n return null\n}\n\nconst isVisible = element => {\n if (!isElement(element) || element.getClientRects().length === 0) {\n return false\n }\n\n const elementIsVisible = getComputedStyle(element).getPropertyValue('visibility') === 'visible'\n // Handle `details` element as its content may falsie appear visible when it is closed\n const closedDetails = element.closest('details:not([open])')\n\n if (!closedDetails) {\n return elementIsVisible\n }\n\n if (closedDetails !== element) {\n const summary = element.closest('summary')\n if (summary && summary.parentNode !== closedDetails) {\n return false\n }\n\n if (summary === null) {\n return false\n }\n }\n\n return elementIsVisible\n}\n\nconst isDisabled = element => {\n if (!element || element.nodeType !== Node.ELEMENT_NODE) {\n return true\n }\n\n if (element.classList.contains('disabled')) {\n return true\n }\n\n if (typeof element.disabled !== 'undefined') {\n return element.disabled\n }\n\n return element.hasAttribute('disabled') && element.getAttribute('disabled') !== 'false'\n}\n\nconst findShadowRoot = element => {\n if (!document.documentElement.attachShadow) {\n return null\n }\n\n // Can find the shadow root otherwise it'll return the document\n if (typeof element.getRootNode === 'function') {\n const root = element.getRootNode()\n return root instanceof ShadowRoot ? 
root : null\n }\n\n if (element instanceof ShadowRoot) {\n return element\n }\n\n // when we don't find a shadow root\n if (!element.parentNode) {\n return null\n }\n\n return findShadowRoot(element.parentNode)\n}\n\nconst noop = () => {}\n\n/**\n * Trick to restart an element's animation\n *\n * @param {HTMLElement} element\n * @return void\n *\n * @see https://www.charistheo.io/blog/2021/02/restart-a-css-animation-with-javascript/#restarting-a-css-animation\n */\nconst reflow = element => {\n element.offsetHeight // eslint-disable-line no-unused-expressions\n}\n\nconst getjQuery = () => {\n if (window.jQuery && !document.body.hasAttribute('data-bs-no-jquery')) {\n return window.jQuery\n }\n\n return null\n}\n\nconst DOMContentLoadedCallbacks = []\n\nconst onDOMContentLoaded = callback => {\n if (document.readyState === 'loading') {\n // add listener on the first call when the document is in loading state\n if (!DOMContentLoadedCallbacks.length) {\n document.addEventListener('DOMContentLoaded', () => {\n for (const callback of DOMContentLoadedCallbacks) {\n callback()\n }\n })\n }\n\n DOMContentLoadedCallbacks.push(callback)\n } else {\n callback()\n }\n}\n\nconst isRTL = () => document.documentElement.dir === 'rtl'\n\nconst defineJQueryPlugin = plugin => {\n onDOMContentLoaded(() => {\n const $ = getjQuery()\n /* istanbul ignore if */\n if ($) {\n const name = plugin.NAME\n const JQUERY_NO_CONFLICT = $.fn[name]\n $.fn[name] = plugin.jQueryInterface\n $.fn[name].Constructor = plugin\n $.fn[name].noConflict = () => {\n $.fn[name] = JQUERY_NO_CONFLICT\n return plugin.jQueryInterface\n }\n }\n })\n}\n\nconst execute = (possibleCallback, args = [], defaultValue = possibleCallback) => {\n return typeof possibleCallback === 'function' ? possibleCallback(...args) : defaultValue\n}\n\nconst executeAfterTransition = (callback, transitionElement, waitForTransition = true) => {\n if (!waitForTransition) {\n execute(callback)\n return\n }\n\n const durationPadding = 5\n const emulatedDuration = getTransitionDurationFromElement(transitionElement) + durationPadding\n\n let called = false\n\n const handler = ({ target }) => {\n if (target !== transitionElement) {\n return\n }\n\n called = true\n transitionElement.removeEventListener(TRANSITION_END, handler)\n execute(callback)\n }\n\n transitionElement.addEventListener(TRANSITION_END, handler)\n setTimeout(() => {\n if (!called) {\n triggerTransitionEnd(transitionElement)\n }\n }, emulatedDuration)\n}\n\n/**\n * Return the previous/next element of a list.\n *\n * @param {array} list The list of elements\n * @param activeElement The active element\n * @param shouldGetNext Choose to get next or previous element\n * @param isCycleAllowed\n * @return {Element|elem} The proper element\n */\nconst getNextActiveElement = (list, activeElement, shouldGetNext, isCycleAllowed) => {\n const listLength = list.length\n let index = list.indexOf(activeElement)\n\n // if the element does not exist in the list return an element\n // depending on the direction and if cycle is allowed\n if (index === -1) {\n return !shouldGetNext && isCycleAllowed ? list[listLength - 1] : list[0]\n }\n\n index += shouldGetNext ? 
1 : -1\n\n if (isCycleAllowed) {\n index = (index + listLength) % listLength\n }\n\n return list[Math.max(0, Math.min(index, listLength - 1))]\n}\n\nexport {\n defineJQueryPlugin,\n execute,\n executeAfterTransition,\n findShadowRoot,\n getElement,\n getjQuery,\n getNextActiveElement,\n getTransitionDurationFromElement,\n getUID,\n isDisabled,\n isElement,\n isRTL,\n isVisible,\n noop,\n onDOMContentLoaded,\n parseSelector,\n reflow,\n triggerTransitionEnd,\n toType\n}\n","/**\n * --------------------------------------------------------------------------\n * Bootstrap dom/event-handler.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\nimport { getjQuery } from '../util/index.js'\n\n/**\n * Constants\n */\n\nconst namespaceRegex = /[^.]*(?=\\..*)\\.|.*/\nconst stripNameRegex = /\\..*/\nconst stripUidRegex = /::\\d+$/\nconst eventRegistry = {} // Events storage\nlet uidEvent = 1\nconst customEvents = {\n mouseenter: 'mouseover',\n mouseleave: 'mouseout'\n}\n\nconst nativeEvents = new Set([\n 'click',\n 'dblclick',\n 'mouseup',\n 'mousedown',\n 'contextmenu',\n 'mousewheel',\n 'DOMMouseScroll',\n 'mouseover',\n 'mouseout',\n 'mousemove',\n 'selectstart',\n 'selectend',\n 'keydown',\n 'keypress',\n 'keyup',\n 'orientationchange',\n 'touchstart',\n 'touchmove',\n 'touchend',\n 'touchcancel',\n 'pointerdown',\n 'pointermove',\n 'pointerup',\n 'pointerleave',\n 'pointercancel',\n 'gesturestart',\n 'gesturechange',\n 'gestureend',\n 'focus',\n 'blur',\n 'change',\n 'reset',\n 'select',\n 'submit',\n 'focusin',\n 'focusout',\n 'load',\n 'unload',\n 'beforeunload',\n 'resize',\n 'move',\n 'DOMContentLoaded',\n 'readystatechange',\n 'error',\n 'abort',\n 'scroll'\n])\n\n/**\n * Private methods\n */\n\nfunction makeEventUid(element, uid) {\n return (uid && `${uid}::${uidEvent++}`) || element.uidEvent || uidEvent++\n}\n\nfunction getElementEvents(element) {\n const uid = makeEventUid(element)\n\n element.uidEvent = uid\n eventRegistry[uid] = eventRegistry[uid] || {}\n\n return eventRegistry[uid]\n}\n\nfunction bootstrapHandler(element, fn) {\n return function handler(event) {\n hydrateObj(event, { delegateTarget: element })\n\n if (handler.oneOff) {\n EventHandler.off(element, event.type, fn)\n }\n\n return fn.apply(element, [event])\n }\n}\n\nfunction bootstrapDelegationHandler(element, selector, fn) {\n return function handler(event) {\n const domElements = element.querySelectorAll(selector)\n\n for (let { target } = event; target && target !== this; target = target.parentNode) {\n for (const domElement of domElements) {\n if (domElement !== target) {\n continue\n }\n\n hydrateObj(event, { delegateTarget: target })\n\n if (handler.oneOff) {\n EventHandler.off(element, event.type, selector, fn)\n }\n\n return fn.apply(target, [event])\n }\n }\n }\n}\n\nfunction findHandler(events, callable, delegationSelector = null) {\n return Object.values(events)\n .find(event => event.callable === callable && event.delegationSelector === delegationSelector)\n}\n\nfunction normalizeParameters(originalTypeEvent, handler, delegationFunction) {\n const isDelegated = typeof handler === 'string'\n // TODO: tooltip passes `false` instead of selector, so we need to check\n const callable = isDelegated ? 
delegationFunction : (handler || delegationFunction)\n let typeEvent = getTypeEvent(originalTypeEvent)\n\n if (!nativeEvents.has(typeEvent)) {\n typeEvent = originalTypeEvent\n }\n\n return [isDelegated, callable, typeEvent]\n}\n\nfunction addHandler(element, originalTypeEvent, handler, delegationFunction, oneOff) {\n if (typeof originalTypeEvent !== 'string' || !element) {\n return\n }\n\n let [isDelegated, callable, typeEvent] = normalizeParameters(originalTypeEvent, handler, delegationFunction)\n\n // in case of mouseenter or mouseleave wrap the handler within a function that checks for its DOM position\n // this prevents the handler from being dispatched the same way as mouseover or mouseout does\n if (originalTypeEvent in customEvents) {\n const wrapFunction = fn => {\n return function (event) {\n if (!event.relatedTarget || (event.relatedTarget !== event.delegateTarget && !event.delegateTarget.contains(event.relatedTarget))) {\n return fn.call(this, event)\n }\n }\n }\n\n callable = wrapFunction(callable)\n }\n\n const events = getElementEvents(element)\n const handlers = events[typeEvent] || (events[typeEvent] = {})\n const previousFunction = findHandler(handlers, callable, isDelegated ? handler : null)\n\n if (previousFunction) {\n previousFunction.oneOff = previousFunction.oneOff && oneOff\n\n return\n }\n\n const uid = makeEventUid(callable, originalTypeEvent.replace(namespaceRegex, ''))\n const fn = isDelegated ?\n bootstrapDelegationHandler(element, handler, callable) :\n bootstrapHandler(element, callable)\n\n fn.delegationSelector = isDelegated ? handler : null\n fn.callable = callable\n fn.oneOff = oneOff\n fn.uidEvent = uid\n handlers[uid] = fn\n\n element.addEventListener(typeEvent, fn, isDelegated)\n}\n\nfunction removeHandler(element, events, typeEvent, handler, delegationSelector) {\n const fn = findHandler(events[typeEvent], handler, delegationSelector)\n\n if (!fn) {\n return\n }\n\n element.removeEventListener(typeEvent, fn, Boolean(delegationSelector))\n delete events[typeEvent][fn.uidEvent]\n}\n\nfunction removeNamespacedHandlers(element, events, typeEvent, namespace) {\n const storeElementEvent = events[typeEvent] || {}\n\n for (const [handlerKey, event] of Object.entries(storeElementEvent)) {\n if (handlerKey.includes(namespace)) {\n removeHandler(element, events, typeEvent, event.callable, event.delegationSelector)\n }\n }\n}\n\nfunction getTypeEvent(event) {\n // allow to get the native events from namespaced events ('click.bs.button' --> 'click')\n event = event.replace(stripNameRegex, '')\n return customEvents[event] || event\n}\n\nconst EventHandler = {\n on(element, event, handler, delegationFunction) {\n addHandler(element, event, handler, delegationFunction, false)\n },\n\n one(element, event, handler, delegationFunction) {\n addHandler(element, event, handler, delegationFunction, true)\n },\n\n off(element, originalTypeEvent, handler, delegationFunction) {\n if (typeof originalTypeEvent !== 'string' || !element) {\n return\n }\n\n const [isDelegated, callable, typeEvent] = normalizeParameters(originalTypeEvent, handler, delegationFunction)\n const inNamespace = typeEvent !== originalTypeEvent\n const events = getElementEvents(element)\n const storeElementEvent = events[typeEvent] || {}\n const isNamespace = originalTypeEvent.startsWith('.')\n\n if (typeof callable !== 'undefined') {\n // Simplest case: handler is passed, remove that listener ONLY.\n if (!Object.keys(storeElementEvent).length) {\n return\n }\n\n removeHandler(element, events, typeEvent, 
callable, isDelegated ? handler : null)\n return\n }\n\n if (isNamespace) {\n for (const elementEvent of Object.keys(events)) {\n removeNamespacedHandlers(element, events, elementEvent, originalTypeEvent.slice(1))\n }\n }\n\n for (const [keyHandlers, event] of Object.entries(storeElementEvent)) {\n const handlerKey = keyHandlers.replace(stripUidRegex, '')\n\n if (!inNamespace || originalTypeEvent.includes(handlerKey)) {\n removeHandler(element, events, typeEvent, event.callable, event.delegationSelector)\n }\n }\n },\n\n trigger(element, event, args) {\n if (typeof event !== 'string' || !element) {\n return null\n }\n\n const $ = getjQuery()\n const typeEvent = getTypeEvent(event)\n const inNamespace = event !== typeEvent\n\n let jQueryEvent = null\n let bubbles = true\n let nativeDispatch = true\n let defaultPrevented = false\n\n if (inNamespace && $) {\n jQueryEvent = $.Event(event, args)\n\n $(element).trigger(jQueryEvent)\n bubbles = !jQueryEvent.isPropagationStopped()\n nativeDispatch = !jQueryEvent.isImmediatePropagationStopped()\n defaultPrevented = jQueryEvent.isDefaultPrevented()\n }\n\n const evt = hydrateObj(new Event(event, { bubbles, cancelable: true }), args)\n\n if (defaultPrevented) {\n evt.preventDefault()\n }\n\n if (nativeDispatch) {\n element.dispatchEvent(evt)\n }\n\n if (evt.defaultPrevented && jQueryEvent) {\n jQueryEvent.preventDefault()\n }\n\n return evt\n }\n}\n\nfunction hydrateObj(obj, meta = {}) {\n for (const [key, value] of Object.entries(meta)) {\n try {\n obj[key] = value\n } catch {\n Object.defineProperty(obj, key, {\n configurable: true,\n get() {\n return value\n }\n })\n }\n }\n\n return obj\n}\n\nexport default EventHandler\n","/**\n * --------------------------------------------------------------------------\n * Bootstrap dom/manipulator.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\nfunction normalizeData(value) {\n if (value === 'true') {\n return true\n }\n\n if (value === 'false') {\n return false\n }\n\n if (value === Number(value).toString()) {\n return Number(value)\n }\n\n if (value === '' || value === 'null') {\n return null\n }\n\n if (typeof value !== 'string') {\n return value\n }\n\n try {\n return JSON.parse(decodeURIComponent(value))\n } catch {\n return value\n }\n}\n\nfunction normalizeDataKey(key) {\n return key.replace(/[A-Z]/g, chr => `-${chr.toLowerCase()}`)\n}\n\nconst Manipulator = {\n setDataAttribute(element, key, value) {\n element.setAttribute(`data-bs-${normalizeDataKey(key)}`, value)\n },\n\n removeDataAttribute(element, key) {\n element.removeAttribute(`data-bs-${normalizeDataKey(key)}`)\n },\n\n getDataAttributes(element) {\n if (!element) {\n return {}\n }\n\n const attributes = {}\n const bsKeys = Object.keys(element.dataset).filter(key => key.startsWith('bs') && !key.startsWith('bsConfig'))\n\n for (const key of bsKeys) {\n let pureKey = key.replace(/^bs/, '')\n pureKey = pureKey.charAt(0).toLowerCase() + pureKey.slice(1, pureKey.length)\n attributes[pureKey] = normalizeData(element.dataset[key])\n }\n\n return attributes\n },\n\n getDataAttribute(element, key) {\n return normalizeData(element.getAttribute(`data-bs-${normalizeDataKey(key)}`))\n }\n}\n\nexport default Manipulator\n","/**\n * --------------------------------------------------------------------------\n * Bootstrap util/config.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * 
--------------------------------------------------------------------------\n */\n\nimport Manipulator from '../dom/manipulator.js'\nimport { isElement, toType } from './index.js'\n\n/**\n * Class definition\n */\n\nclass Config {\n // Getters\n static get Default() {\n return {}\n }\n\n static get DefaultType() {\n return {}\n }\n\n static get NAME() {\n throw new Error('You have to implement the static method \"NAME\", for each component!')\n }\n\n _getConfig(config) {\n config = this._mergeConfigObj(config)\n config = this._configAfterMerge(config)\n this._typeCheckConfig(config)\n return config\n }\n\n _configAfterMerge(config) {\n return config\n }\n\n _mergeConfigObj(config, element) {\n const jsonConfig = isElement(element) ? Manipulator.getDataAttribute(element, 'config') : {} // try to parse\n\n return {\n ...this.constructor.Default,\n ...(typeof jsonConfig === 'object' ? jsonConfig : {}),\n ...(isElement(element) ? Manipulator.getDataAttributes(element) : {}),\n ...(typeof config === 'object' ? config : {})\n }\n }\n\n _typeCheckConfig(config, configTypes = this.constructor.DefaultType) {\n for (const [property, expectedTypes] of Object.entries(configTypes)) {\n const value = config[property]\n const valueType = isElement(value) ? 'element' : toType(value)\n\n if (!new RegExp(expectedTypes).test(valueType)) {\n throw new TypeError(\n `${this.constructor.NAME.toUpperCase()}: Option \"${property}\" provided type \"${valueType}\" but expected type \"${expectedTypes}\".`\n )\n }\n }\n }\n}\n\nexport default Config\n","/**\n * --------------------------------------------------------------------------\n * Bootstrap base-component.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\nimport Data from './dom/data.js'\nimport EventHandler from './dom/event-handler.js'\nimport Config from './util/config.js'\nimport { executeAfterTransition, getElement } from './util/index.js'\n\n/**\n * Constants\n */\n\nconst VERSION = '5.3.1'\n\n/**\n * Class definition\n */\n\nclass BaseComponent extends Config {\n constructor(element, config) {\n super()\n\n element = getElement(element)\n if (!element) {\n return\n }\n\n this._element = element\n this._config = this._getConfig(config)\n\n Data.set(this._element, this.constructor.DATA_KEY, this)\n }\n\n // Public\n dispose() {\n Data.remove(this._element, this.constructor.DATA_KEY)\n EventHandler.off(this._element, this.constructor.EVENT_KEY)\n\n for (const propertyName of Object.getOwnPropertyNames(this)) {\n this[propertyName] = null\n }\n }\n\n _queueCallback(callback, element, isAnimated = true) {\n executeAfterTransition(callback, element, isAnimated)\n }\n\n _getConfig(config) {\n config = this._mergeConfigObj(config, this._element)\n config = this._configAfterMerge(config)\n this._typeCheckConfig(config)\n return config\n }\n\n // Static\n static getInstance(element) {\n return Data.get(getElement(element), this.DATA_KEY)\n }\n\n static getOrCreateInstance(element, config = {}) {\n return this.getInstance(element) || new this(element, typeof config === 'object' ? 
config : null)\n }\n\n static get VERSION() {\n return VERSION\n }\n\n static get DATA_KEY() {\n return `bs.${this.NAME}`\n }\n\n static get EVENT_KEY() {\n return `.${this.DATA_KEY}`\n }\n\n static eventName(name) {\n return `${name}${this.EVENT_KEY}`\n }\n}\n\nexport default BaseComponent\n","/**\n * --------------------------------------------------------------------------\n * Bootstrap dom/selector-engine.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\nimport { isDisabled, isVisible, parseSelector } from '../util/index.js'\n\nconst getSelector = element => {\n let selector = element.getAttribute('data-bs-target')\n\n if (!selector || selector === '#') {\n let hrefAttribute = element.getAttribute('href')\n\n // The only valid content that could double as a selector are IDs or classes,\n // so everything starting with `#` or `.`. If a \"real\" URL is used as the selector,\n // `document.querySelector` will rightfully complain it is invalid.\n // See https://github.com/twbs/bootstrap/issues/32273\n if (!hrefAttribute || (!hrefAttribute.includes('#') && !hrefAttribute.startsWith('.'))) {\n return null\n }\n\n // Just in case some CMS puts out a full URL with the anchor appended\n if (hrefAttribute.includes('#') && !hrefAttribute.startsWith('#')) {\n hrefAttribute = `#${hrefAttribute.split('#')[1]}`\n }\n\n selector = hrefAttribute && hrefAttribute !== '#' ? hrefAttribute.trim() : null\n }\n\n return parseSelector(selector)\n}\n\nconst SelectorEngine = {\n find(selector, element = document.documentElement) {\n return [].concat(...Element.prototype.querySelectorAll.call(element, selector))\n },\n\n findOne(selector, element = document.documentElement) {\n return Element.prototype.querySelector.call(element, selector)\n },\n\n children(element, selector) {\n return [].concat(...element.children).filter(child => child.matches(selector))\n },\n\n parents(element, selector) {\n const parents = []\n let ancestor = element.parentNode.closest(selector)\n\n while (ancestor) {\n parents.push(ancestor)\n ancestor = ancestor.parentNode.closest(selector)\n }\n\n return parents\n },\n\n prev(element, selector) {\n let previous = element.previousElementSibling\n\n while (previous) {\n if (previous.matches(selector)) {\n return [previous]\n }\n\n previous = previous.previousElementSibling\n }\n\n return []\n },\n // TODO: this is now unused; remove later along with prev()\n next(element, selector) {\n let next = element.nextElementSibling\n\n while (next) {\n if (next.matches(selector)) {\n return [next]\n }\n\n next = next.nextElementSibling\n }\n\n return []\n },\n\n focusableChildren(element) {\n const focusables = [\n 'a',\n 'button',\n 'input',\n 'textarea',\n 'select',\n 'details',\n '[tabindex]',\n '[contenteditable=\"true\"]'\n ].map(selector => `${selector}:not([tabindex^=\"-\"])`).join(',')\n\n return this.find(focusables, element).filter(el => !isDisabled(el) && isVisible(el))\n },\n\n getSelectorFromElement(element) {\n const selector = getSelector(element)\n\n if (selector) {\n return SelectorEngine.findOne(selector) ? selector : null\n }\n\n return null\n },\n\n getElementFromSelector(element) {\n const selector = getSelector(element)\n\n return selector ? SelectorEngine.findOne(selector) : null\n },\n\n getMultipleElementsFromSelector(element) {\n const selector = getSelector(element)\n\n return selector ? 
SelectorEngine.find(selector) : []\n }\n}\n\nexport default SelectorEngine\n","/**\n * --------------------------------------------------------------------------\n * Bootstrap util/component-functions.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\nimport EventHandler from '../dom/event-handler.js'\nimport SelectorEngine from '../dom/selector-engine.js'\nimport { isDisabled } from './index.js'\n\nconst enableDismissTrigger = (component, method = 'hide') => {\n const clickEvent = `click.dismiss${component.EVENT_KEY}`\n const name = component.NAME\n\n EventHandler.on(document, clickEvent, `[data-bs-dismiss=\"${name}\"]`, function (event) {\n if (['A', 'AREA'].includes(this.tagName)) {\n event.preventDefault()\n }\n\n if (isDisabled(this)) {\n return\n }\n\n const target = SelectorEngine.getElementFromSelector(this) || this.closest(`.${name}`)\n const instance = component.getOrCreateInstance(target)\n\n // Method argument is left, for Alert and only, as it doesn't implement the 'hide' method\n instance[method]()\n })\n}\n\nexport {\n enableDismissTrigger\n}\n","/**\n * --------------------------------------------------------------------------\n * Bootstrap alert.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\nimport BaseComponent from './base-component.js'\nimport EventHandler from './dom/event-handler.js'\nimport { enableDismissTrigger } from './util/component-functions.js'\nimport { defineJQueryPlugin } from './util/index.js'\n\n/**\n * Constants\n */\n\nconst NAME = 'alert'\nconst DATA_KEY = 'bs.alert'\nconst EVENT_KEY = `.${DATA_KEY}`\n\nconst EVENT_CLOSE = `close${EVENT_KEY}`\nconst EVENT_CLOSED = `closed${EVENT_KEY}`\nconst CLASS_NAME_FADE = 'fade'\nconst CLASS_NAME_SHOW = 'show'\n\n/**\n * Class definition\n */\n\nclass Alert extends BaseComponent {\n // Getters\n static get NAME() {\n return NAME\n }\n\n // Public\n close() {\n const closeEvent = EventHandler.trigger(this._element, EVENT_CLOSE)\n\n if (closeEvent.defaultPrevented) {\n return\n }\n\n this._element.classList.remove(CLASS_NAME_SHOW)\n\n const isAnimated = this._element.classList.contains(CLASS_NAME_FADE)\n this._queueCallback(() => this._destroyElement(), this._element, isAnimated)\n }\n\n // Private\n _destroyElement() {\n this._element.remove()\n EventHandler.trigger(this._element, EVENT_CLOSED)\n this.dispose()\n }\n\n // Static\n static jQueryInterface(config) {\n return this.each(function () {\n const data = Alert.getOrCreateInstance(this)\n\n if (typeof config !== 'string') {\n return\n }\n\n if (data[config] === undefined || config.startsWith('_') || config === 'constructor') {\n throw new TypeError(`No method named \"${config}\"`)\n }\n\n data[config](this)\n })\n }\n}\n\n/**\n * Data API implementation\n */\n\nenableDismissTrigger(Alert, 'close')\n\n/**\n * jQuery\n */\n\ndefineJQueryPlugin(Alert)\n\nexport default Alert\n","/**\n * --------------------------------------------------------------------------\n * Bootstrap button.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\nimport BaseComponent from './base-component.js'\nimport EventHandler from './dom/event-handler.js'\nimport { defineJQueryPlugin } from './util/index.js'\n\n/**\n * Constants\n */\n\nconst 
NAME = 'button'\nconst DATA_KEY = 'bs.button'\nconst EVENT_KEY = `.${DATA_KEY}`\nconst DATA_API_KEY = '.data-api'\n\nconst CLASS_NAME_ACTIVE = 'active'\nconst SELECTOR_DATA_TOGGLE = '[data-bs-toggle=\"button\"]'\nconst EVENT_CLICK_DATA_API = `click${EVENT_KEY}${DATA_API_KEY}`\n\n/**\n * Class definition\n */\n\nclass Button extends BaseComponent {\n // Getters\n static get NAME() {\n return NAME\n }\n\n // Public\n toggle() {\n // Toggle class and sync the `aria-pressed` attribute with the return value of the `.toggle()` method\n this._element.setAttribute('aria-pressed', this._element.classList.toggle(CLASS_NAME_ACTIVE))\n }\n\n // Static\n static jQueryInterface(config) {\n return this.each(function () {\n const data = Button.getOrCreateInstance(this)\n\n if (config === 'toggle') {\n data[config]()\n }\n })\n }\n}\n\n/**\n * Data API implementation\n */\n\nEventHandler.on(document, EVENT_CLICK_DATA_API, SELECTOR_DATA_TOGGLE, event => {\n event.preventDefault()\n\n const button = event.target.closest(SELECTOR_DATA_TOGGLE)\n const data = Button.getOrCreateInstance(button)\n\n data.toggle()\n})\n\n/**\n * jQuery\n */\n\ndefineJQueryPlugin(Button)\n\nexport default Button\n","/**\n * --------------------------------------------------------------------------\n * Bootstrap util/swipe.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\nimport EventHandler from '../dom/event-handler.js'\nimport Config from './config.js'\nimport { execute } from './index.js'\n\n/**\n * Constants\n */\n\nconst NAME = 'swipe'\nconst EVENT_KEY = '.bs.swipe'\nconst EVENT_TOUCHSTART = `touchstart${EVENT_KEY}`\nconst EVENT_TOUCHMOVE = `touchmove${EVENT_KEY}`\nconst EVENT_TOUCHEND = `touchend${EVENT_KEY}`\nconst EVENT_POINTERDOWN = `pointerdown${EVENT_KEY}`\nconst EVENT_POINTERUP = `pointerup${EVENT_KEY}`\nconst POINTER_TYPE_TOUCH = 'touch'\nconst POINTER_TYPE_PEN = 'pen'\nconst CLASS_NAME_POINTER_EVENT = 'pointer-event'\nconst SWIPE_THRESHOLD = 40\n\nconst Default = {\n endCallback: null,\n leftCallback: null,\n rightCallback: null\n}\n\nconst DefaultType = {\n endCallback: '(function|null)',\n leftCallback: '(function|null)',\n rightCallback: '(function|null)'\n}\n\n/**\n * Class definition\n */\n\nclass Swipe extends Config {\n constructor(element, config) {\n super()\n this._element = element\n\n if (!element || !Swipe.isSupported()) {\n return\n }\n\n this._config = this._getConfig(config)\n this._deltaX = 0\n this._supportPointerEvents = Boolean(window.PointerEvent)\n this._initEvents()\n }\n\n // Getters\n static get Default() {\n return Default\n }\n\n static get DefaultType() {\n return DefaultType\n }\n\n static get NAME() {\n return NAME\n }\n\n // Public\n dispose() {\n EventHandler.off(this._element, EVENT_KEY)\n }\n\n // Private\n _start(event) {\n if (!this._supportPointerEvents) {\n this._deltaX = event.touches[0].clientX\n\n return\n }\n\n if (this._eventIsPointerPenTouch(event)) {\n this._deltaX = event.clientX\n }\n }\n\n _end(event) {\n if (this._eventIsPointerPenTouch(event)) {\n this._deltaX = event.clientX - this._deltaX\n }\n\n this._handleSwipe()\n execute(this._config.endCallback)\n }\n\n _move(event) {\n this._deltaX = event.touches && event.touches.length > 1 ?\n 0 :\n event.touches[0].clientX - this._deltaX\n }\n\n _handleSwipe() {\n const absDeltaX = Math.abs(this._deltaX)\n\n if (absDeltaX <= SWIPE_THRESHOLD) {\n return\n }\n\n const direction = absDeltaX / this._deltaX\n\n 
this._deltaX = 0\n\n if (!direction) {\n return\n }\n\n execute(direction > 0 ? this._config.rightCallback : this._config.leftCallback)\n }\n\n _initEvents() {\n if (this._supportPointerEvents) {\n EventHandler.on(this._element, EVENT_POINTERDOWN, event => this._start(event))\n EventHandler.on(this._element, EVENT_POINTERUP, event => this._end(event))\n\n this._element.classList.add(CLASS_NAME_POINTER_EVENT)\n } else {\n EventHandler.on(this._element, EVENT_TOUCHSTART, event => this._start(event))\n EventHandler.on(this._element, EVENT_TOUCHMOVE, event => this._move(event))\n EventHandler.on(this._element, EVENT_TOUCHEND, event => this._end(event))\n }\n }\n\n _eventIsPointerPenTouch(event) {\n return this._supportPointerEvents && (event.pointerType === POINTER_TYPE_PEN || event.pointerType === POINTER_TYPE_TOUCH)\n }\n\n // Static\n static isSupported() {\n return 'ontouchstart' in document.documentElement || navigator.maxTouchPoints > 0\n }\n}\n\nexport default Swipe\n","/**\n * --------------------------------------------------------------------------\n * Bootstrap carousel.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\nimport BaseComponent from './base-component.js'\nimport EventHandler from './dom/event-handler.js'\nimport Manipulator from './dom/manipulator.js'\nimport SelectorEngine from './dom/selector-engine.js'\nimport {\n defineJQueryPlugin,\n getNextActiveElement,\n isRTL,\n isVisible,\n reflow,\n triggerTransitionEnd\n} from './util/index.js'\nimport Swipe from './util/swipe.js'\n\n/**\n * Constants\n */\n\nconst NAME = 'carousel'\nconst DATA_KEY = 'bs.carousel'\nconst EVENT_KEY = `.${DATA_KEY}`\nconst DATA_API_KEY = '.data-api'\n\nconst ARROW_LEFT_KEY = 'ArrowLeft'\nconst ARROW_RIGHT_KEY = 'ArrowRight'\nconst TOUCHEVENT_COMPAT_WAIT = 500 // Time for mouse compat events to fire after touch\n\nconst ORDER_NEXT = 'next'\nconst ORDER_PREV = 'prev'\nconst DIRECTION_LEFT = 'left'\nconst DIRECTION_RIGHT = 'right'\n\nconst EVENT_SLIDE = `slide${EVENT_KEY}`\nconst EVENT_SLID = `slid${EVENT_KEY}`\nconst EVENT_KEYDOWN = `keydown${EVENT_KEY}`\nconst EVENT_MOUSEENTER = `mouseenter${EVENT_KEY}`\nconst EVENT_MOUSELEAVE = `mouseleave${EVENT_KEY}`\nconst EVENT_DRAG_START = `dragstart${EVENT_KEY}`\nconst EVENT_LOAD_DATA_API = `load${EVENT_KEY}${DATA_API_KEY}`\nconst EVENT_CLICK_DATA_API = `click${EVENT_KEY}${DATA_API_KEY}`\n\nconst CLASS_NAME_CAROUSEL = 'carousel'\nconst CLASS_NAME_ACTIVE = 'active'\nconst CLASS_NAME_SLIDE = 'slide'\nconst CLASS_NAME_END = 'carousel-item-end'\nconst CLASS_NAME_START = 'carousel-item-start'\nconst CLASS_NAME_NEXT = 'carousel-item-next'\nconst CLASS_NAME_PREV = 'carousel-item-prev'\n\nconst SELECTOR_ACTIVE = '.active'\nconst SELECTOR_ITEM = '.carousel-item'\nconst SELECTOR_ACTIVE_ITEM = SELECTOR_ACTIVE + SELECTOR_ITEM\nconst SELECTOR_ITEM_IMG = '.carousel-item img'\nconst SELECTOR_INDICATORS = '.carousel-indicators'\nconst SELECTOR_DATA_SLIDE = '[data-bs-slide], [data-bs-slide-to]'\nconst SELECTOR_DATA_RIDE = '[data-bs-ride=\"carousel\"]'\n\nconst KEY_TO_DIRECTION = {\n [ARROW_LEFT_KEY]: DIRECTION_RIGHT,\n [ARROW_RIGHT_KEY]: DIRECTION_LEFT\n}\n\nconst Default = {\n interval: 5000,\n keyboard: true,\n pause: 'hover',\n ride: false,\n touch: true,\n wrap: true\n}\n\nconst DefaultType = {\n interval: '(number|boolean)', // TODO:v6 remove boolean support\n keyboard: 'boolean',\n pause: '(string|boolean)',\n ride: '(boolean|string)',\n touch: 
'boolean',\n wrap: 'boolean'\n}\n\n/**\n * Class definition\n */\n\nclass Carousel extends BaseComponent {\n constructor(element, config) {\n super(element, config)\n\n this._interval = null\n this._activeElement = null\n this._isSliding = false\n this.touchTimeout = null\n this._swipeHelper = null\n\n this._indicatorsElement = SelectorEngine.findOne(SELECTOR_INDICATORS, this._element)\n this._addEventListeners()\n\n if (this._config.ride === CLASS_NAME_CAROUSEL) {\n this.cycle()\n }\n }\n\n // Getters\n static get Default() {\n return Default\n }\n\n static get DefaultType() {\n return DefaultType\n }\n\n static get NAME() {\n return NAME\n }\n\n // Public\n next() {\n this._slide(ORDER_NEXT)\n }\n\n nextWhenVisible() {\n // FIXME TODO use `document.visibilityState`\n // Don't call next when the page isn't visible\n // or the carousel or its parent isn't visible\n if (!document.hidden && isVisible(this._element)) {\n this.next()\n }\n }\n\n prev() {\n this._slide(ORDER_PREV)\n }\n\n pause() {\n if (this._isSliding) {\n triggerTransitionEnd(this._element)\n }\n\n this._clearInterval()\n }\n\n cycle() {\n this._clearInterval()\n this._updateInterval()\n\n this._interval = setInterval(() => this.nextWhenVisible(), this._config.interval)\n }\n\n _maybeEnableCycle() {\n if (!this._config.ride) {\n return\n }\n\n if (this._isSliding) {\n EventHandler.one(this._element, EVENT_SLID, () => this.cycle())\n return\n }\n\n this.cycle()\n }\n\n to(index) {\n const items = this._getItems()\n if (index > items.length - 1 || index < 0) {\n return\n }\n\n if (this._isSliding) {\n EventHandler.one(this._element, EVENT_SLID, () => this.to(index))\n return\n }\n\n const activeIndex = this._getItemIndex(this._getActive())\n if (activeIndex === index) {\n return\n }\n\n const order = index > activeIndex ? 
ORDER_NEXT : ORDER_PREV\n\n this._slide(order, items[index])\n }\n\n dispose() {\n if (this._swipeHelper) {\n this._swipeHelper.dispose()\n }\n\n super.dispose()\n }\n\n // Private\n _configAfterMerge(config) {\n config.defaultInterval = config.interval\n return config\n }\n\n _addEventListeners() {\n if (this._config.keyboard) {\n EventHandler.on(this._element, EVENT_KEYDOWN, event => this._keydown(event))\n }\n\n if (this._config.pause === 'hover') {\n EventHandler.on(this._element, EVENT_MOUSEENTER, () => this.pause())\n EventHandler.on(this._element, EVENT_MOUSELEAVE, () => this._maybeEnableCycle())\n }\n\n if (this._config.touch && Swipe.isSupported()) {\n this._addTouchEventListeners()\n }\n }\n\n _addTouchEventListeners() {\n for (const img of SelectorEngine.find(SELECTOR_ITEM_IMG, this._element)) {\n EventHandler.on(img, EVENT_DRAG_START, event => event.preventDefault())\n }\n\n const endCallBack = () => {\n if (this._config.pause !== 'hover') {\n return\n }\n\n // If it's a touch-enabled device, mouseenter/leave are fired as\n // part of the mouse compatibility events on first tap - the carousel\n // would stop cycling until user tapped out of it;\n // here, we listen for touchend, explicitly pause the carousel\n // (as if it's the second time we tap on it, mouseenter compat event\n // is NOT fired) and after a timeout (to allow for mouse compatibility\n // events to fire) we explicitly restart cycling\n\n this.pause()\n if (this.touchTimeout) {\n clearTimeout(this.touchTimeout)\n }\n\n this.touchTimeout = setTimeout(() => this._maybeEnableCycle(), TOUCHEVENT_COMPAT_WAIT + this._config.interval)\n }\n\n const swipeConfig = {\n leftCallback: () => this._slide(this._directionToOrder(DIRECTION_LEFT)),\n rightCallback: () => this._slide(this._directionToOrder(DIRECTION_RIGHT)),\n endCallback: endCallBack\n }\n\n this._swipeHelper = new Swipe(this._element, swipeConfig)\n }\n\n _keydown(event) {\n if (/input|textarea/i.test(event.target.tagName)) {\n return\n }\n\n const direction = KEY_TO_DIRECTION[event.key]\n if (direction) {\n event.preventDefault()\n this._slide(this._directionToOrder(direction))\n }\n }\n\n _getItemIndex(element) {\n return this._getItems().indexOf(element)\n }\n\n _setActiveIndicatorElement(index) {\n if (!this._indicatorsElement) {\n return\n }\n\n const activeIndicator = SelectorEngine.findOne(SELECTOR_ACTIVE, this._indicatorsElement)\n\n activeIndicator.classList.remove(CLASS_NAME_ACTIVE)\n activeIndicator.removeAttribute('aria-current')\n\n const newActiveIndicator = SelectorEngine.findOne(`[data-bs-slide-to=\"${index}\"]`, this._indicatorsElement)\n\n if (newActiveIndicator) {\n newActiveIndicator.classList.add(CLASS_NAME_ACTIVE)\n newActiveIndicator.setAttribute('aria-current', 'true')\n }\n }\n\n _updateInterval() {\n const element = this._activeElement || this._getActive()\n\n if (!element) {\n return\n }\n\n const elementInterval = Number.parseInt(element.getAttribute('data-bs-interval'), 10)\n\n this._config.interval = elementInterval || this._config.defaultInterval\n }\n\n _slide(order, element = null) {\n if (this._isSliding) {\n return\n }\n\n const activeElement = this._getActive()\n const isNext = order === ORDER_NEXT\n const nextElement = element || getNextActiveElement(this._getItems(), activeElement, isNext, this._config.wrap)\n\n if (nextElement === activeElement) {\n return\n }\n\n const nextElementIndex = this._getItemIndex(nextElement)\n\n const triggerEvent = eventName => {\n return EventHandler.trigger(this._element, eventName, {\n 
relatedTarget: nextElement,\n direction: this._orderToDirection(order),\n from: this._getItemIndex(activeElement),\n to: nextElementIndex\n })\n }\n\n const slideEvent = triggerEvent(EVENT_SLIDE)\n\n if (slideEvent.defaultPrevented) {\n return\n }\n\n if (!activeElement || !nextElement) {\n // Some weirdness is happening, so we bail\n // TODO: change tests that use empty divs to avoid this check\n return\n }\n\n const isCycling = Boolean(this._interval)\n this.pause()\n\n this._isSliding = true\n\n this._setActiveIndicatorElement(nextElementIndex)\n this._activeElement = nextElement\n\n const directionalClassName = isNext ? CLASS_NAME_START : CLASS_NAME_END\n const orderClassName = isNext ? CLASS_NAME_NEXT : CLASS_NAME_PREV\n\n nextElement.classList.add(orderClassName)\n\n reflow(nextElement)\n\n activeElement.classList.add(directionalClassName)\n nextElement.classList.add(directionalClassName)\n\n const completeCallBack = () => {\n nextElement.classList.remove(directionalClassName, orderClassName)\n nextElement.classList.add(CLASS_NAME_ACTIVE)\n\n activeElement.classList.remove(CLASS_NAME_ACTIVE, orderClassName, directionalClassName)\n\n this._isSliding = false\n\n triggerEvent(EVENT_SLID)\n }\n\n this._queueCallback(completeCallBack, activeElement, this._isAnimated())\n\n if (isCycling) {\n this.cycle()\n }\n }\n\n _isAnimated() {\n return this._element.classList.contains(CLASS_NAME_SLIDE)\n }\n\n _getActive() {\n return SelectorEngine.findOne(SELECTOR_ACTIVE_ITEM, this._element)\n }\n\n _getItems() {\n return SelectorEngine.find(SELECTOR_ITEM, this._element)\n }\n\n _clearInterval() {\n if (this._interval) {\n clearInterval(this._interval)\n this._interval = null\n }\n }\n\n _directionToOrder(direction) {\n if (isRTL()) {\n return direction === DIRECTION_LEFT ? ORDER_PREV : ORDER_NEXT\n }\n\n return direction === DIRECTION_LEFT ? ORDER_NEXT : ORDER_PREV\n }\n\n _orderToDirection(order) {\n if (isRTL()) {\n return order === ORDER_PREV ? DIRECTION_LEFT : DIRECTION_RIGHT\n }\n\n return order === ORDER_PREV ? 
DIRECTION_RIGHT : DIRECTION_LEFT\n }\n\n // Static\n static jQueryInterface(config) {\n return this.each(function () {\n const data = Carousel.getOrCreateInstance(this, config)\n\n if (typeof config === 'number') {\n data.to(config)\n return\n }\n\n if (typeof config === 'string') {\n if (data[config] === undefined || config.startsWith('_') || config === 'constructor') {\n throw new TypeError(`No method named \"${config}\"`)\n }\n\n data[config]()\n }\n })\n }\n}\n\n/**\n * Data API implementation\n */\n\nEventHandler.on(document, EVENT_CLICK_DATA_API, SELECTOR_DATA_SLIDE, function (event) {\n const target = SelectorEngine.getElementFromSelector(this)\n\n if (!target || !target.classList.contains(CLASS_NAME_CAROUSEL)) {\n return\n }\n\n event.preventDefault()\n\n const carousel = Carousel.getOrCreateInstance(target)\n const slideIndex = this.getAttribute('data-bs-slide-to')\n\n if (slideIndex) {\n carousel.to(slideIndex)\n carousel._maybeEnableCycle()\n return\n }\n\n if (Manipulator.getDataAttribute(this, 'slide') === 'next') {\n carousel.next()\n carousel._maybeEnableCycle()\n return\n }\n\n carousel.prev()\n carousel._maybeEnableCycle()\n})\n\nEventHandler.on(window, EVENT_LOAD_DATA_API, () => {\n const carousels = SelectorEngine.find(SELECTOR_DATA_RIDE)\n\n for (const carousel of carousels) {\n Carousel.getOrCreateInstance(carousel)\n }\n})\n\n/**\n * jQuery\n */\n\ndefineJQueryPlugin(Carousel)\n\nexport default Carousel\n","/**\n * --------------------------------------------------------------------------\n * Bootstrap collapse.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\nimport BaseComponent from './base-component.js'\nimport EventHandler from './dom/event-handler.js'\nimport SelectorEngine from './dom/selector-engine.js'\nimport {\n defineJQueryPlugin,\n getElement,\n reflow\n} from './util/index.js'\n\n/**\n * Constants\n */\n\nconst NAME = 'collapse'\nconst DATA_KEY = 'bs.collapse'\nconst EVENT_KEY = `.${DATA_KEY}`\nconst DATA_API_KEY = '.data-api'\n\nconst EVENT_SHOW = `show${EVENT_KEY}`\nconst EVENT_SHOWN = `shown${EVENT_KEY}`\nconst EVENT_HIDE = `hide${EVENT_KEY}`\nconst EVENT_HIDDEN = `hidden${EVENT_KEY}`\nconst EVENT_CLICK_DATA_API = `click${EVENT_KEY}${DATA_API_KEY}`\n\nconst CLASS_NAME_SHOW = 'show'\nconst CLASS_NAME_COLLAPSE = 'collapse'\nconst CLASS_NAME_COLLAPSING = 'collapsing'\nconst CLASS_NAME_COLLAPSED = 'collapsed'\nconst CLASS_NAME_DEEPER_CHILDREN = `:scope .${CLASS_NAME_COLLAPSE} .${CLASS_NAME_COLLAPSE}`\nconst CLASS_NAME_HORIZONTAL = 'collapse-horizontal'\n\nconst WIDTH = 'width'\nconst HEIGHT = 'height'\n\nconst SELECTOR_ACTIVES = '.collapse.show, .collapse.collapsing'\nconst SELECTOR_DATA_TOGGLE = '[data-bs-toggle=\"collapse\"]'\n\nconst Default = {\n parent: null,\n toggle: true\n}\n\nconst DefaultType = {\n parent: '(null|element)',\n toggle: 'boolean'\n}\n\n/**\n * Class definition\n */\n\nclass Collapse extends BaseComponent {\n constructor(element, config) {\n super(element, config)\n\n this._isTransitioning = false\n this._triggerArray = []\n\n const toggleList = SelectorEngine.find(SELECTOR_DATA_TOGGLE)\n\n for (const elem of toggleList) {\n const selector = SelectorEngine.getSelectorFromElement(elem)\n const filterElement = SelectorEngine.find(selector)\n .filter(foundElement => foundElement === this._element)\n\n if (selector !== null && filterElement.length) {\n this._triggerArray.push(elem)\n }\n }\n\n 
this._initializeChildren()\n\n if (!this._config.parent) {\n this._addAriaAndCollapsedClass(this._triggerArray, this._isShown())\n }\n\n if (this._config.toggle) {\n this.toggle()\n }\n }\n\n // Getters\n static get Default() {\n return Default\n }\n\n static get DefaultType() {\n return DefaultType\n }\n\n static get NAME() {\n return NAME\n }\n\n // Public\n toggle() {\n if (this._isShown()) {\n this.hide()\n } else {\n this.show()\n }\n }\n\n show() {\n if (this._isTransitioning || this._isShown()) {\n return\n }\n\n let activeChildren = []\n\n // find active children\n if (this._config.parent) {\n activeChildren = this._getFirstLevelChildren(SELECTOR_ACTIVES)\n .filter(element => element !== this._element)\n .map(element => Collapse.getOrCreateInstance(element, { toggle: false }))\n }\n\n if (activeChildren.length && activeChildren[0]._isTransitioning) {\n return\n }\n\n const startEvent = EventHandler.trigger(this._element, EVENT_SHOW)\n if (startEvent.defaultPrevented) {\n return\n }\n\n for (const activeInstance of activeChildren) {\n activeInstance.hide()\n }\n\n const dimension = this._getDimension()\n\n this._element.classList.remove(CLASS_NAME_COLLAPSE)\n this._element.classList.add(CLASS_NAME_COLLAPSING)\n\n this._element.style[dimension] = 0\n\n this._addAriaAndCollapsedClass(this._triggerArray, true)\n this._isTransitioning = true\n\n const complete = () => {\n this._isTransitioning = false\n\n this._element.classList.remove(CLASS_NAME_COLLAPSING)\n this._element.classList.add(CLASS_NAME_COLLAPSE, CLASS_NAME_SHOW)\n\n this._element.style[dimension] = ''\n\n EventHandler.trigger(this._element, EVENT_SHOWN)\n }\n\n const capitalizedDimension = dimension[0].toUpperCase() + dimension.slice(1)\n const scrollSize = `scroll${capitalizedDimension}`\n\n this._queueCallback(complete, this._element, true)\n this._element.style[dimension] = `${this._element[scrollSize]}px`\n }\n\n hide() {\n if (this._isTransitioning || !this._isShown()) {\n return\n }\n\n const startEvent = EventHandler.trigger(this._element, EVENT_HIDE)\n if (startEvent.defaultPrevented) {\n return\n }\n\n const dimension = this._getDimension()\n\n this._element.style[dimension] = `${this._element.getBoundingClientRect()[dimension]}px`\n\n reflow(this._element)\n\n this._element.classList.add(CLASS_NAME_COLLAPSING)\n this._element.classList.remove(CLASS_NAME_COLLAPSE, CLASS_NAME_SHOW)\n\n for (const trigger of this._triggerArray) {\n const element = SelectorEngine.getElementFromSelector(trigger)\n\n if (element && !this._isShown(element)) {\n this._addAriaAndCollapsedClass([trigger], false)\n }\n }\n\n this._isTransitioning = true\n\n const complete = () => {\n this._isTransitioning = false\n this._element.classList.remove(CLASS_NAME_COLLAPSING)\n this._element.classList.add(CLASS_NAME_COLLAPSE)\n EventHandler.trigger(this._element, EVENT_HIDDEN)\n }\n\n this._element.style[dimension] = ''\n\n this._queueCallback(complete, this._element, true)\n }\n\n _isShown(element = this._element) {\n return element.classList.contains(CLASS_NAME_SHOW)\n }\n\n // Private\n _configAfterMerge(config) {\n config.toggle = Boolean(config.toggle) // Coerce string values\n config.parent = getElement(config.parent)\n return config\n }\n\n _getDimension() {\n return this._element.classList.contains(CLASS_NAME_HORIZONTAL) ? 
WIDTH : HEIGHT\n }\n\n _initializeChildren() {\n if (!this._config.parent) {\n return\n }\n\n const children = this._getFirstLevelChildren(SELECTOR_DATA_TOGGLE)\n\n for (const element of children) {\n const selected = SelectorEngine.getElementFromSelector(element)\n\n if (selected) {\n this._addAriaAndCollapsedClass([element], this._isShown(selected))\n }\n }\n }\n\n _getFirstLevelChildren(selector) {\n const children = SelectorEngine.find(CLASS_NAME_DEEPER_CHILDREN, this._config.parent)\n // remove children if greater depth\n return SelectorEngine.find(selector, this._config.parent).filter(element => !children.includes(element))\n }\n\n _addAriaAndCollapsedClass(triggerArray, isOpen) {\n if (!triggerArray.length) {\n return\n }\n\n for (const element of triggerArray) {\n element.classList.toggle(CLASS_NAME_COLLAPSED, !isOpen)\n element.setAttribute('aria-expanded', isOpen)\n }\n }\n\n // Static\n static jQueryInterface(config) {\n const _config = {}\n if (typeof config === 'string' && /show|hide/.test(config)) {\n _config.toggle = false\n }\n\n return this.each(function () {\n const data = Collapse.getOrCreateInstance(this, _config)\n\n if (typeof config === 'string') {\n if (typeof data[config] === 'undefined') {\n throw new TypeError(`No method named \"${config}\"`)\n }\n\n data[config]()\n }\n })\n }\n}\n\n/**\n * Data API implementation\n */\n\nEventHandler.on(document, EVENT_CLICK_DATA_API, SELECTOR_DATA_TOGGLE, function (event) {\n // preventDefault only for elements (which change the URL) not inside the collapsible element\n if (event.target.tagName === 'A' || (event.delegateTarget && event.delegateTarget.tagName === 'A')) {\n event.preventDefault()\n }\n\n for (const element of SelectorEngine.getMultipleElementsFromSelector(this)) {\n Collapse.getOrCreateInstance(element, { toggle: false }).toggle()\n }\n})\n\n/**\n * jQuery\n */\n\ndefineJQueryPlugin(Collapse)\n\nexport default Collapse\n","export var top = 'top';\nexport var bottom = 'bottom';\nexport var right = 'right';\nexport var left = 'left';\nexport var auto = 'auto';\nexport var basePlacements = [top, bottom, right, left];\nexport var start = 'start';\nexport var end = 'end';\nexport var clippingParents = 'clippingParents';\nexport var viewport = 'viewport';\nexport var popper = 'popper';\nexport var reference = 'reference';\nexport var variationPlacements = /*#__PURE__*/basePlacements.reduce(function (acc, placement) {\n return acc.concat([placement + \"-\" + start, placement + \"-\" + end]);\n}, []);\nexport var placements = /*#__PURE__*/[].concat(basePlacements, [auto]).reduce(function (acc, placement) {\n return acc.concat([placement, placement + \"-\" + start, placement + \"-\" + end]);\n}, []); // modifiers that need to read the DOM\n\nexport var beforeRead = 'beforeRead';\nexport var read = 'read';\nexport var afterRead = 'afterRead'; // pure-logic modifiers\n\nexport var beforeMain = 'beforeMain';\nexport var main = 'main';\nexport var afterMain = 'afterMain'; // modifier with the purpose to write to the DOM (or write into a framework state)\n\nexport var beforeWrite = 'beforeWrite';\nexport var write = 'write';\nexport var afterWrite = 'afterWrite';\nexport var modifierPhases = [beforeRead, read, afterRead, beforeMain, main, afterMain, beforeWrite, write, afterWrite];","export default function getNodeName(element) {\n return element ? 
(element.nodeName || '').toLowerCase() : null;\n}","export default function getWindow(node) {\n if (node == null) {\n return window;\n }\n\n if (node.toString() !== '[object Window]') {\n var ownerDocument = node.ownerDocument;\n return ownerDocument ? ownerDocument.defaultView || window : window;\n }\n\n return node;\n}","import getWindow from \"./getWindow.js\";\n\nfunction isElement(node) {\n var OwnElement = getWindow(node).Element;\n return node instanceof OwnElement || node instanceof Element;\n}\n\nfunction isHTMLElement(node) {\n var OwnElement = getWindow(node).HTMLElement;\n return node instanceof OwnElement || node instanceof HTMLElement;\n}\n\nfunction isShadowRoot(node) {\n // IE 11 has no ShadowRoot\n if (typeof ShadowRoot === 'undefined') {\n return false;\n }\n\n var OwnElement = getWindow(node).ShadowRoot;\n return node instanceof OwnElement || node instanceof ShadowRoot;\n}\n\nexport { isElement, isHTMLElement, isShadowRoot };","import getNodeName from \"../dom-utils/getNodeName.js\";\nimport { isHTMLElement } from \"../dom-utils/instanceOf.js\"; // This modifier takes the styles prepared by the `computeStyles` modifier\n// and applies them to the HTMLElements such as popper and arrow\n\nfunction applyStyles(_ref) {\n var state = _ref.state;\n Object.keys(state.elements).forEach(function (name) {\n var style = state.styles[name] || {};\n var attributes = state.attributes[name] || {};\n var element = state.elements[name]; // arrow is optional + virtual elements\n\n if (!isHTMLElement(element) || !getNodeName(element)) {\n return;\n } // Flow doesn't support to extend this property, but it's the most\n // effective way to apply styles to an HTMLElement\n // $FlowFixMe[cannot-write]\n\n\n Object.assign(element.style, style);\n Object.keys(attributes).forEach(function (name) {\n var value = attributes[name];\n\n if (value === false) {\n element.removeAttribute(name);\n } else {\n element.setAttribute(name, value === true ? '' : value);\n }\n });\n });\n}\n\nfunction effect(_ref2) {\n var state = _ref2.state;\n var initialStyles = {\n popper: {\n position: state.options.strategy,\n left: '0',\n top: '0',\n margin: '0'\n },\n arrow: {\n position: 'absolute'\n },\n reference: {}\n };\n Object.assign(state.elements.popper.style, initialStyles.popper);\n state.styles = initialStyles;\n\n if (state.elements.arrow) {\n Object.assign(state.elements.arrow.style, initialStyles.arrow);\n }\n\n return function () {\n Object.keys(state.elements).forEach(function (name) {\n var element = state.elements[name];\n var attributes = state.attributes[name] || {};\n var styleProperties = Object.keys(state.styles.hasOwnProperty(name) ? 
state.styles[name] : initialStyles[name]); // Set all values to an empty string to unset them\n\n var style = styleProperties.reduce(function (style, property) {\n style[property] = '';\n return style;\n }, {}); // arrow is optional + virtual elements\n\n if (!isHTMLElement(element) || !getNodeName(element)) {\n return;\n }\n\n Object.assign(element.style, style);\n Object.keys(attributes).forEach(function (attribute) {\n element.removeAttribute(attribute);\n });\n });\n };\n} // eslint-disable-next-line import/no-unused-modules\n\n\nexport default {\n name: 'applyStyles',\n enabled: true,\n phase: 'write',\n fn: applyStyles,\n effect: effect,\n requires: ['computeStyles']\n};","import { auto } from \"../enums.js\";\nexport default function getBasePlacement(placement) {\n return placement.split('-')[0];\n}","export var max = Math.max;\nexport var min = Math.min;\nexport var round = Math.round;","export default function getUAString() {\n var uaData = navigator.userAgentData;\n\n if (uaData != null && uaData.brands && Array.isArray(uaData.brands)) {\n return uaData.brands.map(function (item) {\n return item.brand + \"/\" + item.version;\n }).join(' ');\n }\n\n return navigator.userAgent;\n}","import getUAString from \"../utils/userAgent.js\";\nexport default function isLayoutViewport() {\n return !/^((?!chrome|android).)*safari/i.test(getUAString());\n}","import { isElement, isHTMLElement } from \"./instanceOf.js\";\nimport { round } from \"../utils/math.js\";\nimport getWindow from \"./getWindow.js\";\nimport isLayoutViewport from \"./isLayoutViewport.js\";\nexport default function getBoundingClientRect(element, includeScale, isFixedStrategy) {\n if (includeScale === void 0) {\n includeScale = false;\n }\n\n if (isFixedStrategy === void 0) {\n isFixedStrategy = false;\n }\n\n var clientRect = element.getBoundingClientRect();\n var scaleX = 1;\n var scaleY = 1;\n\n if (includeScale && isHTMLElement(element)) {\n scaleX = element.offsetWidth > 0 ? round(clientRect.width) / element.offsetWidth || 1 : 1;\n scaleY = element.offsetHeight > 0 ? round(clientRect.height) / element.offsetHeight || 1 : 1;\n }\n\n var _ref = isElement(element) ? getWindow(element) : window,\n visualViewport = _ref.visualViewport;\n\n var addVisualOffsets = !isLayoutViewport() && isFixedStrategy;\n var x = (clientRect.left + (addVisualOffsets && visualViewport ? visualViewport.offsetLeft : 0)) / scaleX;\n var y = (clientRect.top + (addVisualOffsets && visualViewport ? visualViewport.offsetTop : 0)) / scaleY;\n var width = clientRect.width / scaleX;\n var height = clientRect.height / scaleY;\n return {\n width: width,\n height: height,\n top: y,\n right: x + width,\n bottom: y + height,\n left: x,\n x: x,\n y: y\n };\n}","import getBoundingClientRect from \"./getBoundingClientRect.js\"; // Returns the layout rect of an element relative to its offsetParent. 
Layout\n// means it doesn't take into account transforms.\n\nexport default function getLayoutRect(element) {\n var clientRect = getBoundingClientRect(element); // Use the clientRect sizes if it's not been transformed.\n // Fixes https://github.com/popperjs/popper-core/issues/1223\n\n var width = element.offsetWidth;\n var height = element.offsetHeight;\n\n if (Math.abs(clientRect.width - width) <= 1) {\n width = clientRect.width;\n }\n\n if (Math.abs(clientRect.height - height) <= 1) {\n height = clientRect.height;\n }\n\n return {\n x: element.offsetLeft,\n y: element.offsetTop,\n width: width,\n height: height\n };\n}","import { isShadowRoot } from \"./instanceOf.js\";\nexport default function contains(parent, child) {\n var rootNode = child.getRootNode && child.getRootNode(); // First, attempt with faster native method\n\n if (parent.contains(child)) {\n return true;\n } // then fallback to custom implementation with Shadow DOM support\n else if (rootNode && isShadowRoot(rootNode)) {\n var next = child;\n\n do {\n if (next && parent.isSameNode(next)) {\n return true;\n } // $FlowFixMe[prop-missing]: need a better way to handle this...\n\n\n next = next.parentNode || next.host;\n } while (next);\n } // Give up, the result is false\n\n\n return false;\n}","import getWindow from \"./getWindow.js\";\nexport default function getComputedStyle(element) {\n return getWindow(element).getComputedStyle(element);\n}","import getNodeName from \"./getNodeName.js\";\nexport default function isTableElement(element) {\n return ['table', 'td', 'th'].indexOf(getNodeName(element)) >= 0;\n}","import { isElement } from \"./instanceOf.js\";\nexport default function getDocumentElement(element) {\n // $FlowFixMe[incompatible-return]: assume body is always available\n return ((isElement(element) ? element.ownerDocument : // $FlowFixMe[prop-missing]\n element.document) || window.document).documentElement;\n}","import getNodeName from \"./getNodeName.js\";\nimport getDocumentElement from \"./getDocumentElement.js\";\nimport { isShadowRoot } from \"./instanceOf.js\";\nexport default function getParentNode(element) {\n if (getNodeName(element) === 'html') {\n return element;\n }\n\n return (// this is a quicker (but less type safe) way to save quite some bytes from the bundle\n // $FlowFixMe[incompatible-return]\n // $FlowFixMe[prop-missing]\n element.assignedSlot || // step into the shadow DOM of the parent of a slotted node\n element.parentNode || ( // DOM Element detected\n isShadowRoot(element) ? 
element.host : null) || // ShadowRoot detected\n // $FlowFixMe[incompatible-call]: HTMLElement is a Node\n getDocumentElement(element) // fallback\n\n );\n}","import getWindow from \"./getWindow.js\";\nimport getNodeName from \"./getNodeName.js\";\nimport getComputedStyle from \"./getComputedStyle.js\";\nimport { isHTMLElement, isShadowRoot } from \"./instanceOf.js\";\nimport isTableElement from \"./isTableElement.js\";\nimport getParentNode from \"./getParentNode.js\";\nimport getUAString from \"../utils/userAgent.js\";\n\nfunction getTrueOffsetParent(element) {\n if (!isHTMLElement(element) || // https://github.com/popperjs/popper-core/issues/837\n getComputedStyle(element).position === 'fixed') {\n return null;\n }\n\n return element.offsetParent;\n} // `.offsetParent` reports `null` for fixed elements, while absolute elements\n// return the containing block\n\n\nfunction getContainingBlock(element) {\n var isFirefox = /firefox/i.test(getUAString());\n var isIE = /Trident/i.test(getUAString());\n\n if (isIE && isHTMLElement(element)) {\n // In IE 9, 10 and 11 fixed elements containing block is always established by the viewport\n var elementCss = getComputedStyle(element);\n\n if (elementCss.position === 'fixed') {\n return null;\n }\n }\n\n var currentNode = getParentNode(element);\n\n if (isShadowRoot(currentNode)) {\n currentNode = currentNode.host;\n }\n\n while (isHTMLElement(currentNode) && ['html', 'body'].indexOf(getNodeName(currentNode)) < 0) {\n var css = getComputedStyle(currentNode); // This is non-exhaustive but covers the most common CSS properties that\n // create a containing block.\n // https://developer.mozilla.org/en-US/docs/Web/CSS/Containing_block#identifying_the_containing_block\n\n if (css.transform !== 'none' || css.perspective !== 'none' || css.contain === 'paint' || ['transform', 'perspective'].indexOf(css.willChange) !== -1 || isFirefox && css.willChange === 'filter' || isFirefox && css.filter && css.filter !== 'none') {\n return currentNode;\n } else {\n currentNode = currentNode.parentNode;\n }\n }\n\n return null;\n} // Gets the closest ancestor positioned element. Handles some edge cases,\n// such as table ancestors and cross browser bugs.\n\n\nexport default function getOffsetParent(element) {\n var window = getWindow(element);\n var offsetParent = getTrueOffsetParent(element);\n\n while (offsetParent && isTableElement(offsetParent) && getComputedStyle(offsetParent).position === 'static') {\n offsetParent = getTrueOffsetParent(offsetParent);\n }\n\n if (offsetParent && (getNodeName(offsetParent) === 'html' || getNodeName(offsetParent) === 'body' && getComputedStyle(offsetParent).position === 'static')) {\n return window;\n }\n\n return offsetParent || getContainingBlock(element) || window;\n}","export default function getMainAxisFromPlacement(placement) {\n return ['top', 'bottom'].indexOf(placement) >= 0 ? 'x' : 'y';\n}","import { max as mathMax, min as mathMin } from \"./math.js\";\nexport function within(min, value, max) {\n return mathMax(min, mathMin(value, max));\n}\nexport function withinMaxClamp(min, value, max) {\n var v = within(min, value, max);\n return v > max ? 
max : v;\n}","import getFreshSideObject from \"./getFreshSideObject.js\";\nexport default function mergePaddingObject(paddingObject) {\n return Object.assign({}, getFreshSideObject(), paddingObject);\n}","export default function getFreshSideObject() {\n return {\n top: 0,\n right: 0,\n bottom: 0,\n left: 0\n };\n}","export default function expandToHashMap(value, keys) {\n return keys.reduce(function (hashMap, key) {\n hashMap[key] = value;\n return hashMap;\n }, {});\n}","import getBasePlacement from \"../utils/getBasePlacement.js\";\nimport getLayoutRect from \"../dom-utils/getLayoutRect.js\";\nimport contains from \"../dom-utils/contains.js\";\nimport getOffsetParent from \"../dom-utils/getOffsetParent.js\";\nimport getMainAxisFromPlacement from \"../utils/getMainAxisFromPlacement.js\";\nimport { within } from \"../utils/within.js\";\nimport mergePaddingObject from \"../utils/mergePaddingObject.js\";\nimport expandToHashMap from \"../utils/expandToHashMap.js\";\nimport { left, right, basePlacements, top, bottom } from \"../enums.js\"; // eslint-disable-next-line import/no-unused-modules\n\nvar toPaddingObject = function toPaddingObject(padding, state) {\n padding = typeof padding === 'function' ? padding(Object.assign({}, state.rects, {\n placement: state.placement\n })) : padding;\n return mergePaddingObject(typeof padding !== 'number' ? padding : expandToHashMap(padding, basePlacements));\n};\n\nfunction arrow(_ref) {\n var _state$modifiersData$;\n\n var state = _ref.state,\n name = _ref.name,\n options = _ref.options;\n var arrowElement = state.elements.arrow;\n var popperOffsets = state.modifiersData.popperOffsets;\n var basePlacement = getBasePlacement(state.placement);\n var axis = getMainAxisFromPlacement(basePlacement);\n var isVertical = [left, right].indexOf(basePlacement) >= 0;\n var len = isVertical ? 'height' : 'width';\n\n if (!arrowElement || !popperOffsets) {\n return;\n }\n\n var paddingObject = toPaddingObject(options.padding, state);\n var arrowRect = getLayoutRect(arrowElement);\n var minProp = axis === 'y' ? top : left;\n var maxProp = axis === 'y' ? bottom : right;\n var endDiff = state.rects.reference[len] + state.rects.reference[axis] - popperOffsets[axis] - state.rects.popper[len];\n var startDiff = popperOffsets[axis] - state.rects.reference[axis];\n var arrowOffsetParent = getOffsetParent(arrowElement);\n var clientSize = arrowOffsetParent ? axis === 'y' ? arrowOffsetParent.clientHeight || 0 : arrowOffsetParent.clientWidth || 0 : 0;\n var centerToReference = endDiff / 2 - startDiff / 2; // Make sure the arrow doesn't overflow the popper if the center point is\n // outside of the popper bounds\n\n var min = paddingObject[minProp];\n var max = clientSize - arrowRect[len] - paddingObject[maxProp];\n var center = clientSize / 2 - arrowRect[len] / 2 + centerToReference;\n var offset = within(min, center, max); // Prevents breaking syntax highlighting...\n\n var axisProp = axis;\n state.modifiersData[name] = (_state$modifiersData$ = {}, _state$modifiersData$[axisProp] = offset, _state$modifiersData$.centerOffset = offset - center, _state$modifiersData$);\n}\n\nfunction effect(_ref2) {\n var state = _ref2.state,\n options = _ref2.options;\n var _options$element = options.element,\n arrowElement = _options$element === void 0 ? 
'[data-popper-arrow]' : _options$element;\n\n if (arrowElement == null) {\n return;\n } // CSS selector\n\n\n if (typeof arrowElement === 'string') {\n arrowElement = state.elements.popper.querySelector(arrowElement);\n\n if (!arrowElement) {\n return;\n }\n }\n\n if (!contains(state.elements.popper, arrowElement)) {\n return;\n }\n\n state.elements.arrow = arrowElement;\n} // eslint-disable-next-line import/no-unused-modules\n\n\nexport default {\n name: 'arrow',\n enabled: true,\n phase: 'main',\n fn: arrow,\n effect: effect,\n requires: ['popperOffsets'],\n requiresIfExists: ['preventOverflow']\n};","export default function getVariation(placement) {\n return placement.split('-')[1];\n}","import { top, left, right, bottom, end } from \"../enums.js\";\nimport getOffsetParent from \"../dom-utils/getOffsetParent.js\";\nimport getWindow from \"../dom-utils/getWindow.js\";\nimport getDocumentElement from \"../dom-utils/getDocumentElement.js\";\nimport getComputedStyle from \"../dom-utils/getComputedStyle.js\";\nimport getBasePlacement from \"../utils/getBasePlacement.js\";\nimport getVariation from \"../utils/getVariation.js\";\nimport { round } from \"../utils/math.js\"; // eslint-disable-next-line import/no-unused-modules\n\nvar unsetSides = {\n top: 'auto',\n right: 'auto',\n bottom: 'auto',\n left: 'auto'\n}; // Round the offsets to the nearest suitable subpixel based on the DPR.\n// Zooming can change the DPR, but it seems to report a value that will\n// cleanly divide the values into the appropriate subpixels.\n\nfunction roundOffsetsByDPR(_ref, win) {\n var x = _ref.x,\n y = _ref.y;\n var dpr = win.devicePixelRatio || 1;\n return {\n x: round(x * dpr) / dpr || 0,\n y: round(y * dpr) / dpr || 0\n };\n}\n\nexport function mapToStyles(_ref2) {\n var _Object$assign2;\n\n var popper = _ref2.popper,\n popperRect = _ref2.popperRect,\n placement = _ref2.placement,\n variation = _ref2.variation,\n offsets = _ref2.offsets,\n position = _ref2.position,\n gpuAcceleration = _ref2.gpuAcceleration,\n adaptive = _ref2.adaptive,\n roundOffsets = _ref2.roundOffsets,\n isFixed = _ref2.isFixed;\n var _offsets$x = offsets.x,\n x = _offsets$x === void 0 ? 0 : _offsets$x,\n _offsets$y = offsets.y,\n y = _offsets$y === void 0 ? 0 : _offsets$y;\n\n var _ref3 = typeof roundOffsets === 'function' ? roundOffsets({\n x: x,\n y: y\n }) : {\n x: x,\n y: y\n };\n\n x = _ref3.x;\n y = _ref3.y;\n var hasX = offsets.hasOwnProperty('x');\n var hasY = offsets.hasOwnProperty('y');\n var sideX = left;\n var sideY = top;\n var win = window;\n\n if (adaptive) {\n var offsetParent = getOffsetParent(popper);\n var heightProp = 'clientHeight';\n var widthProp = 'clientWidth';\n\n if (offsetParent === getWindow(popper)) {\n offsetParent = getDocumentElement(popper);\n\n if (getComputedStyle(offsetParent).position !== 'static' && position === 'absolute') {\n heightProp = 'scrollHeight';\n widthProp = 'scrollWidth';\n }\n } // $FlowFixMe[incompatible-cast]: force type refinement, we compare offsetParent with window above, but Flow doesn't detect it\n\n\n offsetParent = offsetParent;\n\n if (placement === top || (placement === left || placement === right) && variation === end) {\n sideY = bottom;\n var offsetY = isFixed && offsetParent === win && win.visualViewport ? win.visualViewport.height : // $FlowFixMe[prop-missing]\n offsetParent[heightProp];\n y -= offsetY - popperRect.height;\n y *= gpuAcceleration ? 
1 : -1;\n }\n\n if (placement === left || (placement === top || placement === bottom) && variation === end) {\n sideX = right;\n var offsetX = isFixed && offsetParent === win && win.visualViewport ? win.visualViewport.width : // $FlowFixMe[prop-missing]\n offsetParent[widthProp];\n x -= offsetX - popperRect.width;\n x *= gpuAcceleration ? 1 : -1;\n }\n }\n\n var commonStyles = Object.assign({\n position: position\n }, adaptive && unsetSides);\n\n var _ref4 = roundOffsets === true ? roundOffsetsByDPR({\n x: x,\n y: y\n }, getWindow(popper)) : {\n x: x,\n y: y\n };\n\n x = _ref4.x;\n y = _ref4.y;\n\n if (gpuAcceleration) {\n var _Object$assign;\n\n return Object.assign({}, commonStyles, (_Object$assign = {}, _Object$assign[sideY] = hasY ? '0' : '', _Object$assign[sideX] = hasX ? '0' : '', _Object$assign.transform = (win.devicePixelRatio || 1) <= 1 ? \"translate(\" + x + \"px, \" + y + \"px)\" : \"translate3d(\" + x + \"px, \" + y + \"px, 0)\", _Object$assign));\n }\n\n return Object.assign({}, commonStyles, (_Object$assign2 = {}, _Object$assign2[sideY] = hasY ? y + \"px\" : '', _Object$assign2[sideX] = hasX ? x + \"px\" : '', _Object$assign2.transform = '', _Object$assign2));\n}\n\nfunction computeStyles(_ref5) {\n var state = _ref5.state,\n options = _ref5.options;\n var _options$gpuAccelerat = options.gpuAcceleration,\n gpuAcceleration = _options$gpuAccelerat === void 0 ? true : _options$gpuAccelerat,\n _options$adaptive = options.adaptive,\n adaptive = _options$adaptive === void 0 ? true : _options$adaptive,\n _options$roundOffsets = options.roundOffsets,\n roundOffsets = _options$roundOffsets === void 0 ? true : _options$roundOffsets;\n var commonStyles = {\n placement: getBasePlacement(state.placement),\n variation: getVariation(state.placement),\n popper: state.elements.popper,\n popperRect: state.rects.popper,\n gpuAcceleration: gpuAcceleration,\n isFixed: state.options.strategy === 'fixed'\n };\n\n if (state.modifiersData.popperOffsets != null) {\n state.styles.popper = Object.assign({}, state.styles.popper, mapToStyles(Object.assign({}, commonStyles, {\n offsets: state.modifiersData.popperOffsets,\n position: state.options.strategy,\n adaptive: adaptive,\n roundOffsets: roundOffsets\n })));\n }\n\n if (state.modifiersData.arrow != null) {\n state.styles.arrow = Object.assign({}, state.styles.arrow, mapToStyles(Object.assign({}, commonStyles, {\n offsets: state.modifiersData.arrow,\n position: 'absolute',\n adaptive: false,\n roundOffsets: roundOffsets\n })));\n }\n\n state.attributes.popper = Object.assign({}, state.attributes.popper, {\n 'data-popper-placement': state.placement\n });\n} // eslint-disable-next-line import/no-unused-modules\n\n\nexport default {\n name: 'computeStyles',\n enabled: true,\n phase: 'beforeWrite',\n fn: computeStyles,\n data: {}\n};","import getWindow from \"../dom-utils/getWindow.js\"; // eslint-disable-next-line import/no-unused-modules\n\nvar passive = {\n passive: true\n};\n\nfunction effect(_ref) {\n var state = _ref.state,\n instance = _ref.instance,\n options = _ref.options;\n var _options$scroll = options.scroll,\n scroll = _options$scroll === void 0 ? true : _options$scroll,\n _options$resize = options.resize,\n resize = _options$resize === void 0 ? 
true : _options$resize;\n var window = getWindow(state.elements.popper);\n var scrollParents = [].concat(state.scrollParents.reference, state.scrollParents.popper);\n\n if (scroll) {\n scrollParents.forEach(function (scrollParent) {\n scrollParent.addEventListener('scroll', instance.update, passive);\n });\n }\n\n if (resize) {\n window.addEventListener('resize', instance.update, passive);\n }\n\n return function () {\n if (scroll) {\n scrollParents.forEach(function (scrollParent) {\n scrollParent.removeEventListener('scroll', instance.update, passive);\n });\n }\n\n if (resize) {\n window.removeEventListener('resize', instance.update, passive);\n }\n };\n} // eslint-disable-next-line import/no-unused-modules\n\n\nexport default {\n name: 'eventListeners',\n enabled: true,\n phase: 'write',\n fn: function fn() {},\n effect: effect,\n data: {}\n};","var hash = {\n left: 'right',\n right: 'left',\n bottom: 'top',\n top: 'bottom'\n};\nexport default function getOppositePlacement(placement) {\n return placement.replace(/left|right|bottom|top/g, function (matched) {\n return hash[matched];\n });\n}","var hash = {\n start: 'end',\n end: 'start'\n};\nexport default function getOppositeVariationPlacement(placement) {\n return placement.replace(/start|end/g, function (matched) {\n return hash[matched];\n });\n}","import getWindow from \"./getWindow.js\";\nexport default function getWindowScroll(node) {\n var win = getWindow(node);\n var scrollLeft = win.pageXOffset;\n var scrollTop = win.pageYOffset;\n return {\n scrollLeft: scrollLeft,\n scrollTop: scrollTop\n };\n}","import getBoundingClientRect from \"./getBoundingClientRect.js\";\nimport getDocumentElement from \"./getDocumentElement.js\";\nimport getWindowScroll from \"./getWindowScroll.js\";\nexport default function getWindowScrollBarX(element) {\n // If has a CSS width greater than the viewport, then this will be\n // incorrect for RTL.\n // Popper 1 is broken in this case and never had a bug report so let's assume\n // it's not an issue. I don't think anyone ever specifies width on \n // anyway.\n // Browsers where the left scrollbar doesn't cause an issue report `0` for\n // this (e.g. 
Edge 2019, IE11, Safari)\n return getBoundingClientRect(getDocumentElement(element)).left + getWindowScroll(element).scrollLeft;\n}","import getComputedStyle from \"./getComputedStyle.js\";\nexport default function isScrollParent(element) {\n // Firefox wants us to check `-x` and `-y` variations as well\n var _getComputedStyle = getComputedStyle(element),\n overflow = _getComputedStyle.overflow,\n overflowX = _getComputedStyle.overflowX,\n overflowY = _getComputedStyle.overflowY;\n\n return /auto|scroll|overlay|hidden/.test(overflow + overflowY + overflowX);\n}","import getParentNode from \"./getParentNode.js\";\nimport isScrollParent from \"./isScrollParent.js\";\nimport getNodeName from \"./getNodeName.js\";\nimport { isHTMLElement } from \"./instanceOf.js\";\nexport default function getScrollParent(node) {\n if (['html', 'body', '#document'].indexOf(getNodeName(node)) >= 0) {\n // $FlowFixMe[incompatible-return]: assume body is always available\n return node.ownerDocument.body;\n }\n\n if (isHTMLElement(node) && isScrollParent(node)) {\n return node;\n }\n\n return getScrollParent(getParentNode(node));\n}","import getScrollParent from \"./getScrollParent.js\";\nimport getParentNode from \"./getParentNode.js\";\nimport getWindow from \"./getWindow.js\";\nimport isScrollParent from \"./isScrollParent.js\";\n/*\ngiven a DOM element, return the list of all scroll parents, up the list of ancesors\nuntil we get to the top window object. This list is what we attach scroll listeners\nto, because if any of these parent elements scroll, we'll need to re-calculate the\nreference element's position.\n*/\n\nexport default function listScrollParents(element, list) {\n var _element$ownerDocumen;\n\n if (list === void 0) {\n list = [];\n }\n\n var scrollParent = getScrollParent(element);\n var isBody = scrollParent === ((_element$ownerDocumen = element.ownerDocument) == null ? void 0 : _element$ownerDocumen.body);\n var win = getWindow(scrollParent);\n var target = isBody ? [win].concat(win.visualViewport || [], isScrollParent(scrollParent) ? scrollParent : []) : scrollParent;\n var updatedList = list.concat(target);\n return isBody ? 
updatedList : // $FlowFixMe[incompatible-call]: isBody tells us target will be an HTMLElement here\n updatedList.concat(listScrollParents(getParentNode(target)));\n}","export default function rectToClientRect(rect) {\n return Object.assign({}, rect, {\n left: rect.x,\n top: rect.y,\n right: rect.x + rect.width,\n bottom: rect.y + rect.height\n });\n}","import { viewport } from \"../enums.js\";\nimport getViewportRect from \"./getViewportRect.js\";\nimport getDocumentRect from \"./getDocumentRect.js\";\nimport listScrollParents from \"./listScrollParents.js\";\nimport getOffsetParent from \"./getOffsetParent.js\";\nimport getDocumentElement from \"./getDocumentElement.js\";\nimport getComputedStyle from \"./getComputedStyle.js\";\nimport { isElement, isHTMLElement } from \"./instanceOf.js\";\nimport getBoundingClientRect from \"./getBoundingClientRect.js\";\nimport getParentNode from \"./getParentNode.js\";\nimport contains from \"./contains.js\";\nimport getNodeName from \"./getNodeName.js\";\nimport rectToClientRect from \"../utils/rectToClientRect.js\";\nimport { max, min } from \"../utils/math.js\";\n\nfunction getInnerBoundingClientRect(element, strategy) {\n var rect = getBoundingClientRect(element, false, strategy === 'fixed');\n rect.top = rect.top + element.clientTop;\n rect.left = rect.left + element.clientLeft;\n rect.bottom = rect.top + element.clientHeight;\n rect.right = rect.left + element.clientWidth;\n rect.width = element.clientWidth;\n rect.height = element.clientHeight;\n rect.x = rect.left;\n rect.y = rect.top;\n return rect;\n}\n\nfunction getClientRectFromMixedType(element, clippingParent, strategy) {\n return clippingParent === viewport ? rectToClientRect(getViewportRect(element, strategy)) : isElement(clippingParent) ? getInnerBoundingClientRect(clippingParent, strategy) : rectToClientRect(getDocumentRect(getDocumentElement(element)));\n} // A \"clipping parent\" is an overflowable container with the characteristic of\n// clipping (or hiding) overflowing elements with a position different from\n// `initial`\n\n\nfunction getClippingParents(element) {\n var clippingParents = listScrollParents(getParentNode(element));\n var canEscapeClipping = ['absolute', 'fixed'].indexOf(getComputedStyle(element).position) >= 0;\n var clipperElement = canEscapeClipping && isHTMLElement(element) ? getOffsetParent(element) : element;\n\n if (!isElement(clipperElement)) {\n return [];\n } // $FlowFixMe[incompatible-return]: https://github.com/facebook/flow/issues/1414\n\n\n return clippingParents.filter(function (clippingParent) {\n return isElement(clippingParent) && contains(clippingParent, clipperElement) && getNodeName(clippingParent) !== 'body';\n });\n} // Gets the maximum area that the element is visible in due to any number of\n// clipping parents\n\n\nexport default function getClippingRect(element, boundary, rootBoundary, strategy) {\n var mainClippingParents = boundary === 'clippingParents' ? 
getClippingParents(element) : [].concat(boundary);\n var clippingParents = [].concat(mainClippingParents, [rootBoundary]);\n var firstClippingParent = clippingParents[0];\n var clippingRect = clippingParents.reduce(function (accRect, clippingParent) {\n var rect = getClientRectFromMixedType(element, clippingParent, strategy);\n accRect.top = max(rect.top, accRect.top);\n accRect.right = min(rect.right, accRect.right);\n accRect.bottom = min(rect.bottom, accRect.bottom);\n accRect.left = max(rect.left, accRect.left);\n return accRect;\n }, getClientRectFromMixedType(element, firstClippingParent, strategy));\n clippingRect.width = clippingRect.right - clippingRect.left;\n clippingRect.height = clippingRect.bottom - clippingRect.top;\n clippingRect.x = clippingRect.left;\n clippingRect.y = clippingRect.top;\n return clippingRect;\n}","import getWindow from \"./getWindow.js\";\nimport getDocumentElement from \"./getDocumentElement.js\";\nimport getWindowScrollBarX from \"./getWindowScrollBarX.js\";\nimport isLayoutViewport from \"./isLayoutViewport.js\";\nexport default function getViewportRect(element, strategy) {\n var win = getWindow(element);\n var html = getDocumentElement(element);\n var visualViewport = win.visualViewport;\n var width = html.clientWidth;\n var height = html.clientHeight;\n var x = 0;\n var y = 0;\n\n if (visualViewport) {\n width = visualViewport.width;\n height = visualViewport.height;\n var layoutViewport = isLayoutViewport();\n\n if (layoutViewport || !layoutViewport && strategy === 'fixed') {\n x = visualViewport.offsetLeft;\n y = visualViewport.offsetTop;\n }\n }\n\n return {\n width: width,\n height: height,\n x: x + getWindowScrollBarX(element),\n y: y\n };\n}","import getDocumentElement from \"./getDocumentElement.js\";\nimport getComputedStyle from \"./getComputedStyle.js\";\nimport getWindowScrollBarX from \"./getWindowScrollBarX.js\";\nimport getWindowScroll from \"./getWindowScroll.js\";\nimport { max } from \"../utils/math.js\"; // Gets the entire size of the scrollable document area, even extending outside\n// of the `` and `` rect bounds if horizontally scrollable\n\nexport default function getDocumentRect(element) {\n var _element$ownerDocumen;\n\n var html = getDocumentElement(element);\n var winScroll = getWindowScroll(element);\n var body = (_element$ownerDocumen = element.ownerDocument) == null ? void 0 : _element$ownerDocumen.body;\n var width = max(html.scrollWidth, html.clientWidth, body ? body.scrollWidth : 0, body ? body.clientWidth : 0);\n var height = max(html.scrollHeight, html.clientHeight, body ? body.scrollHeight : 0, body ? body.clientHeight : 0);\n var x = -winScroll.scrollLeft + getWindowScrollBarX(element);\n var y = -winScroll.scrollTop;\n\n if (getComputedStyle(body || html).direction === 'rtl') {\n x += max(html.clientWidth, body ? body.clientWidth : 0) - width;\n }\n\n return {\n width: width,\n height: height,\n x: x,\n y: y\n };\n}","import getBasePlacement from \"./getBasePlacement.js\";\nimport getVariation from \"./getVariation.js\";\nimport getMainAxisFromPlacement from \"./getMainAxisFromPlacement.js\";\nimport { top, right, bottom, left, start, end } from \"../enums.js\";\nexport default function computeOffsets(_ref) {\n var reference = _ref.reference,\n element = _ref.element,\n placement = _ref.placement;\n var basePlacement = placement ? getBasePlacement(placement) : null;\n var variation = placement ? 
getVariation(placement) : null;\n var commonX = reference.x + reference.width / 2 - element.width / 2;\n var commonY = reference.y + reference.height / 2 - element.height / 2;\n var offsets;\n\n switch (basePlacement) {\n case top:\n offsets = {\n x: commonX,\n y: reference.y - element.height\n };\n break;\n\n case bottom:\n offsets = {\n x: commonX,\n y: reference.y + reference.height\n };\n break;\n\n case right:\n offsets = {\n x: reference.x + reference.width,\n y: commonY\n };\n break;\n\n case left:\n offsets = {\n x: reference.x - element.width,\n y: commonY\n };\n break;\n\n default:\n offsets = {\n x: reference.x,\n y: reference.y\n };\n }\n\n var mainAxis = basePlacement ? getMainAxisFromPlacement(basePlacement) : null;\n\n if (mainAxis != null) {\n var len = mainAxis === 'y' ? 'height' : 'width';\n\n switch (variation) {\n case start:\n offsets[mainAxis] = offsets[mainAxis] - (reference[len] / 2 - element[len] / 2);\n break;\n\n case end:\n offsets[mainAxis] = offsets[mainAxis] + (reference[len] / 2 - element[len] / 2);\n break;\n\n default:\n }\n }\n\n return offsets;\n}","import getClippingRect from \"../dom-utils/getClippingRect.js\";\nimport getDocumentElement from \"../dom-utils/getDocumentElement.js\";\nimport getBoundingClientRect from \"../dom-utils/getBoundingClientRect.js\";\nimport computeOffsets from \"./computeOffsets.js\";\nimport rectToClientRect from \"./rectToClientRect.js\";\nimport { clippingParents, reference, popper, bottom, top, right, basePlacements, viewport } from \"../enums.js\";\nimport { isElement } from \"../dom-utils/instanceOf.js\";\nimport mergePaddingObject from \"./mergePaddingObject.js\";\nimport expandToHashMap from \"./expandToHashMap.js\"; // eslint-disable-next-line import/no-unused-modules\n\nexport default function detectOverflow(state, options) {\n if (options === void 0) {\n options = {};\n }\n\n var _options = options,\n _options$placement = _options.placement,\n placement = _options$placement === void 0 ? state.placement : _options$placement,\n _options$strategy = _options.strategy,\n strategy = _options$strategy === void 0 ? state.strategy : _options$strategy,\n _options$boundary = _options.boundary,\n boundary = _options$boundary === void 0 ? clippingParents : _options$boundary,\n _options$rootBoundary = _options.rootBoundary,\n rootBoundary = _options$rootBoundary === void 0 ? viewport : _options$rootBoundary,\n _options$elementConte = _options.elementContext,\n elementContext = _options$elementConte === void 0 ? popper : _options$elementConte,\n _options$altBoundary = _options.altBoundary,\n altBoundary = _options$altBoundary === void 0 ? false : _options$altBoundary,\n _options$padding = _options.padding,\n padding = _options$padding === void 0 ? 0 : _options$padding;\n var paddingObject = mergePaddingObject(typeof padding !== 'number' ? padding : expandToHashMap(padding, basePlacements));\n var altContext = elementContext === popper ? reference : popper;\n var popperRect = state.rects.popper;\n var element = state.elements[altBoundary ? altContext : elementContext];\n var clippingClientRect = getClippingRect(isElement(element) ? 
element : element.contextElement || getDocumentElement(state.elements.popper), boundary, rootBoundary, strategy);\n var referenceClientRect = getBoundingClientRect(state.elements.reference);\n var popperOffsets = computeOffsets({\n reference: referenceClientRect,\n element: popperRect,\n strategy: 'absolute',\n placement: placement\n });\n var popperClientRect = rectToClientRect(Object.assign({}, popperRect, popperOffsets));\n var elementClientRect = elementContext === popper ? popperClientRect : referenceClientRect; // positive = overflowing the clipping rect\n // 0 or negative = within the clipping rect\n\n var overflowOffsets = {\n top: clippingClientRect.top - elementClientRect.top + paddingObject.top,\n bottom: elementClientRect.bottom - clippingClientRect.bottom + paddingObject.bottom,\n left: clippingClientRect.left - elementClientRect.left + paddingObject.left,\n right: elementClientRect.right - clippingClientRect.right + paddingObject.right\n };\n var offsetData = state.modifiersData.offset; // Offsets can be applied only to the popper element\n\n if (elementContext === popper && offsetData) {\n var offset = offsetData[placement];\n Object.keys(overflowOffsets).forEach(function (key) {\n var multiply = [right, bottom].indexOf(key) >= 0 ? 1 : -1;\n var axis = [top, bottom].indexOf(key) >= 0 ? 'y' : 'x';\n overflowOffsets[key] += offset[axis] * multiply;\n });\n }\n\n return overflowOffsets;\n}","import getVariation from \"./getVariation.js\";\nimport { variationPlacements, basePlacements, placements as allPlacements } from \"../enums.js\";\nimport detectOverflow from \"./detectOverflow.js\";\nimport getBasePlacement from \"./getBasePlacement.js\";\nexport default function computeAutoPlacement(state, options) {\n if (options === void 0) {\n options = {};\n }\n\n var _options = options,\n placement = _options.placement,\n boundary = _options.boundary,\n rootBoundary = _options.rootBoundary,\n padding = _options.padding,\n flipVariations = _options.flipVariations,\n _options$allowedAutoP = _options.allowedAutoPlacements,\n allowedAutoPlacements = _options$allowedAutoP === void 0 ? allPlacements : _options$allowedAutoP;\n var variation = getVariation(placement);\n var placements = variation ? flipVariations ? 
variationPlacements : variationPlacements.filter(function (placement) {\n return getVariation(placement) === variation;\n }) : basePlacements;\n var allowedPlacements = placements.filter(function (placement) {\n return allowedAutoPlacements.indexOf(placement) >= 0;\n });\n\n if (allowedPlacements.length === 0) {\n allowedPlacements = placements;\n } // $FlowFixMe[incompatible-type]: Flow seems to have problems with two array unions...\n\n\n var overflows = allowedPlacements.reduce(function (acc, placement) {\n acc[placement] = detectOverflow(state, {\n placement: placement,\n boundary: boundary,\n rootBoundary: rootBoundary,\n padding: padding\n })[getBasePlacement(placement)];\n return acc;\n }, {});\n return Object.keys(overflows).sort(function (a, b) {\n return overflows[a] - overflows[b];\n });\n}","import getOppositePlacement from \"../utils/getOppositePlacement.js\";\nimport getBasePlacement from \"../utils/getBasePlacement.js\";\nimport getOppositeVariationPlacement from \"../utils/getOppositeVariationPlacement.js\";\nimport detectOverflow from \"../utils/detectOverflow.js\";\nimport computeAutoPlacement from \"../utils/computeAutoPlacement.js\";\nimport { bottom, top, start, right, left, auto } from \"../enums.js\";\nimport getVariation from \"../utils/getVariation.js\"; // eslint-disable-next-line import/no-unused-modules\n\nfunction getExpandedFallbackPlacements(placement) {\n if (getBasePlacement(placement) === auto) {\n return [];\n }\n\n var oppositePlacement = getOppositePlacement(placement);\n return [getOppositeVariationPlacement(placement), oppositePlacement, getOppositeVariationPlacement(oppositePlacement)];\n}\n\nfunction flip(_ref) {\n var state = _ref.state,\n options = _ref.options,\n name = _ref.name;\n\n if (state.modifiersData[name]._skip) {\n return;\n }\n\n var _options$mainAxis = options.mainAxis,\n checkMainAxis = _options$mainAxis === void 0 ? true : _options$mainAxis,\n _options$altAxis = options.altAxis,\n checkAltAxis = _options$altAxis === void 0 ? true : _options$altAxis,\n specifiedFallbackPlacements = options.fallbackPlacements,\n padding = options.padding,\n boundary = options.boundary,\n rootBoundary = options.rootBoundary,\n altBoundary = options.altBoundary,\n _options$flipVariatio = options.flipVariations,\n flipVariations = _options$flipVariatio === void 0 ? true : _options$flipVariatio,\n allowedAutoPlacements = options.allowedAutoPlacements;\n var preferredPlacement = state.options.placement;\n var basePlacement = getBasePlacement(preferredPlacement);\n var isBasePlacement = basePlacement === preferredPlacement;\n var fallbackPlacements = specifiedFallbackPlacements || (isBasePlacement || !flipVariations ? [getOppositePlacement(preferredPlacement)] : getExpandedFallbackPlacements(preferredPlacement));\n var placements = [preferredPlacement].concat(fallbackPlacements).reduce(function (acc, placement) {\n return acc.concat(getBasePlacement(placement) === auto ? 
computeAutoPlacement(state, {\n placement: placement,\n boundary: boundary,\n rootBoundary: rootBoundary,\n padding: padding,\n flipVariations: flipVariations,\n allowedAutoPlacements: allowedAutoPlacements\n }) : placement);\n }, []);\n var referenceRect = state.rects.reference;\n var popperRect = state.rects.popper;\n var checksMap = new Map();\n var makeFallbackChecks = true;\n var firstFittingPlacement = placements[0];\n\n for (var i = 0; i < placements.length; i++) {\n var placement = placements[i];\n\n var _basePlacement = getBasePlacement(placement);\n\n var isStartVariation = getVariation(placement) === start;\n var isVertical = [top, bottom].indexOf(_basePlacement) >= 0;\n var len = isVertical ? 'width' : 'height';\n var overflow = detectOverflow(state, {\n placement: placement,\n boundary: boundary,\n rootBoundary: rootBoundary,\n altBoundary: altBoundary,\n padding: padding\n });\n var mainVariationSide = isVertical ? isStartVariation ? right : left : isStartVariation ? bottom : top;\n\n if (referenceRect[len] > popperRect[len]) {\n mainVariationSide = getOppositePlacement(mainVariationSide);\n }\n\n var altVariationSide = getOppositePlacement(mainVariationSide);\n var checks = [];\n\n if (checkMainAxis) {\n checks.push(overflow[_basePlacement] <= 0);\n }\n\n if (checkAltAxis) {\n checks.push(overflow[mainVariationSide] <= 0, overflow[altVariationSide] <= 0);\n }\n\n if (checks.every(function (check) {\n return check;\n })) {\n firstFittingPlacement = placement;\n makeFallbackChecks = false;\n break;\n }\n\n checksMap.set(placement, checks);\n }\n\n if (makeFallbackChecks) {\n // `2` may be desired in some cases – research later\n var numberOfChecks = flipVariations ? 3 : 1;\n\n var _loop = function _loop(_i) {\n var fittingPlacement = placements.find(function (placement) {\n var checks = checksMap.get(placement);\n\n if (checks) {\n return checks.slice(0, _i).every(function (check) {\n return check;\n });\n }\n });\n\n if (fittingPlacement) {\n firstFittingPlacement = fittingPlacement;\n return \"break\";\n }\n };\n\n for (var _i = numberOfChecks; _i > 0; _i--) {\n var _ret = _loop(_i);\n\n if (_ret === \"break\") break;\n }\n }\n\n if (state.placement !== firstFittingPlacement) {\n state.modifiersData[name]._skip = true;\n state.placement = firstFittingPlacement;\n state.reset = true;\n }\n} // eslint-disable-next-line import/no-unused-modules\n\n\nexport default {\n name: 'flip',\n enabled: true,\n phase: 'main',\n fn: flip,\n requiresIfExists: ['offset'],\n data: {\n _skip: false\n }\n};","import { top, bottom, left, right } from \"../enums.js\";\nimport detectOverflow from \"../utils/detectOverflow.js\";\n\nfunction getSideOffsets(overflow, rect, preventedOffsets) {\n if (preventedOffsets === void 0) {\n preventedOffsets = {\n x: 0,\n y: 0\n };\n }\n\n return {\n top: overflow.top - rect.height - preventedOffsets.y,\n right: overflow.right - rect.width + preventedOffsets.x,\n bottom: overflow.bottom - rect.height + preventedOffsets.y,\n left: overflow.left - rect.width - preventedOffsets.x\n };\n}\n\nfunction isAnySideFullyClipped(overflow) {\n return [top, right, bottom, left].some(function (side) {\n return overflow[side] >= 0;\n });\n}\n\nfunction hide(_ref) {\n var state = _ref.state,\n name = _ref.name;\n var referenceRect = state.rects.reference;\n var popperRect = state.rects.popper;\n var preventedOffsets = state.modifiersData.preventOverflow;\n var referenceOverflow = detectOverflow(state, {\n elementContext: 'reference'\n });\n var popperAltOverflow = 
detectOverflow(state, {\n altBoundary: true\n });\n var referenceClippingOffsets = getSideOffsets(referenceOverflow, referenceRect);\n var popperEscapeOffsets = getSideOffsets(popperAltOverflow, popperRect, preventedOffsets);\n var isReferenceHidden = isAnySideFullyClipped(referenceClippingOffsets);\n var hasPopperEscaped = isAnySideFullyClipped(popperEscapeOffsets);\n state.modifiersData[name] = {\n referenceClippingOffsets: referenceClippingOffsets,\n popperEscapeOffsets: popperEscapeOffsets,\n isReferenceHidden: isReferenceHidden,\n hasPopperEscaped: hasPopperEscaped\n };\n state.attributes.popper = Object.assign({}, state.attributes.popper, {\n 'data-popper-reference-hidden': isReferenceHidden,\n 'data-popper-escaped': hasPopperEscaped\n });\n} // eslint-disable-next-line import/no-unused-modules\n\n\nexport default {\n name: 'hide',\n enabled: true,\n phase: 'main',\n requiresIfExists: ['preventOverflow'],\n fn: hide\n};","import getBasePlacement from \"../utils/getBasePlacement.js\";\nimport { top, left, right, placements } from \"../enums.js\"; // eslint-disable-next-line import/no-unused-modules\n\nexport function distanceAndSkiddingToXY(placement, rects, offset) {\n var basePlacement = getBasePlacement(placement);\n var invertDistance = [left, top].indexOf(basePlacement) >= 0 ? -1 : 1;\n\n var _ref = typeof offset === 'function' ? offset(Object.assign({}, rects, {\n placement: placement\n })) : offset,\n skidding = _ref[0],\n distance = _ref[1];\n\n skidding = skidding || 0;\n distance = (distance || 0) * invertDistance;\n return [left, right].indexOf(basePlacement) >= 0 ? {\n x: distance,\n y: skidding\n } : {\n x: skidding,\n y: distance\n };\n}\n\nfunction offset(_ref2) {\n var state = _ref2.state,\n options = _ref2.options,\n name = _ref2.name;\n var _options$offset = options.offset,\n offset = _options$offset === void 0 ? 
[0, 0] : _options$offset;\n var data = placements.reduce(function (acc, placement) {\n acc[placement] = distanceAndSkiddingToXY(placement, state.rects, offset);\n return acc;\n }, {});\n var _data$state$placement = data[state.placement],\n x = _data$state$placement.x,\n y = _data$state$placement.y;\n\n if (state.modifiersData.popperOffsets != null) {\n state.modifiersData.popperOffsets.x += x;\n state.modifiersData.popperOffsets.y += y;\n }\n\n state.modifiersData[name] = data;\n} // eslint-disable-next-line import/no-unused-modules\n\n\nexport default {\n name: 'offset',\n enabled: true,\n phase: 'main',\n requires: ['popperOffsets'],\n fn: offset\n};","import computeOffsets from \"../utils/computeOffsets.js\";\n\nfunction popperOffsets(_ref) {\n var state = _ref.state,\n name = _ref.name;\n // Offsets are the actual position the popper needs to have to be\n // properly positioned near its reference element\n // This is the most basic placement, and will be adjusted by\n // the modifiers in the next step\n state.modifiersData[name] = computeOffsets({\n reference: state.rects.reference,\n element: state.rects.popper,\n strategy: 'absolute',\n placement: state.placement\n });\n} // eslint-disable-next-line import/no-unused-modules\n\n\nexport default {\n name: 'popperOffsets',\n enabled: true,\n phase: 'read',\n fn: popperOffsets,\n data: {}\n};","import { top, left, right, bottom, start } from \"../enums.js\";\nimport getBasePlacement from \"../utils/getBasePlacement.js\";\nimport getMainAxisFromPlacement from \"../utils/getMainAxisFromPlacement.js\";\nimport getAltAxis from \"../utils/getAltAxis.js\";\nimport { within, withinMaxClamp } from \"../utils/within.js\";\nimport getLayoutRect from \"../dom-utils/getLayoutRect.js\";\nimport getOffsetParent from \"../dom-utils/getOffsetParent.js\";\nimport detectOverflow from \"../utils/detectOverflow.js\";\nimport getVariation from \"../utils/getVariation.js\";\nimport getFreshSideObject from \"../utils/getFreshSideObject.js\";\nimport { min as mathMin, max as mathMax } from \"../utils/math.js\";\n\nfunction preventOverflow(_ref) {\n var state = _ref.state,\n options = _ref.options,\n name = _ref.name;\n var _options$mainAxis = options.mainAxis,\n checkMainAxis = _options$mainAxis === void 0 ? true : _options$mainAxis,\n _options$altAxis = options.altAxis,\n checkAltAxis = _options$altAxis === void 0 ? false : _options$altAxis,\n boundary = options.boundary,\n rootBoundary = options.rootBoundary,\n altBoundary = options.altBoundary,\n padding = options.padding,\n _options$tether = options.tether,\n tether = _options$tether === void 0 ? true : _options$tether,\n _options$tetherOffset = options.tetherOffset,\n tetherOffset = _options$tetherOffset === void 0 ? 0 : _options$tetherOffset;\n var overflow = detectOverflow(state, {\n boundary: boundary,\n rootBoundary: rootBoundary,\n padding: padding,\n altBoundary: altBoundary\n });\n var basePlacement = getBasePlacement(state.placement);\n var variation = getVariation(state.placement);\n var isBasePlacement = !variation;\n var mainAxis = getMainAxisFromPlacement(basePlacement);\n var altAxis = getAltAxis(mainAxis);\n var popperOffsets = state.modifiersData.popperOffsets;\n var referenceRect = state.rects.reference;\n var popperRect = state.rects.popper;\n var tetherOffsetValue = typeof tetherOffset === 'function' ? tetherOffset(Object.assign({}, state.rects, {\n placement: state.placement\n })) : tetherOffset;\n var normalizedTetherOffsetValue = typeof tetherOffsetValue === 'number' ? 
{\n mainAxis: tetherOffsetValue,\n altAxis: tetherOffsetValue\n } : Object.assign({\n mainAxis: 0,\n altAxis: 0\n }, tetherOffsetValue);\n var offsetModifierState = state.modifiersData.offset ? state.modifiersData.offset[state.placement] : null;\n var data = {\n x: 0,\n y: 0\n };\n\n if (!popperOffsets) {\n return;\n }\n\n if (checkMainAxis) {\n var _offsetModifierState$;\n\n var mainSide = mainAxis === 'y' ? top : left;\n var altSide = mainAxis === 'y' ? bottom : right;\n var len = mainAxis === 'y' ? 'height' : 'width';\n var offset = popperOffsets[mainAxis];\n var min = offset + overflow[mainSide];\n var max = offset - overflow[altSide];\n var additive = tether ? -popperRect[len] / 2 : 0;\n var minLen = variation === start ? referenceRect[len] : popperRect[len];\n var maxLen = variation === start ? -popperRect[len] : -referenceRect[len]; // We need to include the arrow in the calculation so the arrow doesn't go\n // outside the reference bounds\n\n var arrowElement = state.elements.arrow;\n var arrowRect = tether && arrowElement ? getLayoutRect(arrowElement) : {\n width: 0,\n height: 0\n };\n var arrowPaddingObject = state.modifiersData['arrow#persistent'] ? state.modifiersData['arrow#persistent'].padding : getFreshSideObject();\n var arrowPaddingMin = arrowPaddingObject[mainSide];\n var arrowPaddingMax = arrowPaddingObject[altSide]; // If the reference length is smaller than the arrow length, we don't want\n // to include its full size in the calculation. If the reference is small\n // and near the edge of a boundary, the popper can overflow even if the\n // reference is not overflowing as well (e.g. virtual elements with no\n // width or height)\n\n var arrowLen = within(0, referenceRect[len], arrowRect[len]);\n var minOffset = isBasePlacement ? referenceRect[len] / 2 - additive - arrowLen - arrowPaddingMin - normalizedTetherOffsetValue.mainAxis : minLen - arrowLen - arrowPaddingMin - normalizedTetherOffsetValue.mainAxis;\n var maxOffset = isBasePlacement ? -referenceRect[len] / 2 + additive + arrowLen + arrowPaddingMax + normalizedTetherOffsetValue.mainAxis : maxLen + arrowLen + arrowPaddingMax + normalizedTetherOffsetValue.mainAxis;\n var arrowOffsetParent = state.elements.arrow && getOffsetParent(state.elements.arrow);\n var clientOffset = arrowOffsetParent ? mainAxis === 'y' ? arrowOffsetParent.clientTop || 0 : arrowOffsetParent.clientLeft || 0 : 0;\n var offsetModifierValue = (_offsetModifierState$ = offsetModifierState == null ? void 0 : offsetModifierState[mainAxis]) != null ? _offsetModifierState$ : 0;\n var tetherMin = offset + minOffset - offsetModifierValue - clientOffset;\n var tetherMax = offset + maxOffset - offsetModifierValue;\n var preventedOffset = within(tether ? mathMin(min, tetherMin) : min, offset, tether ? mathMax(max, tetherMax) : max);\n popperOffsets[mainAxis] = preventedOffset;\n data[mainAxis] = preventedOffset - offset;\n }\n\n if (checkAltAxis) {\n var _offsetModifierState$2;\n\n var _mainSide = mainAxis === 'x' ? top : left;\n\n var _altSide = mainAxis === 'x' ? bottom : right;\n\n var _offset = popperOffsets[altAxis];\n\n var _len = altAxis === 'y' ? 'height' : 'width';\n\n var _min = _offset + overflow[_mainSide];\n\n var _max = _offset - overflow[_altSide];\n\n var isOriginSide = [top, left].indexOf(basePlacement) !== -1;\n\n var _offsetModifierValue = (_offsetModifierState$2 = offsetModifierState == null ? void 0 : offsetModifierState[altAxis]) != null ? _offsetModifierState$2 : 0;\n\n var _tetherMin = isOriginSide ? 
_min : _offset - referenceRect[_len] - popperRect[_len] - _offsetModifierValue + normalizedTetherOffsetValue.altAxis;\n\n var _tetherMax = isOriginSide ? _offset + referenceRect[_len] + popperRect[_len] - _offsetModifierValue - normalizedTetherOffsetValue.altAxis : _max;\n\n var _preventedOffset = tether && isOriginSide ? withinMaxClamp(_tetherMin, _offset, _tetherMax) : within(tether ? _tetherMin : _min, _offset, tether ? _tetherMax : _max);\n\n popperOffsets[altAxis] = _preventedOffset;\n data[altAxis] = _preventedOffset - _offset;\n }\n\n state.modifiersData[name] = data;\n} // eslint-disable-next-line import/no-unused-modules\n\n\nexport default {\n name: 'preventOverflow',\n enabled: true,\n phase: 'main',\n fn: preventOverflow,\n requiresIfExists: ['offset']\n};","export default function getAltAxis(axis) {\n return axis === 'x' ? 'y' : 'x';\n}","import getBoundingClientRect from \"./getBoundingClientRect.js\";\nimport getNodeScroll from \"./getNodeScroll.js\";\nimport getNodeName from \"./getNodeName.js\";\nimport { isHTMLElement } from \"./instanceOf.js\";\nimport getWindowScrollBarX from \"./getWindowScrollBarX.js\";\nimport getDocumentElement from \"./getDocumentElement.js\";\nimport isScrollParent from \"./isScrollParent.js\";\nimport { round } from \"../utils/math.js\";\n\nfunction isElementScaled(element) {\n var rect = element.getBoundingClientRect();\n var scaleX = round(rect.width) / element.offsetWidth || 1;\n var scaleY = round(rect.height) / element.offsetHeight || 1;\n return scaleX !== 1 || scaleY !== 1;\n} // Returns the composite rect of an element relative to its offsetParent.\n// Composite means it takes into account transforms as well as layout.\n\n\nexport default function getCompositeRect(elementOrVirtualElement, offsetParent, isFixed) {\n if (isFixed === void 0) {\n isFixed = false;\n }\n\n var isOffsetParentAnElement = isHTMLElement(offsetParent);\n var offsetParentIsScaled = isHTMLElement(offsetParent) && isElementScaled(offsetParent);\n var documentElement = getDocumentElement(offsetParent);\n var rect = getBoundingClientRect(elementOrVirtualElement, offsetParentIsScaled, isFixed);\n var scroll = {\n scrollLeft: 0,\n scrollTop: 0\n };\n var offsets = {\n x: 0,\n y: 0\n };\n\n if (isOffsetParentAnElement || !isOffsetParentAnElement && !isFixed) {\n if (getNodeName(offsetParent) !== 'body' || // https://github.com/popperjs/popper-core/issues/1078\n isScrollParent(documentElement)) {\n scroll = getNodeScroll(offsetParent);\n }\n\n if (isHTMLElement(offsetParent)) {\n offsets = getBoundingClientRect(offsetParent, true);\n offsets.x += offsetParent.clientLeft;\n offsets.y += offsetParent.clientTop;\n } else if (documentElement) {\n offsets.x = getWindowScrollBarX(documentElement);\n }\n }\n\n return {\n x: rect.left + scroll.scrollLeft - offsets.x,\n y: rect.top + scroll.scrollTop - offsets.y,\n width: rect.width,\n height: rect.height\n };\n}","import getWindowScroll from \"./getWindowScroll.js\";\nimport getWindow from \"./getWindow.js\";\nimport { isHTMLElement } from \"./instanceOf.js\";\nimport getHTMLElementScroll from \"./getHTMLElementScroll.js\";\nexport default function getNodeScroll(node) {\n if (node === getWindow(node) || !isHTMLElement(node)) {\n return getWindowScroll(node);\n } else {\n return getHTMLElementScroll(node);\n }\n}","export default function getHTMLElementScroll(element) {\n return {\n scrollLeft: element.scrollLeft,\n scrollTop: element.scrollTop\n };\n}","import { modifierPhases } from \"../enums.js\"; // source: 
https://stackoverflow.com/questions/49875255\n\nfunction order(modifiers) {\n var map = new Map();\n var visited = new Set();\n var result = [];\n modifiers.forEach(function (modifier) {\n map.set(modifier.name, modifier);\n }); // On visiting object, check for its dependencies and visit them recursively\n\n function sort(modifier) {\n visited.add(modifier.name);\n var requires = [].concat(modifier.requires || [], modifier.requiresIfExists || []);\n requires.forEach(function (dep) {\n if (!visited.has(dep)) {\n var depModifier = map.get(dep);\n\n if (depModifier) {\n sort(depModifier);\n }\n }\n });\n result.push(modifier);\n }\n\n modifiers.forEach(function (modifier) {\n if (!visited.has(modifier.name)) {\n // check for visited object\n sort(modifier);\n }\n });\n return result;\n}\n\nexport default function orderModifiers(modifiers) {\n // order based on dependencies\n var orderedModifiers = order(modifiers); // order based on phase\n\n return modifierPhases.reduce(function (acc, phase) {\n return acc.concat(orderedModifiers.filter(function (modifier) {\n return modifier.phase === phase;\n }));\n }, []);\n}","import getCompositeRect from \"./dom-utils/getCompositeRect.js\";\nimport getLayoutRect from \"./dom-utils/getLayoutRect.js\";\nimport listScrollParents from \"./dom-utils/listScrollParents.js\";\nimport getOffsetParent from \"./dom-utils/getOffsetParent.js\";\nimport orderModifiers from \"./utils/orderModifiers.js\";\nimport debounce from \"./utils/debounce.js\";\nimport mergeByName from \"./utils/mergeByName.js\";\nimport detectOverflow from \"./utils/detectOverflow.js\";\nimport { isElement } from \"./dom-utils/instanceOf.js\";\nvar DEFAULT_OPTIONS = {\n placement: 'bottom',\n modifiers: [],\n strategy: 'absolute'\n};\n\nfunction areValidElements() {\n for (var _len = arguments.length, args = new Array(_len), _key = 0; _key < _len; _key++) {\n args[_key] = arguments[_key];\n }\n\n return !args.some(function (element) {\n return !(element && typeof element.getBoundingClientRect === 'function');\n });\n}\n\nexport function popperGenerator(generatorOptions) {\n if (generatorOptions === void 0) {\n generatorOptions = {};\n }\n\n var _generatorOptions = generatorOptions,\n _generatorOptions$def = _generatorOptions.defaultModifiers,\n defaultModifiers = _generatorOptions$def === void 0 ? [] : _generatorOptions$def,\n _generatorOptions$def2 = _generatorOptions.defaultOptions,\n defaultOptions = _generatorOptions$def2 === void 0 ? DEFAULT_OPTIONS : _generatorOptions$def2;\n return function createPopper(reference, popper, options) {\n if (options === void 0) {\n options = defaultOptions;\n }\n\n var state = {\n placement: 'bottom',\n orderedModifiers: [],\n options: Object.assign({}, DEFAULT_OPTIONS, defaultOptions),\n modifiersData: {},\n elements: {\n reference: reference,\n popper: popper\n },\n attributes: {},\n styles: {}\n };\n var effectCleanupFns = [];\n var isDestroyed = false;\n var instance = {\n state: state,\n setOptions: function setOptions(setOptionsAction) {\n var options = typeof setOptionsAction === 'function' ? setOptionsAction(state.options) : setOptionsAction;\n cleanupModifierEffects();\n state.options = Object.assign({}, defaultOptions, state.options, options);\n state.scrollParents = {\n reference: isElement(reference) ? listScrollParents(reference) : reference.contextElement ? 
listScrollParents(reference.contextElement) : [],\n popper: listScrollParents(popper)\n }; // Orders the modifiers based on their dependencies and `phase`\n // properties\n\n var orderedModifiers = orderModifiers(mergeByName([].concat(defaultModifiers, state.options.modifiers))); // Strip out disabled modifiers\n\n state.orderedModifiers = orderedModifiers.filter(function (m) {\n return m.enabled;\n });\n runModifierEffects();\n return instance.update();\n },\n // Sync update – it will always be executed, even if not necessary. This\n // is useful for low frequency updates where sync behavior simplifies the\n // logic.\n // For high frequency updates (e.g. `resize` and `scroll` events), always\n // prefer the async Popper#update method\n forceUpdate: function forceUpdate() {\n if (isDestroyed) {\n return;\n }\n\n var _state$elements = state.elements,\n reference = _state$elements.reference,\n popper = _state$elements.popper; // Don't proceed if `reference` or `popper` are not valid elements\n // anymore\n\n if (!areValidElements(reference, popper)) {\n return;\n } // Store the reference and popper rects to be read by modifiers\n\n\n state.rects = {\n reference: getCompositeRect(reference, getOffsetParent(popper), state.options.strategy === 'fixed'),\n popper: getLayoutRect(popper)\n }; // Modifiers have the ability to reset the current update cycle. The\n // most common use case for this is the `flip` modifier changing the\n // placement, which then needs to re-run all the modifiers, because the\n // logic was previously ran for the previous placement and is therefore\n // stale/incorrect\n\n state.reset = false;\n state.placement = state.options.placement; // On each update cycle, the `modifiersData` property for each modifier\n // is filled with the initial data specified by the modifier. This means\n // it doesn't persist and is fresh on each update.\n // To ensure persistent data, use `${name}#persistent`\n\n state.orderedModifiers.forEach(function (modifier) {\n return state.modifiersData[modifier.name] = Object.assign({}, modifier.data);\n });\n\n for (var index = 0; index < state.orderedModifiers.length; index++) {\n if (state.reset === true) {\n state.reset = false;\n index = -1;\n continue;\n }\n\n var _state$orderedModifie = state.orderedModifiers[index],\n fn = _state$orderedModifie.fn,\n _state$orderedModifie2 = _state$orderedModifie.options,\n _options = _state$orderedModifie2 === void 0 ? {} : _state$orderedModifie2,\n name = _state$orderedModifie.name;\n\n if (typeof fn === 'function') {\n state = fn({\n state: state,\n options: _options,\n name: name,\n instance: instance\n }) || state;\n }\n }\n },\n // Async and optimistically optimized update – it will not be executed if\n // not necessary (debounced to run at most once-per-tick)\n update: debounce(function () {\n return new Promise(function (resolve) {\n instance.forceUpdate();\n resolve(state);\n });\n }),\n destroy: function destroy() {\n cleanupModifierEffects();\n isDestroyed = true;\n }\n };\n\n if (!areValidElements(reference, popper)) {\n return instance;\n }\n\n instance.setOptions(options).then(function (state) {\n if (!isDestroyed && options.onFirstUpdate) {\n options.onFirstUpdate(state);\n }\n }); // Modifiers have the ability to execute arbitrary code before the first\n // update cycle runs. They will be executed in the same order as the update\n // cycle. 
This is useful when a modifier adds some persistent data that\n // other modifiers need to use, but the modifier is run after the dependent\n // one.\n\n function runModifierEffects() {\n state.orderedModifiers.forEach(function (_ref) {\n var name = _ref.name,\n _ref$options = _ref.options,\n options = _ref$options === void 0 ? {} : _ref$options,\n effect = _ref.effect;\n\n if (typeof effect === 'function') {\n var cleanupFn = effect({\n state: state,\n name: name,\n instance: instance,\n options: options\n });\n\n var noopFn = function noopFn() {};\n\n effectCleanupFns.push(cleanupFn || noopFn);\n }\n });\n }\n\n function cleanupModifierEffects() {\n effectCleanupFns.forEach(function (fn) {\n return fn();\n });\n effectCleanupFns = [];\n }\n\n return instance;\n };\n}\nexport var createPopper = /*#__PURE__*/popperGenerator(); // eslint-disable-next-line import/no-unused-modules\n\nexport { detectOverflow };","export default function debounce(fn) {\n var pending;\n return function () {\n if (!pending) {\n pending = new Promise(function (resolve) {\n Promise.resolve().then(function () {\n pending = undefined;\n resolve(fn());\n });\n });\n }\n\n return pending;\n };\n}","export default function mergeByName(modifiers) {\n var merged = modifiers.reduce(function (merged, current) {\n var existing = merged[current.name];\n merged[current.name] = existing ? Object.assign({}, existing, current, {\n options: Object.assign({}, existing.options, current.options),\n data: Object.assign({}, existing.data, current.data)\n }) : current;\n return merged;\n }, {}); // IE11 does not support Object.values\n\n return Object.keys(merged).map(function (key) {\n return merged[key];\n });\n}","import { popperGenerator, detectOverflow } from \"./createPopper.js\";\nimport eventListeners from \"./modifiers/eventListeners.js\";\nimport popperOffsets from \"./modifiers/popperOffsets.js\";\nimport computeStyles from \"./modifiers/computeStyles.js\";\nimport applyStyles from \"./modifiers/applyStyles.js\";\nvar defaultModifiers = [eventListeners, popperOffsets, computeStyles, applyStyles];\nvar createPopper = /*#__PURE__*/popperGenerator({\n defaultModifiers: defaultModifiers\n}); // eslint-disable-next-line import/no-unused-modules\n\nexport { createPopper, popperGenerator, defaultModifiers, detectOverflow };","import { popperGenerator, detectOverflow } from \"./createPopper.js\";\nimport eventListeners from \"./modifiers/eventListeners.js\";\nimport popperOffsets from \"./modifiers/popperOffsets.js\";\nimport computeStyles from \"./modifiers/computeStyles.js\";\nimport applyStyles from \"./modifiers/applyStyles.js\";\nimport offset from \"./modifiers/offset.js\";\nimport flip from \"./modifiers/flip.js\";\nimport preventOverflow from \"./modifiers/preventOverflow.js\";\nimport arrow from \"./modifiers/arrow.js\";\nimport hide from \"./modifiers/hide.js\";\nvar defaultModifiers = [eventListeners, popperOffsets, computeStyles, applyStyles, offset, flip, preventOverflow, arrow, hide];\nvar createPopper = /*#__PURE__*/popperGenerator({\n defaultModifiers: defaultModifiers\n}); // eslint-disable-next-line import/no-unused-modules\n\nexport { createPopper, popperGenerator, defaultModifiers, detectOverflow }; // eslint-disable-next-line import/no-unused-modules\n\nexport { createPopper as createPopperLite } from \"./popper-lite.js\"; // eslint-disable-next-line import/no-unused-modules\n\nexport * from \"./modifiers/index.js\";","/**\n * --------------------------------------------------------------------------\n * 
Bootstrap dropdown.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\nimport * as Popper from '@popperjs/core'\nimport BaseComponent from './base-component.js'\nimport EventHandler from './dom/event-handler.js'\nimport Manipulator from './dom/manipulator.js'\nimport SelectorEngine from './dom/selector-engine.js'\nimport {\n defineJQueryPlugin,\n execute,\n getElement,\n getNextActiveElement,\n isDisabled,\n isElement,\n isRTL,\n isVisible,\n noop\n} from './util/index.js'\n\n/**\n * Constants\n */\n\nconst NAME = 'dropdown'\nconst DATA_KEY = 'bs.dropdown'\nconst EVENT_KEY = `.${DATA_KEY}`\nconst DATA_API_KEY = '.data-api'\n\nconst ESCAPE_KEY = 'Escape'\nconst TAB_KEY = 'Tab'\nconst ARROW_UP_KEY = 'ArrowUp'\nconst ARROW_DOWN_KEY = 'ArrowDown'\nconst RIGHT_MOUSE_BUTTON = 2 // MouseEvent.button value for the secondary button, usually the right button\n\nconst EVENT_HIDE = `hide${EVENT_KEY}`\nconst EVENT_HIDDEN = `hidden${EVENT_KEY}`\nconst EVENT_SHOW = `show${EVENT_KEY}`\nconst EVENT_SHOWN = `shown${EVENT_KEY}`\nconst EVENT_CLICK_DATA_API = `click${EVENT_KEY}${DATA_API_KEY}`\nconst EVENT_KEYDOWN_DATA_API = `keydown${EVENT_KEY}${DATA_API_KEY}`\nconst EVENT_KEYUP_DATA_API = `keyup${EVENT_KEY}${DATA_API_KEY}`\n\nconst CLASS_NAME_SHOW = 'show'\nconst CLASS_NAME_DROPUP = 'dropup'\nconst CLASS_NAME_DROPEND = 'dropend'\nconst CLASS_NAME_DROPSTART = 'dropstart'\nconst CLASS_NAME_DROPUP_CENTER = 'dropup-center'\nconst CLASS_NAME_DROPDOWN_CENTER = 'dropdown-center'\n\nconst SELECTOR_DATA_TOGGLE = '[data-bs-toggle=\"dropdown\"]:not(.disabled):not(:disabled)'\nconst SELECTOR_DATA_TOGGLE_SHOWN = `${SELECTOR_DATA_TOGGLE}.${CLASS_NAME_SHOW}`\nconst SELECTOR_MENU = '.dropdown-menu'\nconst SELECTOR_NAVBAR = '.navbar'\nconst SELECTOR_NAVBAR_NAV = '.navbar-nav'\nconst SELECTOR_VISIBLE_ITEMS = '.dropdown-menu .dropdown-item:not(.disabled):not(:disabled)'\n\nconst PLACEMENT_TOP = isRTL() ? 'top-end' : 'top-start'\nconst PLACEMENT_TOPEND = isRTL() ? 'top-start' : 'top-end'\nconst PLACEMENT_BOTTOM = isRTL() ? 'bottom-end' : 'bottom-start'\nconst PLACEMENT_BOTTOMEND = isRTL() ? 'bottom-start' : 'bottom-end'\nconst PLACEMENT_RIGHT = isRTL() ? 'left-start' : 'right-start'\nconst PLACEMENT_LEFT = isRTL() ? 
'right-start' : 'left-start'\nconst PLACEMENT_TOPCENTER = 'top'\nconst PLACEMENT_BOTTOMCENTER = 'bottom'\n\nconst Default = {\n autoClose: true,\n boundary: 'clippingParents',\n display: 'dynamic',\n offset: [0, 2],\n popperConfig: null,\n reference: 'toggle'\n}\n\nconst DefaultType = {\n autoClose: '(boolean|string)',\n boundary: '(string|element)',\n display: 'string',\n offset: '(array|string|function)',\n popperConfig: '(null|object|function)',\n reference: '(string|element|object)'\n}\n\n/**\n * Class definition\n */\n\nclass Dropdown extends BaseComponent {\n constructor(element, config) {\n super(element, config)\n\n this._popper = null\n this._parent = this._element.parentNode // dropdown wrapper\n // TODO: v6 revert #37011 & change markup https://getbootstrap.com/docs/5.3/forms/input-group/\n this._menu = SelectorEngine.next(this._element, SELECTOR_MENU)[0] ||\n SelectorEngine.prev(this._element, SELECTOR_MENU)[0] ||\n SelectorEngine.findOne(SELECTOR_MENU, this._parent)\n this._inNavbar = this._detectNavbar()\n }\n\n // Getters\n static get Default() {\n return Default\n }\n\n static get DefaultType() {\n return DefaultType\n }\n\n static get NAME() {\n return NAME\n }\n\n // Public\n toggle() {\n return this._isShown() ? this.hide() : this.show()\n }\n\n show() {\n if (isDisabled(this._element) || this._isShown()) {\n return\n }\n\n const relatedTarget = {\n relatedTarget: this._element\n }\n\n const showEvent = EventHandler.trigger(this._element, EVENT_SHOW, relatedTarget)\n\n if (showEvent.defaultPrevented) {\n return\n }\n\n this._createPopper()\n\n // If this is a touch-enabled device we add extra\n // empty mouseover listeners to the body's immediate children;\n // only needed because of broken event delegation on iOS\n // https://www.quirksmode.org/blog/archives/2014/02/mouse_event_bub.html\n if ('ontouchstart' in document.documentElement && !this._parent.closest(SELECTOR_NAVBAR_NAV)) {\n for (const element of [].concat(...document.body.children)) {\n EventHandler.on(element, 'mouseover', noop)\n }\n }\n\n this._element.focus()\n this._element.setAttribute('aria-expanded', true)\n\n this._menu.classList.add(CLASS_NAME_SHOW)\n this._element.classList.add(CLASS_NAME_SHOW)\n EventHandler.trigger(this._element, EVENT_SHOWN, relatedTarget)\n }\n\n hide() {\n if (isDisabled(this._element) || !this._isShown()) {\n return\n }\n\n const relatedTarget = {\n relatedTarget: this._element\n }\n\n this._completeHide(relatedTarget)\n }\n\n dispose() {\n if (this._popper) {\n this._popper.destroy()\n }\n\n super.dispose()\n }\n\n update() {\n this._inNavbar = this._detectNavbar()\n if (this._popper) {\n this._popper.update()\n }\n }\n\n // Private\n _completeHide(relatedTarget) {\n const hideEvent = EventHandler.trigger(this._element, EVENT_HIDE, relatedTarget)\n if (hideEvent.defaultPrevented) {\n return\n }\n\n // If this is a touch-enabled device we remove the extra\n // empty mouseover listeners we added for iOS support\n if ('ontouchstart' in document.documentElement) {\n for (const element of [].concat(...document.body.children)) {\n EventHandler.off(element, 'mouseover', noop)\n }\n }\n\n if (this._popper) {\n this._popper.destroy()\n }\n\n this._menu.classList.remove(CLASS_NAME_SHOW)\n this._element.classList.remove(CLASS_NAME_SHOW)\n this._element.setAttribute('aria-expanded', 'false')\n Manipulator.removeDataAttribute(this._menu, 'popper')\n EventHandler.trigger(this._element, EVENT_HIDDEN, relatedTarget)\n }\n\n _getConfig(config) {\n config = super._getConfig(config)\n\n if 
(typeof config.reference === 'object' && !isElement(config.reference) &&\n typeof config.reference.getBoundingClientRect !== 'function'\n ) {\n // Popper virtual elements require a getBoundingClientRect method\n throw new TypeError(`${NAME.toUpperCase()}: Option \"reference\" provided type \"object\" without a required \"getBoundingClientRect\" method.`)\n }\n\n return config\n }\n\n _createPopper() {\n if (typeof Popper === 'undefined') {\n throw new TypeError('Bootstrap\\'s dropdowns require Popper (https://popper.js.org)')\n }\n\n let referenceElement = this._element\n\n if (this._config.reference === 'parent') {\n referenceElement = this._parent\n } else if (isElement(this._config.reference)) {\n referenceElement = getElement(this._config.reference)\n } else if (typeof this._config.reference === 'object') {\n referenceElement = this._config.reference\n }\n\n const popperConfig = this._getPopperConfig()\n this._popper = Popper.createPopper(referenceElement, this._menu, popperConfig)\n }\n\n _isShown() {\n return this._menu.classList.contains(CLASS_NAME_SHOW)\n }\n\n _getPlacement() {\n const parentDropdown = this._parent\n\n if (parentDropdown.classList.contains(CLASS_NAME_DROPEND)) {\n return PLACEMENT_RIGHT\n }\n\n if (parentDropdown.classList.contains(CLASS_NAME_DROPSTART)) {\n return PLACEMENT_LEFT\n }\n\n if (parentDropdown.classList.contains(CLASS_NAME_DROPUP_CENTER)) {\n return PLACEMENT_TOPCENTER\n }\n\n if (parentDropdown.classList.contains(CLASS_NAME_DROPDOWN_CENTER)) {\n return PLACEMENT_BOTTOMCENTER\n }\n\n // We need to trim the value because custom properties can also include spaces\n const isEnd = getComputedStyle(this._menu).getPropertyValue('--bs-position').trim() === 'end'\n\n if (parentDropdown.classList.contains(CLASS_NAME_DROPUP)) {\n return isEnd ? PLACEMENT_TOPEND : PLACEMENT_TOP\n }\n\n return isEnd ? PLACEMENT_BOTTOMEND : PLACEMENT_BOTTOM\n }\n\n _detectNavbar() {\n return this._element.closest(SELECTOR_NAVBAR) !== null\n }\n\n _getOffset() {\n const { offset } = this._config\n\n if (typeof offset === 'string') {\n return offset.split(',').map(value => Number.parseInt(value, 10))\n }\n\n if (typeof offset === 'function') {\n return popperData => offset(popperData, this._element)\n }\n\n return offset\n }\n\n _getPopperConfig() {\n const defaultBsPopperConfig = {\n placement: this._getPlacement(),\n modifiers: [{\n name: 'preventOverflow',\n options: {\n boundary: this._config.boundary\n }\n },\n {\n name: 'offset',\n options: {\n offset: this._getOffset()\n }\n }]\n }\n\n // Disable Popper if we have a static display or Dropdown is in Navbar\n if (this._inNavbar || this._config.display === 'static') {\n Manipulator.setDataAttribute(this._menu, 'popper', 'static') // TODO: v6 remove\n defaultBsPopperConfig.modifiers = [{\n name: 'applyStyles',\n enabled: false\n }]\n }\n\n return {\n ...defaultBsPopperConfig,\n ...execute(this._config.popperConfig, [defaultBsPopperConfig])\n }\n }\n\n _selectMenuItem({ key, target }) {\n const items = SelectorEngine.find(SELECTOR_VISIBLE_ITEMS, this._menu).filter(element => isVisible(element))\n\n if (!items.length) {\n return\n }\n\n // if target isn't included in items (e.g. 
when expanding the dropdown)\n // allow cycling to get the last item in case key equals ARROW_UP_KEY\n getNextActiveElement(items, target, key === ARROW_DOWN_KEY, !items.includes(target)).focus()\n }\n\n // Static\n static jQueryInterface(config) {\n return this.each(function () {\n const data = Dropdown.getOrCreateInstance(this, config)\n\n if (typeof config !== 'string') {\n return\n }\n\n if (typeof data[config] === 'undefined') {\n throw new TypeError(`No method named \"${config}\"`)\n }\n\n data[config]()\n })\n }\n\n static clearMenus(event) {\n if (event.button === RIGHT_MOUSE_BUTTON || (event.type === 'keyup' && event.key !== TAB_KEY)) {\n return\n }\n\n const openToggles = SelectorEngine.find(SELECTOR_DATA_TOGGLE_SHOWN)\n\n for (const toggle of openToggles) {\n const context = Dropdown.getInstance(toggle)\n if (!context || context._config.autoClose === false) {\n continue\n }\n\n const composedPath = event.composedPath()\n const isMenuTarget = composedPath.includes(context._menu)\n if (\n composedPath.includes(context._element) ||\n (context._config.autoClose === 'inside' && !isMenuTarget) ||\n (context._config.autoClose === 'outside' && isMenuTarget)\n ) {\n continue\n }\n\n // Tab navigation through the dropdown menu or events from contained inputs shouldn't close the menu\n if (context._menu.contains(event.target) && ((event.type === 'keyup' && event.key === TAB_KEY) || /input|select|option|textarea|form/i.test(event.target.tagName))) {\n continue\n }\n\n const relatedTarget = { relatedTarget: context._element }\n\n if (event.type === 'click') {\n relatedTarget.clickEvent = event\n }\n\n context._completeHide(relatedTarget)\n }\n }\n\n static dataApiKeydownHandler(event) {\n // If not an UP | DOWN | ESCAPE key => not a dropdown command\n // If input/textarea && if key is other than ESCAPE => not a dropdown command\n\n const isInput = /input|textarea/i.test(event.target.tagName)\n const isEscapeEvent = event.key === ESCAPE_KEY\n const isUpOrDownEvent = [ARROW_UP_KEY, ARROW_DOWN_KEY].includes(event.key)\n\n if (!isUpOrDownEvent && !isEscapeEvent) {\n return\n }\n\n if (isInput && !isEscapeEvent) {\n return\n }\n\n event.preventDefault()\n\n // TODO: v6 revert #37011 & change markup https://getbootstrap.com/docs/5.3/forms/input-group/\n const getToggleButton = this.matches(SELECTOR_DATA_TOGGLE) ?\n this :\n (SelectorEngine.prev(this, SELECTOR_DATA_TOGGLE)[0] ||\n SelectorEngine.next(this, SELECTOR_DATA_TOGGLE)[0] ||\n SelectorEngine.findOne(SELECTOR_DATA_TOGGLE, event.delegateTarget.parentNode))\n\n const instance = Dropdown.getOrCreateInstance(getToggleButton)\n\n if (isUpOrDownEvent) {\n event.stopPropagation()\n instance.show()\n instance._selectMenuItem(event)\n return\n }\n\n if (instance._isShown()) { // else is escape and we check if it is shown\n event.stopPropagation()\n instance.hide()\n getToggleButton.focus()\n }\n }\n}\n\n/**\n * Data API implementation\n */\n\nEventHandler.on(document, EVENT_KEYDOWN_DATA_API, SELECTOR_DATA_TOGGLE, Dropdown.dataApiKeydownHandler)\nEventHandler.on(document, EVENT_KEYDOWN_DATA_API, SELECTOR_MENU, Dropdown.dataApiKeydownHandler)\nEventHandler.on(document, EVENT_CLICK_DATA_API, Dropdown.clearMenus)\nEventHandler.on(document, EVENT_KEYUP_DATA_API, Dropdown.clearMenus)\nEventHandler.on(document, EVENT_CLICK_DATA_API, SELECTOR_DATA_TOGGLE, function (event) {\n event.preventDefault()\n Dropdown.getOrCreateInstance(this).toggle()\n})\n\n/**\n * jQuery\n */\n\ndefineJQueryPlugin(Dropdown)\n\nexport default Dropdown\n","/**\n * 
--------------------------------------------------------------------------\n * Bootstrap util/backdrop.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\nimport EventHandler from '../dom/event-handler.js'\nimport Config from './config.js'\nimport { execute, executeAfterTransition, getElement, reflow } from './index.js'\n\n/**\n * Constants\n */\n\nconst NAME = 'backdrop'\nconst CLASS_NAME_FADE = 'fade'\nconst CLASS_NAME_SHOW = 'show'\nconst EVENT_MOUSEDOWN = `mousedown.bs.${NAME}`\n\nconst Default = {\n className: 'modal-backdrop',\n clickCallback: null,\n isAnimated: false,\n isVisible: true, // if false, we use the backdrop helper without adding any element to the dom\n rootElement: 'body' // give the choice to place backdrop under different elements\n}\n\nconst DefaultType = {\n className: 'string',\n clickCallback: '(function|null)',\n isAnimated: 'boolean',\n isVisible: 'boolean',\n rootElement: '(element|string)'\n}\n\n/**\n * Class definition\n */\n\nclass Backdrop extends Config {\n constructor(config) {\n super()\n this._config = this._getConfig(config)\n this._isAppended = false\n this._element = null\n }\n\n // Getters\n static get Default() {\n return Default\n }\n\n static get DefaultType() {\n return DefaultType\n }\n\n static get NAME() {\n return NAME\n }\n\n // Public\n show(callback) {\n if (!this._config.isVisible) {\n execute(callback)\n return\n }\n\n this._append()\n\n const element = this._getElement()\n if (this._config.isAnimated) {\n reflow(element)\n }\n\n element.classList.add(CLASS_NAME_SHOW)\n\n this._emulateAnimation(() => {\n execute(callback)\n })\n }\n\n hide(callback) {\n if (!this._config.isVisible) {\n execute(callback)\n return\n }\n\n this._getElement().classList.remove(CLASS_NAME_SHOW)\n\n this._emulateAnimation(() => {\n this.dispose()\n execute(callback)\n })\n }\n\n dispose() {\n if (!this._isAppended) {\n return\n }\n\n EventHandler.off(this._element, EVENT_MOUSEDOWN)\n\n this._element.remove()\n this._isAppended = false\n }\n\n // Private\n _getElement() {\n if (!this._element) {\n const backdrop = document.createElement('div')\n backdrop.className = this._config.className\n if (this._config.isAnimated) {\n backdrop.classList.add(CLASS_NAME_FADE)\n }\n\n this._element = backdrop\n }\n\n return this._element\n }\n\n _configAfterMerge(config) {\n // use getElement() with the default \"body\" to get a fresh Element on each instantiation\n config.rootElement = getElement(config.rootElement)\n return config\n }\n\n _append() {\n if (this._isAppended) {\n return\n }\n\n const element = this._getElement()\n this._config.rootElement.append(element)\n\n EventHandler.on(element, EVENT_MOUSEDOWN, () => {\n execute(this._config.clickCallback)\n })\n\n this._isAppended = true\n }\n\n _emulateAnimation(callback) {\n executeAfterTransition(callback, this._getElement(), this._config.isAnimated)\n }\n}\n\nexport default Backdrop\n","/**\n * --------------------------------------------------------------------------\n * Bootstrap util/focustrap.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\nimport EventHandler from '../dom/event-handler.js'\nimport SelectorEngine from '../dom/selector-engine.js'\nimport Config from './config.js'\n\n/**\n * Constants\n */\n\nconst NAME = 'focustrap'\nconst DATA_KEY = 'bs.focustrap'\nconst EVENT_KEY = 
`.${DATA_KEY}`\nconst EVENT_FOCUSIN = `focusin${EVENT_KEY}`\nconst EVENT_KEYDOWN_TAB = `keydown.tab${EVENT_KEY}`\n\nconst TAB_KEY = 'Tab'\nconst TAB_NAV_FORWARD = 'forward'\nconst TAB_NAV_BACKWARD = 'backward'\n\nconst Default = {\n autofocus: true,\n trapElement: null // The element to trap focus inside of\n}\n\nconst DefaultType = {\n autofocus: 'boolean',\n trapElement: 'element'\n}\n\n/**\n * Class definition\n */\n\nclass FocusTrap extends Config {\n constructor(config) {\n super()\n this._config = this._getConfig(config)\n this._isActive = false\n this._lastTabNavDirection = null\n }\n\n // Getters\n static get Default() {\n return Default\n }\n\n static get DefaultType() {\n return DefaultType\n }\n\n static get NAME() {\n return NAME\n }\n\n // Public\n activate() {\n if (this._isActive) {\n return\n }\n\n if (this._config.autofocus) {\n this._config.trapElement.focus()\n }\n\n EventHandler.off(document, EVENT_KEY) // guard against infinite focus loop\n EventHandler.on(document, EVENT_FOCUSIN, event => this._handleFocusin(event))\n EventHandler.on(document, EVENT_KEYDOWN_TAB, event => this._handleKeydown(event))\n\n this._isActive = true\n }\n\n deactivate() {\n if (!this._isActive) {\n return\n }\n\n this._isActive = false\n EventHandler.off(document, EVENT_KEY)\n }\n\n // Private\n _handleFocusin(event) {\n const { trapElement } = this._config\n\n if (event.target === document || event.target === trapElement || trapElement.contains(event.target)) {\n return\n }\n\n const elements = SelectorEngine.focusableChildren(trapElement)\n\n if (elements.length === 0) {\n trapElement.focus()\n } else if (this._lastTabNavDirection === TAB_NAV_BACKWARD) {\n elements[elements.length - 1].focus()\n } else {\n elements[0].focus()\n }\n }\n\n _handleKeydown(event) {\n if (event.key !== TAB_KEY) {\n return\n }\n\n this._lastTabNavDirection = event.shiftKey ? 
TAB_NAV_BACKWARD : TAB_NAV_FORWARD\n }\n}\n\nexport default FocusTrap\n","/**\n * --------------------------------------------------------------------------\n * Bootstrap util/scrollBar.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\nimport Manipulator from '../dom/manipulator.js'\nimport SelectorEngine from '../dom/selector-engine.js'\nimport { isElement } from './index.js'\n\n/**\n * Constants\n */\n\nconst SELECTOR_FIXED_CONTENT = '.fixed-top, .fixed-bottom, .is-fixed, .sticky-top'\nconst SELECTOR_STICKY_CONTENT = '.sticky-top'\nconst PROPERTY_PADDING = 'padding-right'\nconst PROPERTY_MARGIN = 'margin-right'\n\n/**\n * Class definition\n */\n\nclass ScrollBarHelper {\n constructor() {\n this._element = document.body\n }\n\n // Public\n getWidth() {\n // https://developer.mozilla.org/en-US/docs/Web/API/Window/innerWidth#usage_notes\n const documentWidth = document.documentElement.clientWidth\n return Math.abs(window.innerWidth - documentWidth)\n }\n\n hide() {\n const width = this.getWidth()\n this._disableOverFlow()\n // give padding to element to balance the hidden scrollbar width\n this._setElementAttributes(this._element, PROPERTY_PADDING, calculatedValue => calculatedValue + width)\n // trick: We adjust positive paddingRight and negative marginRight to sticky-top elements to keep showing fullwidth\n this._setElementAttributes(SELECTOR_FIXED_CONTENT, PROPERTY_PADDING, calculatedValue => calculatedValue + width)\n this._setElementAttributes(SELECTOR_STICKY_CONTENT, PROPERTY_MARGIN, calculatedValue => calculatedValue - width)\n }\n\n reset() {\n this._resetElementAttributes(this._element, 'overflow')\n this._resetElementAttributes(this._element, PROPERTY_PADDING)\n this._resetElementAttributes(SELECTOR_FIXED_CONTENT, PROPERTY_PADDING)\n this._resetElementAttributes(SELECTOR_STICKY_CONTENT, PROPERTY_MARGIN)\n }\n\n isOverflowing() {\n return this.getWidth() > 0\n }\n\n // Private\n _disableOverFlow() {\n this._saveInitialAttribute(this._element, 'overflow')\n this._element.style.overflow = 'hidden'\n }\n\n _setElementAttributes(selector, styleProperty, callback) {\n const scrollbarWidth = this.getWidth()\n const manipulationCallBack = element => {\n if (element !== this._element && window.innerWidth > element.clientWidth + scrollbarWidth) {\n return\n }\n\n this._saveInitialAttribute(element, styleProperty)\n const calculatedValue = window.getComputedStyle(element).getPropertyValue(styleProperty)\n element.style.setProperty(styleProperty, `${callback(Number.parseFloat(calculatedValue))}px`)\n }\n\n this._applyManipulationCallback(selector, manipulationCallBack)\n }\n\n _saveInitialAttribute(element, styleProperty) {\n const actualValue = element.style.getPropertyValue(styleProperty)\n if (actualValue) {\n Manipulator.setDataAttribute(element, styleProperty, actualValue)\n }\n }\n\n _resetElementAttributes(selector, styleProperty) {\n const manipulationCallBack = element => {\n const value = Manipulator.getDataAttribute(element, styleProperty)\n // We only want to remove the property if the value is `null`; the value can also be zero\n if (value === null) {\n element.style.removeProperty(styleProperty)\n return\n }\n\n Manipulator.removeDataAttribute(element, styleProperty)\n element.style.setProperty(styleProperty, value)\n }\n\n this._applyManipulationCallback(selector, manipulationCallBack)\n }\n\n _applyManipulationCallback(selector, callBack) {\n if 
(isElement(selector)) {\n callBack(selector)\n return\n }\n\n for (const sel of SelectorEngine.find(selector, this._element)) {\n callBack(sel)\n }\n }\n}\n\nexport default ScrollBarHelper\n","/**\n * --------------------------------------------------------------------------\n * Bootstrap modal.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\nimport BaseComponent from './base-component.js'\nimport EventHandler from './dom/event-handler.js'\nimport SelectorEngine from './dom/selector-engine.js'\nimport Backdrop from './util/backdrop.js'\nimport { enableDismissTrigger } from './util/component-functions.js'\nimport FocusTrap from './util/focustrap.js'\nimport { defineJQueryPlugin, isRTL, isVisible, reflow } from './util/index.js'\nimport ScrollBarHelper from './util/scrollbar.js'\n\n/**\n * Constants\n */\n\nconst NAME = 'modal'\nconst DATA_KEY = 'bs.modal'\nconst EVENT_KEY = `.${DATA_KEY}`\nconst DATA_API_KEY = '.data-api'\nconst ESCAPE_KEY = 'Escape'\n\nconst EVENT_HIDE = `hide${EVENT_KEY}`\nconst EVENT_HIDE_PREVENTED = `hidePrevented${EVENT_KEY}`\nconst EVENT_HIDDEN = `hidden${EVENT_KEY}`\nconst EVENT_SHOW = `show${EVENT_KEY}`\nconst EVENT_SHOWN = `shown${EVENT_KEY}`\nconst EVENT_RESIZE = `resize${EVENT_KEY}`\nconst EVENT_CLICK_DISMISS = `click.dismiss${EVENT_KEY}`\nconst EVENT_MOUSEDOWN_DISMISS = `mousedown.dismiss${EVENT_KEY}`\nconst EVENT_KEYDOWN_DISMISS = `keydown.dismiss${EVENT_KEY}`\nconst EVENT_CLICK_DATA_API = `click${EVENT_KEY}${DATA_API_KEY}`\n\nconst CLASS_NAME_OPEN = 'modal-open'\nconst CLASS_NAME_FADE = 'fade'\nconst CLASS_NAME_SHOW = 'show'\nconst CLASS_NAME_STATIC = 'modal-static'\n\nconst OPEN_SELECTOR = '.modal.show'\nconst SELECTOR_DIALOG = '.modal-dialog'\nconst SELECTOR_MODAL_BODY = '.modal-body'\nconst SELECTOR_DATA_TOGGLE = '[data-bs-toggle=\"modal\"]'\n\nconst Default = {\n backdrop: true,\n focus: true,\n keyboard: true\n}\n\nconst DefaultType = {\n backdrop: '(boolean|string)',\n focus: 'boolean',\n keyboard: 'boolean'\n}\n\n/**\n * Class definition\n */\n\nclass Modal extends BaseComponent {\n constructor(element, config) {\n super(element, config)\n\n this._dialog = SelectorEngine.findOne(SELECTOR_DIALOG, this._element)\n this._backdrop = this._initializeBackDrop()\n this._focustrap = this._initializeFocusTrap()\n this._isShown = false\n this._isTransitioning = false\n this._scrollBar = new ScrollBarHelper()\n\n this._addEventListeners()\n }\n\n // Getters\n static get Default() {\n return Default\n }\n\n static get DefaultType() {\n return DefaultType\n }\n\n static get NAME() {\n return NAME\n }\n\n // Public\n toggle(relatedTarget) {\n return this._isShown ? 
this.hide() : this.show(relatedTarget)\n }\n\n show(relatedTarget) {\n if (this._isShown || this._isTransitioning) {\n return\n }\n\n const showEvent = EventHandler.trigger(this._element, EVENT_SHOW, {\n relatedTarget\n })\n\n if (showEvent.defaultPrevented) {\n return\n }\n\n this._isShown = true\n this._isTransitioning = true\n\n this._scrollBar.hide()\n\n document.body.classList.add(CLASS_NAME_OPEN)\n\n this._adjustDialog()\n\n this._backdrop.show(() => this._showElement(relatedTarget))\n }\n\n hide() {\n if (!this._isShown || this._isTransitioning) {\n return\n }\n\n const hideEvent = EventHandler.trigger(this._element, EVENT_HIDE)\n\n if (hideEvent.defaultPrevented) {\n return\n }\n\n this._isShown = false\n this._isTransitioning = true\n this._focustrap.deactivate()\n\n this._element.classList.remove(CLASS_NAME_SHOW)\n\n this._queueCallback(() => this._hideModal(), this._element, this._isAnimated())\n }\n\n dispose() {\n EventHandler.off(window, EVENT_KEY)\n EventHandler.off(this._dialog, EVENT_KEY)\n\n this._backdrop.dispose()\n this._focustrap.deactivate()\n\n super.dispose()\n }\n\n handleUpdate() {\n this._adjustDialog()\n }\n\n // Private\n _initializeBackDrop() {\n return new Backdrop({\n isVisible: Boolean(this._config.backdrop), // 'static' option will be translated to true, and booleans will keep their value,\n isAnimated: this._isAnimated()\n })\n }\n\n _initializeFocusTrap() {\n return new FocusTrap({\n trapElement: this._element\n })\n }\n\n _showElement(relatedTarget) {\n // try to append dynamic modal\n if (!document.body.contains(this._element)) {\n document.body.append(this._element)\n }\n\n this._element.style.display = 'block'\n this._element.removeAttribute('aria-hidden')\n this._element.setAttribute('aria-modal', true)\n this._element.setAttribute('role', 'dialog')\n this._element.scrollTop = 0\n\n const modalBody = SelectorEngine.findOne(SELECTOR_MODAL_BODY, this._dialog)\n if (modalBody) {\n modalBody.scrollTop = 0\n }\n\n reflow(this._element)\n\n this._element.classList.add(CLASS_NAME_SHOW)\n\n const transitionComplete = () => {\n if (this._config.focus) {\n this._focustrap.activate()\n }\n\n this._isTransitioning = false\n EventHandler.trigger(this._element, EVENT_SHOWN, {\n relatedTarget\n })\n }\n\n this._queueCallback(transitionComplete, this._dialog, this._isAnimated())\n }\n\n _addEventListeners() {\n EventHandler.on(this._element, EVENT_KEYDOWN_DISMISS, event => {\n if (event.key !== ESCAPE_KEY) {\n return\n }\n\n if (this._config.keyboard) {\n this.hide()\n return\n }\n\n this._triggerBackdropTransition()\n })\n\n EventHandler.on(window, EVENT_RESIZE, () => {\n if (this._isShown && !this._isTransitioning) {\n this._adjustDialog()\n }\n })\n\n EventHandler.on(this._element, EVENT_MOUSEDOWN_DISMISS, event => {\n // a bad trick to segregate clicks that may start inside dialog but end outside, and avoid listen to scrollbar clicks\n EventHandler.one(this._element, EVENT_CLICK_DISMISS, event2 => {\n if (this._element !== event.target || this._element !== event2.target) {\n return\n }\n\n if (this._config.backdrop === 'static') {\n this._triggerBackdropTransition()\n return\n }\n\n if (this._config.backdrop) {\n this.hide()\n }\n })\n })\n }\n\n _hideModal() {\n this._element.style.display = 'none'\n this._element.setAttribute('aria-hidden', true)\n this._element.removeAttribute('aria-modal')\n this._element.removeAttribute('role')\n this._isTransitioning = false\n\n this._backdrop.hide(() => {\n document.body.classList.remove(CLASS_NAME_OPEN)\n 
this._resetAdjustments()\n this._scrollBar.reset()\n EventHandler.trigger(this._element, EVENT_HIDDEN)\n })\n }\n\n _isAnimated() {\n return this._element.classList.contains(CLASS_NAME_FADE)\n }\n\n _triggerBackdropTransition() {\n const hideEvent = EventHandler.trigger(this._element, EVENT_HIDE_PREVENTED)\n if (hideEvent.defaultPrevented) {\n return\n }\n\n const isModalOverflowing = this._element.scrollHeight > document.documentElement.clientHeight\n const initialOverflowY = this._element.style.overflowY\n // return if the following background transition hasn't yet completed\n if (initialOverflowY === 'hidden' || this._element.classList.contains(CLASS_NAME_STATIC)) {\n return\n }\n\n if (!isModalOverflowing) {\n this._element.style.overflowY = 'hidden'\n }\n\n this._element.classList.add(CLASS_NAME_STATIC)\n this._queueCallback(() => {\n this._element.classList.remove(CLASS_NAME_STATIC)\n this._queueCallback(() => {\n this._element.style.overflowY = initialOverflowY\n }, this._dialog)\n }, this._dialog)\n\n this._element.focus()\n }\n\n /**\n * The following methods are used to handle overflowing modals\n */\n\n _adjustDialog() {\n const isModalOverflowing = this._element.scrollHeight > document.documentElement.clientHeight\n const scrollbarWidth = this._scrollBar.getWidth()\n const isBodyOverflowing = scrollbarWidth > 0\n\n if (isBodyOverflowing && !isModalOverflowing) {\n const property = isRTL() ? 'paddingLeft' : 'paddingRight'\n this._element.style[property] = `${scrollbarWidth}px`\n }\n\n if (!isBodyOverflowing && isModalOverflowing) {\n const property = isRTL() ? 'paddingRight' : 'paddingLeft'\n this._element.style[property] = `${scrollbarWidth}px`\n }\n }\n\n _resetAdjustments() {\n this._element.style.paddingLeft = ''\n this._element.style.paddingRight = ''\n }\n\n // Static\n static jQueryInterface(config, relatedTarget) {\n return this.each(function () {\n const data = Modal.getOrCreateInstance(this, config)\n\n if (typeof config !== 'string') {\n return\n }\n\n if (typeof data[config] === 'undefined') {\n throw new TypeError(`No method named \"${config}\"`)\n }\n\n data[config](relatedTarget)\n })\n }\n}\n\n/**\n * Data API implementation\n */\n\nEventHandler.on(document, EVENT_CLICK_DATA_API, SELECTOR_DATA_TOGGLE, function (event) {\n const target = SelectorEngine.getElementFromSelector(this)\n\n if (['A', 'AREA'].includes(this.tagName)) {\n event.preventDefault()\n }\n\n EventHandler.one(target, EVENT_SHOW, showEvent => {\n if (showEvent.defaultPrevented) {\n // only register focus restorer if modal will actually get shown\n return\n }\n\n EventHandler.one(target, EVENT_HIDDEN, () => {\n if (isVisible(this)) {\n this.focus()\n }\n })\n })\n\n // avoid conflict when clicking modal toggler while another one is open\n const alreadyOpen = SelectorEngine.findOne(OPEN_SELECTOR)\n if (alreadyOpen) {\n Modal.getInstance(alreadyOpen).hide()\n }\n\n const data = Modal.getOrCreateInstance(target)\n\n data.toggle(this)\n})\n\nenableDismissTrigger(Modal)\n\n/**\n * jQuery\n */\n\ndefineJQueryPlugin(Modal)\n\nexport default Modal\n","/**\n * --------------------------------------------------------------------------\n * Bootstrap offcanvas.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\nimport BaseComponent from './base-component.js'\nimport EventHandler from './dom/event-handler.js'\nimport SelectorEngine from './dom/selector-engine.js'\nimport Backdrop from 
'./util/backdrop.js'\nimport { enableDismissTrigger } from './util/component-functions.js'\nimport FocusTrap from './util/focustrap.js'\nimport {\n defineJQueryPlugin,\n isDisabled,\n isVisible\n} from './util/index.js'\nimport ScrollBarHelper from './util/scrollbar.js'\n\n/**\n * Constants\n */\n\nconst NAME = 'offcanvas'\nconst DATA_KEY = 'bs.offcanvas'\nconst EVENT_KEY = `.${DATA_KEY}`\nconst DATA_API_KEY = '.data-api'\nconst EVENT_LOAD_DATA_API = `load${EVENT_KEY}${DATA_API_KEY}`\nconst ESCAPE_KEY = 'Escape'\n\nconst CLASS_NAME_SHOW = 'show'\nconst CLASS_NAME_SHOWING = 'showing'\nconst CLASS_NAME_HIDING = 'hiding'\nconst CLASS_NAME_BACKDROP = 'offcanvas-backdrop'\nconst OPEN_SELECTOR = '.offcanvas.show'\n\nconst EVENT_SHOW = `show${EVENT_KEY}`\nconst EVENT_SHOWN = `shown${EVENT_KEY}`\nconst EVENT_HIDE = `hide${EVENT_KEY}`\nconst EVENT_HIDE_PREVENTED = `hidePrevented${EVENT_KEY}`\nconst EVENT_HIDDEN = `hidden${EVENT_KEY}`\nconst EVENT_RESIZE = `resize${EVENT_KEY}`\nconst EVENT_CLICK_DATA_API = `click${EVENT_KEY}${DATA_API_KEY}`\nconst EVENT_KEYDOWN_DISMISS = `keydown.dismiss${EVENT_KEY}`\n\nconst SELECTOR_DATA_TOGGLE = '[data-bs-toggle=\"offcanvas\"]'\n\nconst Default = {\n backdrop: true,\n keyboard: true,\n scroll: false\n}\n\nconst DefaultType = {\n backdrop: '(boolean|string)',\n keyboard: 'boolean',\n scroll: 'boolean'\n}\n\n/**\n * Class definition\n */\n\nclass Offcanvas extends BaseComponent {\n constructor(element, config) {\n super(element, config)\n\n this._isShown = false\n this._backdrop = this._initializeBackDrop()\n this._focustrap = this._initializeFocusTrap()\n this._addEventListeners()\n }\n\n // Getters\n static get Default() {\n return Default\n }\n\n static get DefaultType() {\n return DefaultType\n }\n\n static get NAME() {\n return NAME\n }\n\n // Public\n toggle(relatedTarget) {\n return this._isShown ? 
this.hide() : this.show(relatedTarget)\n }\n\n show(relatedTarget) {\n if (this._isShown) {\n return\n }\n\n const showEvent = EventHandler.trigger(this._element, EVENT_SHOW, { relatedTarget })\n\n if (showEvent.defaultPrevented) {\n return\n }\n\n this._isShown = true\n this._backdrop.show()\n\n if (!this._config.scroll) {\n new ScrollBarHelper().hide()\n }\n\n this._element.setAttribute('aria-modal', true)\n this._element.setAttribute('role', 'dialog')\n this._element.classList.add(CLASS_NAME_SHOWING)\n\n const completeCallBack = () => {\n if (!this._config.scroll || this._config.backdrop) {\n this._focustrap.activate()\n }\n\n this._element.classList.add(CLASS_NAME_SHOW)\n this._element.classList.remove(CLASS_NAME_SHOWING)\n EventHandler.trigger(this._element, EVENT_SHOWN, { relatedTarget })\n }\n\n this._queueCallback(completeCallBack, this._element, true)\n }\n\n hide() {\n if (!this._isShown) {\n return\n }\n\n const hideEvent = EventHandler.trigger(this._element, EVENT_HIDE)\n\n if (hideEvent.defaultPrevented) {\n return\n }\n\n this._focustrap.deactivate()\n this._element.blur()\n this._isShown = false\n this._element.classList.add(CLASS_NAME_HIDING)\n this._backdrop.hide()\n\n const completeCallback = () => {\n this._element.classList.remove(CLASS_NAME_SHOW, CLASS_NAME_HIDING)\n this._element.removeAttribute('aria-modal')\n this._element.removeAttribute('role')\n\n if (!this._config.scroll) {\n new ScrollBarHelper().reset()\n }\n\n EventHandler.trigger(this._element, EVENT_HIDDEN)\n }\n\n this._queueCallback(completeCallback, this._element, true)\n }\n\n dispose() {\n this._backdrop.dispose()\n this._focustrap.deactivate()\n super.dispose()\n }\n\n // Private\n _initializeBackDrop() {\n const clickCallback = () => {\n if (this._config.backdrop === 'static') {\n EventHandler.trigger(this._element, EVENT_HIDE_PREVENTED)\n return\n }\n\n this.hide()\n }\n\n // 'static' option will be translated to true, and booleans will keep their value\n const isVisible = Boolean(this._config.backdrop)\n\n return new Backdrop({\n className: CLASS_NAME_BACKDROP,\n isVisible,\n isAnimated: true,\n rootElement: this._element.parentNode,\n clickCallback: isVisible ? 
clickCallback : null\n })\n }\n\n _initializeFocusTrap() {\n return new FocusTrap({\n trapElement: this._element\n })\n }\n\n _addEventListeners() {\n EventHandler.on(this._element, EVENT_KEYDOWN_DISMISS, event => {\n if (event.key !== ESCAPE_KEY) {\n return\n }\n\n if (this._config.keyboard) {\n this.hide()\n return\n }\n\n EventHandler.trigger(this._element, EVENT_HIDE_PREVENTED)\n })\n }\n\n // Static\n static jQueryInterface(config) {\n return this.each(function () {\n const data = Offcanvas.getOrCreateInstance(this, config)\n\n if (typeof config !== 'string') {\n return\n }\n\n if (data[config] === undefined || config.startsWith('_') || config === 'constructor') {\n throw new TypeError(`No method named \"${config}\"`)\n }\n\n data[config](this)\n })\n }\n}\n\n/**\n * Data API implementation\n */\n\nEventHandler.on(document, EVENT_CLICK_DATA_API, SELECTOR_DATA_TOGGLE, function (event) {\n const target = SelectorEngine.getElementFromSelector(this)\n\n if (['A', 'AREA'].includes(this.tagName)) {\n event.preventDefault()\n }\n\n if (isDisabled(this)) {\n return\n }\n\n EventHandler.one(target, EVENT_HIDDEN, () => {\n // focus on trigger when it is closed\n if (isVisible(this)) {\n this.focus()\n }\n })\n\n // avoid conflict when clicking a toggler of an offcanvas, while another is open\n const alreadyOpen = SelectorEngine.findOne(OPEN_SELECTOR)\n if (alreadyOpen && alreadyOpen !== target) {\n Offcanvas.getInstance(alreadyOpen).hide()\n }\n\n const data = Offcanvas.getOrCreateInstance(target)\n data.toggle(this)\n})\n\nEventHandler.on(window, EVENT_LOAD_DATA_API, () => {\n for (const selector of SelectorEngine.find(OPEN_SELECTOR)) {\n Offcanvas.getOrCreateInstance(selector).show()\n }\n})\n\nEventHandler.on(window, EVENT_RESIZE, () => {\n for (const element of SelectorEngine.find('[aria-modal][class*=show][class*=offcanvas-]')) {\n if (getComputedStyle(element).position !== 'fixed') {\n Offcanvas.getOrCreateInstance(element).hide()\n }\n }\n})\n\nenableDismissTrigger(Offcanvas)\n\n/**\n * jQuery\n */\n\ndefineJQueryPlugin(Offcanvas)\n\nexport default Offcanvas\n","/**\n * --------------------------------------------------------------------------\n * Bootstrap util/sanitizer.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\n// js-docs-start allow-list\nconst ARIA_ATTRIBUTE_PATTERN = /^aria-[\\w-]*$/i\n\nexport const DefaultAllowlist = {\n // Global attributes allowed on any supplied element below.\n '*': ['class', 'dir', 'id', 'lang', 'role', ARIA_ATTRIBUTE_PATTERN],\n a: ['target', 'href', 'title', 'rel'],\n area: [],\n b: [],\n br: [],\n col: [],\n code: [],\n div: [],\n em: [],\n hr: [],\n h1: [],\n h2: [],\n h3: [],\n h4: [],\n h5: [],\n h6: [],\n i: [],\n img: ['src', 'srcset', 'alt', 'title', 'width', 'height'],\n li: [],\n ol: [],\n p: [],\n pre: [],\n s: [],\n small: [],\n span: [],\n sub: [],\n sup: [],\n strong: [],\n u: [],\n ul: []\n}\n// js-docs-end allow-list\n\nconst uriAttributes = new Set([\n 'background',\n 'cite',\n 'href',\n 'itemtype',\n 'longdesc',\n 'poster',\n 'src',\n 'xlink:href'\n])\n\n/**\n * A pattern that recognizes URLs that are safe wrt. 
XSS in URL navigation\n * contexts.\n *\n * Shout-out to Angular https://github.com/angular/angular/blob/15.2.8/packages/core/src/sanitization/url_sanitizer.ts#L38\n */\n// eslint-disable-next-line unicorn/better-regex\nconst SAFE_URL_PATTERN = /^(?!javascript:)(?:[a-z0-9+.-]+:|[^&:/?#]*(?:[/?#]|$))/i\n\nconst allowedAttribute = (attribute, allowedAttributeList) => {\n const attributeName = attribute.nodeName.toLowerCase()\n\n if (allowedAttributeList.includes(attributeName)) {\n if (uriAttributes.has(attributeName)) {\n return Boolean(SAFE_URL_PATTERN.test(attribute.nodeValue))\n }\n\n return true\n }\n\n // Check if a regular expression validates the attribute.\n return allowedAttributeList.filter(attributeRegex => attributeRegex instanceof RegExp)\n .some(regex => regex.test(attributeName))\n}\n\nexport function sanitizeHtml(unsafeHtml, allowList, sanitizeFunction) {\n if (!unsafeHtml.length) {\n return unsafeHtml\n }\n\n if (sanitizeFunction && typeof sanitizeFunction === 'function') {\n return sanitizeFunction(unsafeHtml)\n }\n\n const domParser = new window.DOMParser()\n const createdDocument = domParser.parseFromString(unsafeHtml, 'text/html')\n const elements = [].concat(...createdDocument.body.querySelectorAll('*'))\n\n for (const element of elements) {\n const elementName = element.nodeName.toLowerCase()\n\n if (!Object.keys(allowList).includes(elementName)) {\n element.remove()\n continue\n }\n\n const attributeList = [].concat(...element.attributes)\n const allowedAttributes = [].concat(allowList['*'] || [], allowList[elementName] || [])\n\n for (const attribute of attributeList) {\n if (!allowedAttribute(attribute, allowedAttributes)) {\n element.removeAttribute(attribute.nodeName)\n }\n }\n }\n\n return createdDocument.body.innerHTML\n}\n","/**\n * --------------------------------------------------------------------------\n * Bootstrap util/template-factory.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\nimport SelectorEngine from '../dom/selector-engine.js'\nimport Config from './config.js'\nimport { DefaultAllowlist, sanitizeHtml } from './sanitizer.js'\nimport { execute, getElement, isElement } from './index.js'\n\n/**\n * Constants\n */\n\nconst NAME = 'TemplateFactory'\n\nconst Default = {\n allowList: DefaultAllowlist,\n content: {}, // { selector : text , selector2 : text2 , }\n extraClass: '',\n html: false,\n sanitize: true,\n sanitizeFn: null,\n template: '
<div></div>
'\n}\n\nconst DefaultType = {\n allowList: 'object',\n content: 'object',\n extraClass: '(string|function)',\n html: 'boolean',\n sanitize: 'boolean',\n sanitizeFn: '(null|function)',\n template: 'string'\n}\n\nconst DefaultContentType = {\n entry: '(string|element|function|null)',\n selector: '(string|element)'\n}\n\n/**\n * Class definition\n */\n\nclass TemplateFactory extends Config {\n constructor(config) {\n super()\n this._config = this._getConfig(config)\n }\n\n // Getters\n static get Default() {\n return Default\n }\n\n static get DefaultType() {\n return DefaultType\n }\n\n static get NAME() {\n return NAME\n }\n\n // Public\n getContent() {\n return Object.values(this._config.content)\n .map(config => this._resolvePossibleFunction(config))\n .filter(Boolean)\n }\n\n hasContent() {\n return this.getContent().length > 0\n }\n\n changeContent(content) {\n this._checkContent(content)\n this._config.content = { ...this._config.content, ...content }\n return this\n }\n\n toHtml() {\n const templateWrapper = document.createElement('div')\n templateWrapper.innerHTML = this._maybeSanitize(this._config.template)\n\n for (const [selector, text] of Object.entries(this._config.content)) {\n this._setContent(templateWrapper, text, selector)\n }\n\n const template = templateWrapper.children[0]\n const extraClass = this._resolvePossibleFunction(this._config.extraClass)\n\n if (extraClass) {\n template.classList.add(...extraClass.split(' '))\n }\n\n return template\n }\n\n // Private\n _typeCheckConfig(config) {\n super._typeCheckConfig(config)\n this._checkContent(config.content)\n }\n\n _checkContent(arg) {\n for (const [selector, content] of Object.entries(arg)) {\n super._typeCheckConfig({ selector, entry: content }, DefaultContentType)\n }\n }\n\n _setContent(template, content, selector) {\n const templateElement = SelectorEngine.findOne(selector, template)\n\n if (!templateElement) {\n return\n }\n\n content = this._resolvePossibleFunction(content)\n\n if (!content) {\n templateElement.remove()\n return\n }\n\n if (isElement(content)) {\n this._putElementInTemplate(getElement(content), templateElement)\n return\n }\n\n if (this._config.html) {\n templateElement.innerHTML = this._maybeSanitize(content)\n return\n }\n\n templateElement.textContent = content\n }\n\n _maybeSanitize(arg) {\n return this._config.sanitize ? 
sanitizeHtml(arg, this._config.allowList, this._config.sanitizeFn) : arg\n }\n\n _resolvePossibleFunction(arg) {\n return execute(arg, [this])\n }\n\n _putElementInTemplate(element, templateElement) {\n if (this._config.html) {\n templateElement.innerHTML = ''\n templateElement.append(element)\n return\n }\n\n templateElement.textContent = element.textContent\n }\n}\n\nexport default TemplateFactory\n","/**\n * --------------------------------------------------------------------------\n * Bootstrap tooltip.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\nimport * as Popper from '@popperjs/core'\nimport BaseComponent from './base-component.js'\nimport EventHandler from './dom/event-handler.js'\nimport Manipulator from './dom/manipulator.js'\nimport { defineJQueryPlugin, execute, findShadowRoot, getElement, getUID, isRTL, noop } from './util/index.js'\nimport { DefaultAllowlist } from './util/sanitizer.js'\nimport TemplateFactory from './util/template-factory.js'\n\n/**\n * Constants\n */\n\nconst NAME = 'tooltip'\nconst DISALLOWED_ATTRIBUTES = new Set(['sanitize', 'allowList', 'sanitizeFn'])\n\nconst CLASS_NAME_FADE = 'fade'\nconst CLASS_NAME_MODAL = 'modal'\nconst CLASS_NAME_SHOW = 'show'\n\nconst SELECTOR_TOOLTIP_INNER = '.tooltip-inner'\nconst SELECTOR_MODAL = `.${CLASS_NAME_MODAL}`\n\nconst EVENT_MODAL_HIDE = 'hide.bs.modal'\n\nconst TRIGGER_HOVER = 'hover'\nconst TRIGGER_FOCUS = 'focus'\nconst TRIGGER_CLICK = 'click'\nconst TRIGGER_MANUAL = 'manual'\n\nconst EVENT_HIDE = 'hide'\nconst EVENT_HIDDEN = 'hidden'\nconst EVENT_SHOW = 'show'\nconst EVENT_SHOWN = 'shown'\nconst EVENT_INSERTED = 'inserted'\nconst EVENT_CLICK = 'click'\nconst EVENT_FOCUSIN = 'focusin'\nconst EVENT_FOCUSOUT = 'focusout'\nconst EVENT_MOUSEENTER = 'mouseenter'\nconst EVENT_MOUSELEAVE = 'mouseleave'\n\nconst AttachmentMap = {\n AUTO: 'auto',\n TOP: 'top',\n RIGHT: isRTL() ? 'left' : 'right',\n BOTTOM: 'bottom',\n LEFT: isRTL() ? 'right' : 'left'\n}\n\nconst Default = {\n allowList: DefaultAllowlist,\n animation: true,\n boundary: 'clippingParents',\n container: false,\n customClass: '',\n delay: 0,\n fallbackPlacements: ['top', 'right', 'bottom', 'left'],\n html: false,\n offset: [0, 6],\n placement: 'top',\n popperConfig: null,\n sanitize: true,\n sanitizeFn: null,\n selector: false,\n template: '
<div class=\"tooltip\" role=\"tooltip\">' +\n '<div class=\"tooltip-arrow\"></div>' +\n '<div class=\"tooltip-inner\"></div>' +\n '</div>
',\n title: '',\n trigger: 'hover focus'\n}\n\nconst DefaultType = {\n allowList: 'object',\n animation: 'boolean',\n boundary: '(string|element)',\n container: '(string|element|boolean)',\n customClass: '(string|function)',\n delay: '(number|object)',\n fallbackPlacements: 'array',\n html: 'boolean',\n offset: '(array|string|function)',\n placement: '(string|function)',\n popperConfig: '(null|object|function)',\n sanitize: 'boolean',\n sanitizeFn: '(null|function)',\n selector: '(string|boolean)',\n template: 'string',\n title: '(string|element|function)',\n trigger: 'string'\n}\n\n/**\n * Class definition\n */\n\nclass Tooltip extends BaseComponent {\n constructor(element, config) {\n if (typeof Popper === 'undefined') {\n throw new TypeError('Bootstrap\\'s tooltips require Popper (https://popper.js.org)')\n }\n\n super(element, config)\n\n // Private\n this._isEnabled = true\n this._timeout = 0\n this._isHovered = null\n this._activeTrigger = {}\n this._popper = null\n this._templateFactory = null\n this._newContent = null\n\n // Protected\n this.tip = null\n\n this._setListeners()\n\n if (!this._config.selector) {\n this._fixTitle()\n }\n }\n\n // Getters\n static get Default() {\n return Default\n }\n\n static get DefaultType() {\n return DefaultType\n }\n\n static get NAME() {\n return NAME\n }\n\n // Public\n enable() {\n this._isEnabled = true\n }\n\n disable() {\n this._isEnabled = false\n }\n\n toggleEnabled() {\n this._isEnabled = !this._isEnabled\n }\n\n toggle() {\n if (!this._isEnabled) {\n return\n }\n\n this._activeTrigger.click = !this._activeTrigger.click\n if (this._isShown()) {\n this._leave()\n return\n }\n\n this._enter()\n }\n\n dispose() {\n clearTimeout(this._timeout)\n\n EventHandler.off(this._element.closest(SELECTOR_MODAL), EVENT_MODAL_HIDE, this._hideModalHandler)\n\n if (this._element.getAttribute('data-bs-original-title')) {\n this._element.setAttribute('title', this._element.getAttribute('data-bs-original-title'))\n }\n\n this._disposePopper()\n super.dispose()\n }\n\n show() {\n if (this._element.style.display === 'none') {\n throw new Error('Please use show on visible elements')\n }\n\n if (!(this._isWithContent() && this._isEnabled)) {\n return\n }\n\n const showEvent = EventHandler.trigger(this._element, this.constructor.eventName(EVENT_SHOW))\n const shadowRoot = findShadowRoot(this._element)\n const isInTheDom = (shadowRoot || this._element.ownerDocument.documentElement).contains(this._element)\n\n if (showEvent.defaultPrevented || !isInTheDom) {\n return\n }\n\n // TODO: v6 remove this or make it optional\n this._disposePopper()\n\n const tip = this._getTipElement()\n\n this._element.setAttribute('aria-describedby', tip.getAttribute('id'))\n\n const { container } = this._config\n\n if (!this._element.ownerDocument.documentElement.contains(this.tip)) {\n container.append(tip)\n EventHandler.trigger(this._element, this.constructor.eventName(EVENT_INSERTED))\n }\n\n this._popper = this._createPopper(tip)\n\n tip.classList.add(CLASS_NAME_SHOW)\n\n // If this is a touch-enabled device we add extra\n // empty mouseover listeners to the body's immediate children;\n // only needed because of broken event delegation on iOS\n // https://www.quirksmode.org/blog/archives/2014/02/mouse_event_bub.html\n if ('ontouchstart' in document.documentElement) {\n for (const element of [].concat(...document.body.children)) {\n EventHandler.on(element, 'mouseover', noop)\n }\n }\n\n const complete = () => {\n EventHandler.trigger(this._element, 
this.constructor.eventName(EVENT_SHOWN))\n\n if (this._isHovered === false) {\n this._leave()\n }\n\n this._isHovered = false\n }\n\n this._queueCallback(complete, this.tip, this._isAnimated())\n }\n\n hide() {\n if (!this._isShown()) {\n return\n }\n\n const hideEvent = EventHandler.trigger(this._element, this.constructor.eventName(EVENT_HIDE))\n if (hideEvent.defaultPrevented) {\n return\n }\n\n const tip = this._getTipElement()\n tip.classList.remove(CLASS_NAME_SHOW)\n\n // If this is a touch-enabled device we remove the extra\n // empty mouseover listeners we added for iOS support\n if ('ontouchstart' in document.documentElement) {\n for (const element of [].concat(...document.body.children)) {\n EventHandler.off(element, 'mouseover', noop)\n }\n }\n\n this._activeTrigger[TRIGGER_CLICK] = false\n this._activeTrigger[TRIGGER_FOCUS] = false\n this._activeTrigger[TRIGGER_HOVER] = false\n this._isHovered = null // it is a trick to support manual triggering\n\n const complete = () => {\n if (this._isWithActiveTrigger()) {\n return\n }\n\n if (!this._isHovered) {\n this._disposePopper()\n }\n\n this._element.removeAttribute('aria-describedby')\n EventHandler.trigger(this._element, this.constructor.eventName(EVENT_HIDDEN))\n }\n\n this._queueCallback(complete, this.tip, this._isAnimated())\n }\n\n update() {\n if (this._popper) {\n this._popper.update()\n }\n }\n\n // Protected\n _isWithContent() {\n return Boolean(this._getTitle())\n }\n\n _getTipElement() {\n if (!this.tip) {\n this.tip = this._createTipElement(this._newContent || this._getContentForTemplate())\n }\n\n return this.tip\n }\n\n _createTipElement(content) {\n const tip = this._getTemplateFactory(content).toHtml()\n\n // TODO: remove this check in v6\n if (!tip) {\n return null\n }\n\n tip.classList.remove(CLASS_NAME_FADE, CLASS_NAME_SHOW)\n // TODO: v6 the following can be achieved with CSS only\n tip.classList.add(`bs-${this.constructor.NAME}-auto`)\n\n const tipId = getUID(this.constructor.NAME).toString()\n\n tip.setAttribute('id', tipId)\n\n if (this._isAnimated()) {\n tip.classList.add(CLASS_NAME_FADE)\n }\n\n return tip\n }\n\n setContent(content) {\n this._newContent = content\n if (this._isShown()) {\n this._disposePopper()\n this.show()\n }\n }\n\n _getTemplateFactory(content) {\n if (this._templateFactory) {\n this._templateFactory.changeContent(content)\n } else {\n this._templateFactory = new TemplateFactory({\n ...this._config,\n // the `content` var has to be after `this._config`\n // to override config.content in case of popover\n content,\n extraClass: this._resolvePossibleFunction(this._config.customClass)\n })\n }\n\n return this._templateFactory\n }\n\n _getContentForTemplate() {\n return {\n [SELECTOR_TOOLTIP_INNER]: this._getTitle()\n }\n }\n\n _getTitle() {\n return this._resolvePossibleFunction(this._config.title) || this._element.getAttribute('data-bs-original-title')\n }\n\n // Private\n _initializeOnDelegatedTarget(event) {\n return this.constructor.getOrCreateInstance(event.delegateTarget, this._getDelegateConfig())\n }\n\n _isAnimated() {\n return this._config.animation || (this.tip && this.tip.classList.contains(CLASS_NAME_FADE))\n }\n\n _isShown() {\n return this.tip && this.tip.classList.contains(CLASS_NAME_SHOW)\n }\n\n _createPopper(tip) {\n const placement = execute(this._config.placement, [this, tip, this._element])\n const attachment = AttachmentMap[placement.toUpperCase()]\n return Popper.createPopper(this._element, tip, this._getPopperConfig(attachment))\n }\n\n _getOffset() {\n const { 
offset } = this._config\n\n if (typeof offset === 'string') {\n return offset.split(',').map(value => Number.parseInt(value, 10))\n }\n\n if (typeof offset === 'function') {\n return popperData => offset(popperData, this._element)\n }\n\n return offset\n }\n\n _resolvePossibleFunction(arg) {\n return execute(arg, [this._element])\n }\n\n _getPopperConfig(attachment) {\n const defaultBsPopperConfig = {\n placement: attachment,\n modifiers: [\n {\n name: 'flip',\n options: {\n fallbackPlacements: this._config.fallbackPlacements\n }\n },\n {\n name: 'offset',\n options: {\n offset: this._getOffset()\n }\n },\n {\n name: 'preventOverflow',\n options: {\n boundary: this._config.boundary\n }\n },\n {\n name: 'arrow',\n options: {\n element: `.${this.constructor.NAME}-arrow`\n }\n },\n {\n name: 'preSetPlacement',\n enabled: true,\n phase: 'beforeMain',\n fn: data => {\n // Pre-set Popper's placement attribute in order to read the arrow sizes properly.\n // Otherwise, Popper mixes up the width and height dimensions since the initial arrow style is for top placement\n this._getTipElement().setAttribute('data-popper-placement', data.state.placement)\n }\n }\n ]\n }\n\n return {\n ...defaultBsPopperConfig,\n ...execute(this._config.popperConfig, [defaultBsPopperConfig])\n }\n }\n\n _setListeners() {\n const triggers = this._config.trigger.split(' ')\n\n for (const trigger of triggers) {\n if (trigger === 'click') {\n EventHandler.on(this._element, this.constructor.eventName(EVENT_CLICK), this._config.selector, event => {\n const context = this._initializeOnDelegatedTarget(event)\n context.toggle()\n })\n } else if (trigger !== TRIGGER_MANUAL) {\n const eventIn = trigger === TRIGGER_HOVER ?\n this.constructor.eventName(EVENT_MOUSEENTER) :\n this.constructor.eventName(EVENT_FOCUSIN)\n const eventOut = trigger === TRIGGER_HOVER ?\n this.constructor.eventName(EVENT_MOUSELEAVE) :\n this.constructor.eventName(EVENT_FOCUSOUT)\n\n EventHandler.on(this._element, eventIn, this._config.selector, event => {\n const context = this._initializeOnDelegatedTarget(event)\n context._activeTrigger[event.type === 'focusin' ? TRIGGER_FOCUS : TRIGGER_HOVER] = true\n context._enter()\n })\n EventHandler.on(this._element, eventOut, this._config.selector, event => {\n const context = this._initializeOnDelegatedTarget(event)\n context._activeTrigger[event.type === 'focusout' ? TRIGGER_FOCUS : TRIGGER_HOVER] =\n context._element.contains(event.relatedTarget)\n\n context._leave()\n })\n }\n }\n\n this._hideModalHandler = () => {\n if (this._element) {\n this.hide()\n }\n }\n\n EventHandler.on(this._element.closest(SELECTOR_MODAL), EVENT_MODAL_HIDE, this._hideModalHandler)\n }\n\n _fixTitle() {\n const title = this._element.getAttribute('title')\n\n if (!title) {\n return\n }\n\n if (!this._element.getAttribute('aria-label') && !this._element.textContent.trim()) {\n this._element.setAttribute('aria-label', title)\n }\n\n this._element.setAttribute('data-bs-original-title', title) // DO NOT USE IT. 
Is only for backwards compatibility\n this._element.removeAttribute('title')\n }\n\n _enter() {\n if (this._isShown() || this._isHovered) {\n this._isHovered = true\n return\n }\n\n this._isHovered = true\n\n this._setTimeout(() => {\n if (this._isHovered) {\n this.show()\n }\n }, this._config.delay.show)\n }\n\n _leave() {\n if (this._isWithActiveTrigger()) {\n return\n }\n\n this._isHovered = false\n\n this._setTimeout(() => {\n if (!this._isHovered) {\n this.hide()\n }\n }, this._config.delay.hide)\n }\n\n _setTimeout(handler, timeout) {\n clearTimeout(this._timeout)\n this._timeout = setTimeout(handler, timeout)\n }\n\n _isWithActiveTrigger() {\n return Object.values(this._activeTrigger).includes(true)\n }\n\n _getConfig(config) {\n const dataAttributes = Manipulator.getDataAttributes(this._element)\n\n for (const dataAttribute of Object.keys(dataAttributes)) {\n if (DISALLOWED_ATTRIBUTES.has(dataAttribute)) {\n delete dataAttributes[dataAttribute]\n }\n }\n\n config = {\n ...dataAttributes,\n ...(typeof config === 'object' && config ? config : {})\n }\n config = this._mergeConfigObj(config)\n config = this._configAfterMerge(config)\n this._typeCheckConfig(config)\n return config\n }\n\n _configAfterMerge(config) {\n config.container = config.container === false ? document.body : getElement(config.container)\n\n if (typeof config.delay === 'number') {\n config.delay = {\n show: config.delay,\n hide: config.delay\n }\n }\n\n if (typeof config.title === 'number') {\n config.title = config.title.toString()\n }\n\n if (typeof config.content === 'number') {\n config.content = config.content.toString()\n }\n\n return config\n }\n\n _getDelegateConfig() {\n const config = {}\n\n for (const [key, value] of Object.entries(this._config)) {\n if (this.constructor.Default[key] !== value) {\n config[key] = value\n }\n }\n\n config.selector = false\n config.trigger = 'manual'\n\n // In the future can be replaced with:\n // const keysWithDifferentValues = Object.entries(this._config).filter(entry => this.constructor.Default[entry[0]] !== this._config[entry[0]])\n // `Object.fromEntries(keysWithDifferentValues)`\n return config\n }\n\n _disposePopper() {\n if (this._popper) {\n this._popper.destroy()\n this._popper = null\n }\n\n if (this.tip) {\n this.tip.remove()\n this.tip = null\n }\n }\n\n // Static\n static jQueryInterface(config) {\n return this.each(function () {\n const data = Tooltip.getOrCreateInstance(this, config)\n\n if (typeof config !== 'string') {\n return\n }\n\n if (typeof data[config] === 'undefined') {\n throw new TypeError(`No method named \"${config}\"`)\n }\n\n data[config]()\n })\n }\n}\n\n/**\n * jQuery\n */\n\ndefineJQueryPlugin(Tooltip)\n\nexport default Tooltip\n","/**\n * --------------------------------------------------------------------------\n * Bootstrap popover.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\nimport Tooltip from './tooltip.js'\nimport { defineJQueryPlugin } from './util/index.js'\n\n/**\n * Constants\n */\n\nconst NAME = 'popover'\n\nconst SELECTOR_TITLE = '.popover-header'\nconst SELECTOR_CONTENT = '.popover-body'\n\nconst Default = {\n ...Tooltip.Default,\n content: '',\n offset: [0, 8],\n placement: 'right',\n template: '
<div class=\"popover\" role=\"tooltip\">' +\n    '<div class=\"popover-arrow\"></div>' +\n    '<h3 class=\"popover-header\"></h3>' +\n    '<div class=\"popover-body\"></div>' +\n    '</div>
',\n trigger: 'click'\n}\n\nconst DefaultType = {\n ...Tooltip.DefaultType,\n content: '(null|string|element|function)'\n}\n\n/**\n * Class definition\n */\n\nclass Popover extends Tooltip {\n // Getters\n static get Default() {\n return Default\n }\n\n static get DefaultType() {\n return DefaultType\n }\n\n static get NAME() {\n return NAME\n }\n\n // Overrides\n _isWithContent() {\n return this._getTitle() || this._getContent()\n }\n\n // Private\n _getContentForTemplate() {\n return {\n [SELECTOR_TITLE]: this._getTitle(),\n [SELECTOR_CONTENT]: this._getContent()\n }\n }\n\n _getContent() {\n return this._resolvePossibleFunction(this._config.content)\n }\n\n // Static\n static jQueryInterface(config) {\n return this.each(function () {\n const data = Popover.getOrCreateInstance(this, config)\n\n if (typeof config !== 'string') {\n return\n }\n\n if (typeof data[config] === 'undefined') {\n throw new TypeError(`No method named \"${config}\"`)\n }\n\n data[config]()\n })\n }\n}\n\n/**\n * jQuery\n */\n\ndefineJQueryPlugin(Popover)\n\nexport default Popover\n","/**\n * --------------------------------------------------------------------------\n * Bootstrap scrollspy.js\n * Licensed under MIT (https://github.com/twbs/bootstrap/blob/main/LICENSE)\n * --------------------------------------------------------------------------\n */\n\nimport BaseComponent from './base-component.js'\nimport EventHandler from './dom/event-handler.js'\nimport SelectorEngine from './dom/selector-engine.js'\nimport { defineJQueryPlugin, getElement, isDisabled, isVisible } from './util/index.js'\n\n/**\n * Constants\n */\n\nconst NAME = 'scrollspy'\nconst DATA_KEY = 'bs.scrollspy'\nconst EVENT_KEY = `.${DATA_KEY}`\nconst DATA_API_KEY = '.data-api'\n\nconst EVENT_ACTIVATE = `activate${EVENT_KEY}`\nconst EVENT_CLICK = `click${EVENT_KEY}`\nconst EVENT_LOAD_DATA_API = `load${EVENT_KEY}${DATA_API_KEY}`\n\nconst CLASS_NAME_DROPDOWN_ITEM = 'dropdown-item'\nconst CLASS_NAME_ACTIVE = 'active'\n\nconst SELECTOR_DATA_SPY = '[data-bs-spy=\"scroll\"]'\nconst SELECTOR_TARGET_LINKS = '[href]'\nconst SELECTOR_NAV_LIST_GROUP = '.nav, .list-group'\nconst SELECTOR_NAV_LINKS = '.nav-link'\nconst SELECTOR_NAV_ITEMS = '.nav-item'\nconst SELECTOR_LIST_ITEMS = '.list-group-item'\nconst SELECTOR_LINK_ITEMS = `${SELECTOR_NAV_LINKS}, ${SELECTOR_NAV_ITEMS} > ${SELECTOR_NAV_LINKS}, ${SELECTOR_LIST_ITEMS}`\nconst SELECTOR_DROPDOWN = '.dropdown'\nconst SELECTOR_DROPDOWN_TOGGLE = '.dropdown-toggle'\n\nconst Default = {\n offset: null, // TODO: v6 @deprecated, keep it for backwards compatibility reasons\n rootMargin: '0px 0px -25%',\n smoothScroll: false,\n target: null,\n threshold: [0.1, 0.5, 1]\n}\n\nconst DefaultType = {\n offset: '(number|null)', // TODO v6 @deprecated, keep it for backwards compatibility reasons\n rootMargin: 'string',\n smoothScroll: 'boolean',\n target: 'element',\n threshold: 'array'\n}\n\n/**\n * Class definition\n */\n\nclass ScrollSpy extends BaseComponent {\n constructor(element, config) {\n super(element, config)\n\n // this._element is the observablesContainer and config.target the menu links wrapper\n this._targetLinks = new Map()\n this._observableSections = new Map()\n this._rootElement = getComputedStyle(this._element).overflowY === 'visible' ? 
null : this._element\n this._activeTarget = null\n this._observer = null\n this._previousScrollData = {\n visibleEntryTop: 0,\n parentScrollTop: 0\n }\n this.refresh() // initialize\n }\n\n // Getters\n static get Default() {\n return Default\n }\n\n static get DefaultType() {\n return DefaultType\n }\n\n static get NAME() {\n return NAME\n }\n\n // Public\n refresh() {\n this._initializeTargetsAndObservables()\n this._maybeEnableSmoothScroll()\n\n if (this._observer) {\n this._observer.disconnect()\n } else {\n this._observer = this._getNewObserver()\n }\n\n for (const section of this._observableSections.values()) {\n this._observer.observe(section)\n }\n }\n\n dispose() {\n this._observer.disconnect()\n super.dispose()\n }\n\n // Private\n _configAfterMerge(config) {\n // TODO: on v6 target should be given explicitly & remove the {target: 'ss-target'} case\n config.target = getElement(config.target) || document.body\n\n // TODO: v6 Only for backwards compatibility reasons. Use rootMargin only\n config.rootMargin = config.offset ? `${config.offset}px 0px -30%` : config.rootMargin\n\n if (typeof config.threshold === 'string') {\n config.threshold = config.threshold.split(',').map(value => Number.parseFloat(value))\n }\n\n return config\n }\n\n _maybeEnableSmoothScroll() {\n if (!this._config.smoothScroll) {\n return\n }\n\n // unregister any previous listeners\n EventHandler.off(this._config.target, EVENT_CLICK)\n\n EventHandler.on(this._config.target, EVENT_CLICK, SELECTOR_TARGET_LINKS, event => {\n const observableSection = this._observableSections.get(event.target.hash)\n if (observableSection) {\n event.preventDefault()\n const root = this._rootElement || window\n const height = observableSection.offsetTop - this._element.offsetTop\n if (root.scrollTo) {\n root.scrollTo({ top: height, behavior: 'smooth' })\n return\n }\n\n // Chrome 60 doesn't support `scrollTo`\n root.scrollTop = height\n }\n })\n }\n\n _getNewObserver() {\n const options = {\n root: this._rootElement,\n threshold: this._config.threshold,\n rootMargin: this._config.rootMargin\n }\n\n return new IntersectionObserver(entries => this._observerCallback(entries), options)\n }\n\n // The logic of selection\n _observerCallback(entries) {\n const targetElement = entry => this._targetLinks.get(`#${entry.target.id}`)\n const activate = entry => {\n this._previousScrollData.visibleEntryTop = entry.target.offsetTop\n this._process(targetElement(entry))\n }\n\n const parentScrollTop = (this._rootElement || document.documentElement).scrollTop\n const userScrollsDown = parentScrollTop >= this._previousScrollData.parentScrollTop\n this._previousScrollData.parentScrollTop = parentScrollTop\n\n for (const entry of entries) {\n if (!entry.isIntersecting) {\n this._activeTarget = null\n this._clearActiveClass(targetElement(entry))\n\n continue\n }\n\n const entryIsLowerThanPrevious = entry.target.offsetTop >= this._previousScrollData.visibleEntryTop\n // if we are scrolling down, pick the bigger offsetTop\n if (userScrollsDown && entryIsLowerThanPrevious) {\n activate(entry)\n // if parent isn't scrolled, let's keep the first visible item, breaking the iteration\n if (!parentScrollTop) {\n return\n }\n\n continue\n }\n\n // if we are scrolling up, pick the smallest offsetTop\n if (!userScrollsDown && !entryIsLowerThanPrevious) {\n activate(entry)\n }\n }\n }\n\n _initializeTargetsAndObservables() {\n this._targetLinks = new Map()\n this._observableSections = new Map()\n\n const targetLinks = SelectorEngine.find(SELECTOR_TARGET_LINKS, 
this._config.target)\n\n for (const anchor of targetLinks) {\n // ensure that the anchor has an id and is not disabled\n if (!anchor.hash || isDisabled(anchor)) {\n continue\n }\n\n const observableSection = SelectorEngine.findOne(decodeURI(anchor.hash), this._element)\n\n // ensure that the observableSection exists & is visible\n if (isVisible(observableSection)) {\n this._targetLinks.set(decodeURI(anchor.hash), anchor)\n this._observableSections.set(anchor.hash, observableSection)\n }\n }\n }\n\n _process(target) {\n if (this._activeTarget === target) {\n return\n }\n\n this._clearActiveClass(this._config.target)\n this._activeTarget = target\n target.classList.add(CLASS_NAME_ACTIVE)\n this._activateParents(target)\n\n EventHandler.trigger(this._element, EVENT_ACTIVATE, { relatedTarget: target })\n }\n\n _activateParents(target) {\n // Activate dropdown parents\n if (target.classList.contains(CLASS_NAME_DROPDOWN_ITEM)) {\n SelectorEngine.findOne(SELECTOR_DROPDOWN_TOGGLE, target.closest(SELECTOR_DROPDOWN))\n .classList.add(CLASS_NAME_ACTIVE)\n return\n }\n\n for (const listGroup of SelectorEngine.parents(target, SELECTOR_NAV_LIST_GROUP)) {\n // Set triggered links parents as active\n // With both
<ul> and <nav>