From 7f9c7a45945e439904a75b10c1bd1531739d2ee3 Mon Sep 17 00:00:00 2001
From: Raj
Date: Sat, 9 Dec 2023 14:03:48 +1100
Subject: [PATCH] fixed typo in Decoder generation loop comment

---
 docs/tutorials/nmt_with_attention.ipynb | 166 ++++++++++++------------
 1 file changed, 83 insertions(+), 83 deletions(-)

diff --git a/docs/tutorials/nmt_with_attention.ipynb b/docs/tutorials/nmt_with_attention.ipynb
index ea0d7b115..91b2c4e7d 100644
--- a/docs/tutorials/nmt_with_attention.ipynb
+++ b/docs/tutorials/nmt_with_attention.ipynb
@@ -46,26 +46,26 @@
     "id": "AOpGoE2T-YXS"
    },
    "source": [
-    "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n",
-    "  \u003ctd\u003e\n",
-    "    \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/text/tutorials/nmt_with_attention\"\u003e\n",
-    "    \u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003e\n",
-    "    View on TensorFlow.org\u003c/a\u003e\n",
-    "  \u003c/td\u003e\n",
-    "  \u003ctd\u003e\n",
-    "    \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/text/blob/master/docs/tutorials/nmt_with_attention.ipynb\"\u003e\n",
-    "    \u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003e\n",
-    "    Run in Google Colab\u003c/a\u003e\n",
-    "  \u003c/td\u003e\n",
-    "  \u003ctd\u003e\n",
-    "    \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/text/blob/master/docs/tutorials/nmt_with_attention.ipynb\"\u003e\n",
-    "    \u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003e\n",
-    "    View source on GitHub\u003c/a\u003e\n",
-    "  \u003c/td\u003e\n",
-    "  \u003ctd\u003e\n",
-    "    \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/text/docs/tutorials/nmt_with_attention.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n",
-    "  \u003c/td\u003e\n",
-    "\u003c/table\u003e"
+    "<table class=\"tfo-notebook-buttons\" align=\"left\">\n",
+    "  <td>\n",
+    "    <a target=\"_blank\" href=\"https://www.tensorflow.org/text/tutorials/nmt_with_attention\">\n",
+    "    <img src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" />\n",
+    "    View on TensorFlow.org</a>\n",
+    "  </td>\n",
+    "  <td>\n",
+    "    <a target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/text/blob/master/docs/tutorials/nmt_with_attention.ipynb\">\n",
+    "    <img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />\n",
+    "    Run in Google Colab</a>\n",
+    "  </td>\n",
+    "  <td>\n",
+    "    <a target=\"_blank\" href=\"https://github.com/tensorflow/text/blob/master/docs/tutorials/nmt_with_attention.ipynb\">\n",
+    "    <img src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" />\n",
+    "    View source on GitHub</a>\n",
+    "  </td>\n",
+    "  <td>\n",
+    "    <a href=\"https://storage.googleapis.com/tensorflow_docs/text/docs/tutorials/nmt_with_attention.ipynb\"><img src=\"https://www.tensorflow.org/images/download_logo_32px.png\" />Download notebook</a>\n",
+    "  </td>\n",
+    "</table>"
    ]
   },
   {
@@ -76,16 +76,16 @@
    "source": [
     "This tutorial demonstrates how to train a sequence-to-sequence (seq2seq) model for Spanish-to-English translation roughly based on [Effective Approaches to Attention-based Neural Machine Translation](https://arxiv.org/abs/1508.04025v5) (Luong et al., 2015). \n",
     "\n",
-    "\u003ctable\u003e\n",
-    "\u003ctr\u003e\n",
-    "  \u003ctd\u003e\n",
-    "   \u003cimg width=400 src=\"https://www.tensorflow.org/images/tutorials/transformer/RNN%2Battention-words-spa.png\"/\u003e\n",
-    "  \u003c/td\u003e\n",
-    "\u003c/tr\u003e\n",
-    "\u003ctr\u003e\n",
-    "  \u003cth colspan=1\u003eThis tutorial: An encoder/decoder connected by attention.\u003c/th\u003e\n",
-    "\u003ctr\u003e\n",
-    "\u003c/table\u003e\n",
\n", + " \n", + "
This tutorial: An encoder/decoder connected by attention.
\n", "\n", "While this architecture is somewhat outdated, it is still a very useful project to work through to get a deeper understanding of sequence-to-sequence models and attention mechanisms (before going on to [Transformers](transformer.ipynb))." ] @@ -108,7 +108,7 @@ "\n", "The translation quality is reasonable for a toy example, but the generated attention plot is perhaps more interesting. This shows which parts of the input sentence has the model's attention while translating:\n", "\n", - "\u003cimg src=\"https://tensorflow.org/images/spanish-english.png\" alt=\"spanish-english attention plot\"\u003e\n", + "\"spanish-english\n", "\n", "Note: This example takes approximately 10 minutes to run." ] @@ -130,7 +130,7 @@ }, "outputs": [], "source": [ - "!pip install \"tensorflow-text\u003e=2.11\"\n", + "!pip install \"tensorflow-text>=2.11\"\n", "!pip install einops" ] }, @@ -331,7 +331,7 @@ "BUFFER_SIZE = len(context_raw)\n", "BATCH_SIZE = 64\n", "\n", - "is_train = np.random.uniform(size=(len(target_raw),)) \u003c 0.8\n", + "is_train = np.random.uniform(size=(len(target_raw),)) < 0.8\n", "\n", "train_raw = (\n", " tf.data.Dataset\n", @@ -688,20 +688,20 @@ "1. Feeding the state from the encoder's RNN to the decoder's RNN\n", "2. Feeding the attention output back to the RNN's input.\n", "\n", - "\u003ctable\u003e\n", - "\u003ctr\u003e\n", - " \u003ctd\u003e\n", - " \u003cimg width=500 src=\"https://www.tensorflow.org/images/seq2seq/attention_mechanism.jpg\"/\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003cimg width=380 src=\"https://www.tensorflow.org/images/tutorials/transformer/RNN+attention.png\"/\u003e\n", - " \u003c/td\u003e\n", - "\u003c/tr\u003e\n", - "\u003ctr\u003e\n", - " \u003cth colspan=1\u003eThe original from \u003ca href=https://arxiv.org/abs/1508.04025v5\u003eEffective Approaches to Attention-based Neural Machine Translation\u003c/a\u003e\u003c/th\u003e\n", - " \u003cth colspan=1\u003eThis tutorial's model\u003c/th\u003e\n", - "\u003ctr\u003e\n", - "\u003c/table\u003e\n" + "\n", + "\n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
The original from Effective Approaches to Attention-based Neural Machine TranslationThis tutorial's model
\n" ] }, { @@ -734,16 +734,16 @@ "\n", "The goal of the encoder is to process the context sequence into a sequence of vectors that are useful for the decoder as it attempts to predict the next output for each timestep. Since the context sequence is constant, there is no restriction on how information can flow in the encoder, so use a bidirectional-RNN to do the processing:\n", "\n", - "\u003ctable\u003e\n", - "\u003ctr\u003e\n", - " \u003ctd\u003e\n", - " \u003cimg width=500 src=\"https://tensorflow.org/images/tutorials/transformer/RNN-bidirectional.png\"/\u003e\n", - " \u003c/td\u003e\n", - "\u003c/tr\u003e\n", - "\u003ctr\u003e\n", - " \u003cth\u003eA bidirectional RNN\u003c/th\u003e\n", - "\u003ctr\u003e\n", - "\u003c/table\u003e\n", + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
\n", + " \n", + "
A bidirectional RNN
\n", "\n", "The encoder:\n", "\n", @@ -841,16 +841,16 @@ "\n", "The simplest way you could calculate a single vector from the entire sequence would be to take the average across the sequence (`layers.GlobalAveragePooling1D`). An attention layer is similar, but calculates a **weighted** average across the context sequence. Where the weights are calculated from the combination of context and \"query\" vectors.\n", "\n", - "\u003ctable\u003e\n", - "\u003ctr\u003e\n", - " \u003ctd\u003e\n", - " \u003cimg width=500 src=\"https://www.tensorflow.org/images/tutorials/transformer/CrossAttention-new-full.png\"/\u003e\n", - " \u003c/td\u003e\n", - "\u003c/tr\u003e\n", - "\u003ctr\u003e\n", - " \u003cth colspan=1\u003eThe attention layer\u003c/th\u003e\n", - "\u003ctr\u003e\n", - "\u003c/table\u003e" + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
\n", + " \n", + "
The attention layer
" ] }, { @@ -995,16 +995,16 @@ "\n", "When running inference with this model it produces one word at a time, and those are fed back into the model.\n", "\n", - "\u003ctable\u003e\n", - "\u003ctr\u003e\n", - " \u003ctd\u003e\n", - " \u003cimg width=500 src=\"https://tensorflow.org/images/tutorials/transformer/RNN.png\"/\u003e\n", - " \u003c/td\u003e\n", - "\u003c/tr\u003e\n", - "\u003ctr\u003e\n", - " \u003cth\u003eA unidirectional RNN\u003c/th\u003e\n", - "\u003ctr\u003e\n", - "\u003c/table\u003e" + "\n", + "\n", + " \n", + "\n", + "\n", + " \n", + "\n", + "
\n", + " \n", + "
A unidirectional RNN
" ] }, { @@ -1281,7 +1281,7 @@ "# Stack all the tokens together.\n", "tokens = tf.concat(tokens, axis=-1) # (batch, t)\n", "\n", - "# Convert the tokens back to a a string\n", + "# Convert the tokens back to a string\n", "result = decoder.tokens_to_text(tokens)\n", "result[:3].numpy()" ] @@ -1554,7 +1554,7 @@ "source": [ "### Translate\n", "\n", - "Now that the model is trained, implement a function to execute the full `text =\u003e text` translation. This code is basically identical to the [inference example](#inference) in the [decoder section](#the_decoder), but this also captures the attention weights." + "Now that the model is trained, implement a function to execute the full `text => text` translation. This code is basically identical to the [inference example](#inference) in the [decoder section](#the_decoder), but this also captures the attention weights." ] }, { @@ -1593,8 +1593,8 @@ " break\n", "\n", " # Stack the lists of tokens and attention weights.\n", - " tokens = tf.concat(tokens, axis=-1) # t*[(batch 1)] -\u003e (batch, t)\n", - " self.last_attention_weights = tf.concat(attention_weights, axis=1) # t*[(batch 1 s)] -\u003e (batch, t s)\n", + " tokens = tf.concat(tokens, axis=-1) # t*[(batch 1)] -> (batch, t)\n", + " self.last_attention_weights = tf.concat(attention_weights, axis=1) # t*[(batch 1 s)] -> (batch, t s)\n", "\n", " result = self.decoder.tokens_to_text(tokens)\n", " return result" @@ -2026,7 +2026,7 @@ " tokens = tokens.write(t, next_token) # next_token shape is (batch, 1)\n", " ...\n", " tokens = tokens.stack()\n", - " tokens = einops.rearrange(tokens, 't batch 1 -\u003e batch t')\n", + " tokens = einops.rearrange(tokens, 't batch 1 -> batch t')\n", "```" ] }, @@ -2080,7 +2080,7 @@ " # Convert the list of generated token ids to a list of strings.\n", " tokens = tokens.stack()\n", " shape_checker(tokens, 't batch t1')\n", - " tokens = einops.rearrange(tokens, 't batch 1 -\u003e batch t')\n", + " tokens = einops.rearrange(tokens, 't batch 1 -> batch t')\n", " shape_checker(tokens, 'batch t')\n", "\n", " text = self.decoder.tokens_to_text(tokens)\n",