diff --git a/video_notebooks/08_pytorch_paper_replicating_video.ipynb b/video_notebooks/08_pytorch_paper_replicating_video.ipynb index a71f5019..bf90b0db 100644 --- a/video_notebooks/08_pytorch_paper_replicating_video.ipynb +++ b/video_notebooks/08_pytorch_paper_replicating_video.ipynb @@ -2500,7 +2500,7 @@ { "cell_type": "markdown", "source": [ - "## Equation 2: Multihead Self-Attention (MSA block)\n", + "## 5. Equation 2: Multihead Self-Attention (MSA block)\n", "\n", "* **Multihead self-attention** = which part of a sequence should pay the most attention to itself?\n", " * In our case, we have a series of embedded image patches, which patch significantly relates to another patch.\n", @@ -4051,4 +4051,4 @@ "outputs": [] } ] -} \ No newline at end of file +}