From eafe86039c6a802f5abdd316ec446d3d8c094725 Mon Sep 17 00:00:00 2001 From: fxl <1475486684@qq.com> Date: Thu, 5 Dec 2024 20:49:13 +0800 Subject: [PATCH] "refactor-clear-confusion" --- ..._model_deployment_exercise_solutions.ipynb | 11564 ++++++++-------- 1 file changed, 5782 insertions(+), 5782 deletions(-) diff --git a/extras/solutions/09_pytorch_model_deployment_exercise_solutions.ipynb b/extras/solutions/09_pytorch_model_deployment_exercise_solutions.ipynb index f24aa357..b97c1019 100644 --- a/extras/solutions/09_pytorch_model_deployment_exercise_solutions.ipynb +++ b/extras/solutions/09_pytorch_model_deployment_exercise_solutions.ipynb @@ -1,5969 +1,5969 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "colab_type": "text", - "id": "view-in-github" - }, - "source": [ - "\"Open" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "view-in-github" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zNqPNlYylluR" + }, + "source": [ + "# 09. PyTorch Model Deployment Exercise Solutions\n", + "\n", + "Welcome to the 09. PyTorch Model Deployment exercise solutions.\n", + "\n", + "Your objective is to write code to satisfy each of the exercises below.\n", + "\n", + "Some starter code has been provided to make sure you have all the resources you need.\n", + "\n", + "> **Note:** There may be more than one solution to each of the exercises.\n", + "\n", + "## Resources\n", + "\n", + "1. These exercises/solutions are based on [section 09. PyTorch Model Deployment](https://www.learnpytorch.io/09_pytorch_model_deployment/) of the Learn PyTorch for Deep Learning course by Zero to Mastery.\n", + "2. See a live [walkthrough of the solutions (errors and all) on YouTube](https://youtu.be/jOX5ZCkWO-0) (but try the exercises yourself first!).\n", + "3. 
See [all solutions on the course GitHub](https://github.com/mrdbourke/pytorch-deep-learning/tree/main/extras/solutions).\n", + "\n", + "> **Note:** The first section of this notebook is dedicated to getting various helper functions and datasets used for the exercises. The exercises start at the heading \"Exercise 1: ...\"." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sf8ab9cyHTzU" + }, + "source": [ + "### Get various imports and helper functions\n", + "\n", + "The code in the following cells prepares imports and data for the exercises below. They are taken from [09. PyTorch Model Deployment](https://www.learnpytorch.io/09_pytorch_model_deployment/). " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "ChRaHUSJ8DYZ", + "outputId": "0a27f03f-33ea-4721-e4ec-858b8dc255b1" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "zNqPNlYylluR" - }, - "source": [ - "# 09. PyTorch Model Deployment Exercise Solutions\n", - "\n", - "Welcome to the 09. PyTorch Model Deployment exercise solutions.\n", - "\n", - "Your objective is to write code to satisify each of the exercises below.\n", - "\n", - "Some starter code has been provided to make sure you have all the resources you need.\n", - "\n", - "> **Note:** There may be more than one solution to each of the exercises.\n", - "\n", - "## Resources\n", - "\n", - "1. These exercises/solutions are based on [section 09. PyTorch Model Deployment](https://www.learnpytorch.io/09_pytorch_model_deployment/) of the Learn PyTorch for Deep Learning course by Zero to Mastery.\n", - "2. See a live [walkthrough of the solutions (errors and all) on YouTube](https://youtu.be/jOX5ZCkWO-0) (but try the exercises yourself first!).\n", - "3. 
See [all solutions on the course GitHub](https://github.com/mrdbourke/pytorch-deep-learning/tree/main/extras/solutions).\n", - "\n", - "> **Note:** The first section of this notebook is dedicated to getting various helper functions and datasets used for the exercises. The exercises start at the heading \"Exercise 1: ...\"." - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "torch version: 1.12.1+cu113\n", + "torchvision version: 0.13.1+cu113\n" + ] + } + ], + "source": [ + "# For this notebook to run with updated APIs, we need torch 1.12+ and torchvision 0.13+\n", + "try:\n", + " import torch\n", + " import torchvision\n", + " assert int(torch.__version__.split(\".\")[1]) >= 12, \"torch version should be 1.12+\"\n", + " assert int(torchvision.__version__.split(\".\")[1]) >= 13, \"torchvision version should be 0.13+\"\n", + " print(f\"torch version: {torch.__version__}\")\n", + " print(f\"torchvision version: {torchvision.__version__}\")\n", + "except:\n", + " print(f\"[INFO] torch/torchvision versions not as required, installing nightly versions.\")\n", + " !pip3 install -U torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113\n", + " import torch\n", + " import torchvision\n", + " print(f\"torch version: {torch.__version__}\")\n", + " print(f\"torchvision version: {torchvision.__version__}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "Y5H5P8EjCNGK" + }, + "outputs": [], + "source": [ + "# Continue with regular imports\n", + "import matplotlib.pyplot as plt\n", + "import torch\n", + "import torchvision\n", + "\n", + "from torch import nn\n", + "from torchvision import transforms\n", + "\n", + "# Try to get torchinfo, install it if it doesn't work\n", + "try:\n", + " from torchinfo import summary\n", + "except:\n", + " print(\"[INFO] Couldn't find torchinfo... 
installing it.\")\n", + " !pip install -q torchinfo\n", + " from torchinfo import summary\n", + "\n", + "# Try to import the going_modular directory, download it from GitHub if it doesn't work\n", + "try:\n", + " from going_modular.going_modular import data_setup, engine\n", + " from helper_functions import download_data, set_seeds, plot_loss_curves\n", + "except:\n", + " # Get the going_modular scripts\n", + " print(\"[INFO] Couldn't find going_modular or helper_functions scripts... downloading them from GitHub.\")\n", + " !git clone https://github.com/mrdbourke/pytorch-deep-learning\n", + " !mv pytorch-deep-learning/going_modular .\n", + " !mv pytorch-deep-learning/helper_functions.py . # get the helper_functions.py script\n", + " !rm -rf pytorch-deep-learning\n", + " from going_modular.going_modular import data_setup, engine\n", + " from helper_functions import download_data, set_seeds, plot_loss_curves" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 36 }, + "id": "bE1AAH_uCjiP", + "outputId": "8337aa8d-9a46-41ad-9f96-658e857fcf57" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "sf8ab9cyHTzU" + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" }, - "source": [ - "### Get various imports and helper functions\n", - "\n", - "The code in the following cells prepares imports and data for the exercises below. They are taken from [09. PyTorch Model Deployment](https://www.learnpytorch.io/09_pytorch_model_deployment/). 
" + "text/plain": [ + "'cuda'" ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "device" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GmS5yuvxCpLp" + }, + "source": [ + "### Get data\n", + "\n", + "Want to download the data we've been using in PyTorch Model Deployment: https://www.learnpytorch.io/09_pytorch_model_deployment/#1-getting-data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "dm772wqgCzN9", + "outputId": "ca47901f-5786-4d76-d768-58ad8349704c" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "ChRaHUSJ8DYZ", - "outputId": "0a27f03f-33ea-4721-e4ec-858b8dc255b1" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "torch version: 1.12.1+cu113\n", - "torchvision version: 0.13.1+cu113\n" - ] - } - ], - "source": [ - "# For this notebook to run with updated APIs, we need torch 1.12+ and torchvision 0.13+\n", - "try:\n", - " import torch\n", - " import torchvision\n", - " assert int(torch.__version__.split(\".\")[1]) >= 12, \"torch version should be 1.12+\"\n", - " assert int(torchvision.__version__.split(\".\")[1]) >= 13, \"torchvision version should be 0.13+\"\n", - " print(f\"torch version: {torch.__version__}\")\n", - " print(f\"torchvision version: {torchvision.__version__}\")\n", - "except:\n", - " print(f\"[INFO] torch/torchvision versions not as required, installing nightly versions.\")\n", - " !pip3 install -U torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113\n", - " import torch\n", - " import torchvision\n", - " print(f\"torch version: {torch.__version__}\")\n", - " print(f\"torchvision version: {torchvision.__version__}\")\n" - ] + "name": 
"stdout", + "output_type": "stream", + "text": [ + "[INFO] data/pizza_steak_sushi_20_percent directory exists, skipping download.\n" + ] }, { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "id": "Y5H5P8EjCNGK" - }, - "outputs": [], - "source": [ - "# Continue with regular imports\n", - "import matplotlib.pyplot as plt\n", - "import torch\n", - "import torchvision\n", - "\n", - "from torch import nn\n", - "from torchvision import transforms\n", - "\n", - "# Try to get torchinfo, install it if it doesn't work\n", - "try:\n", - " from torchinfo import summary\n", - "except:\n", - " print(\"[INFO] Couldn't find torchinfo... installing it.\")\n", - " !pip install -q torchinfo\n", - " from torchinfo import summary\n", - "\n", - "# Try to import the going_modular directory, download it from GitHub if it doesn't work\n", - "try:\n", - " from going_modular.going_modular import data_setup, engine\n", - " from helper_functions import download_data, set_seeds, plot_loss_curves\n", - "except:\n", - " # Get the going_modular scripts\n", - " print(\"[INFO] Couldn't find going_modular or helper_functions scripts... downloading them from GitHub.\")\n", - " !git clone https://github.com/mrdbourke/pytorch-deep-learning\n", - " !mv pytorch-deep-learning/going_modular .\n", - " !mv pytorch-deep-learning/helper_functions.py . 
# get the helper_functions.py script\n", - " !rm -rf pytorch-deep-learning\n", - " from going_modular.going_modular import data_setup, engine\n", - " from helper_functions import download_data, set_seeds, plot_loss_curves" + "data": { + "text/plain": [ + "PosixPath('data/pizza_steak_sushi_20_percent')" ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Download pizza, steak, sushi images from GitHub\n", + "image_path = download_data(source=\"https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi_20_percent.zip\",\n", + " destination=\"pizza_steak_sushi_20_percent\")\n", + "image_path" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "r1ML2c-dCzCi" + }, + "outputs": [], + "source": [ + "# Setup directory paths to train and test images\n", + "train_dir = image_path / \"train\"\n", + "test_dir = image_path / \"test\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nNBZ_2h_Cy86" + }, + "source": [ + "### Preprocess data\n", + "\n", + "Turn images into tensors using same code as PyTorch Paper Replicating section 2.1 and 2.2: https://www.learnpytorch.io/08_pytorch_paper_replicating/#21-prepare-transforms-for-images" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "mU0T4gP3DJdF", + "outputId": "00d58b26-a6cb-4c3f-b774-2414c36bbce9" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 36 - }, - "id": "bE1AAH_uCjiP", - "outputId": "8337aa8d-9a46-41ad-9f96-658e857fcf57" - }, - "outputs": [ - { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "string" - }, - "text/plain": [ - "'cuda'" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "device = \"cuda\" if 
torch.cuda.is_available() else \"cpu\"\n", - "device" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Manually created transforms: Compose(\n", + " Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=None)\n", + " ToTensor()\n", + ")\n" + ] + } + ], + "source": [ + "# Create image size (from Table 3 in the ViT paper) \n", + "IMG_SIZE = 224\n", + "\n", + "# Create transform pipeline manually\n", + "manual_transforms = transforms.Compose([\n", + " transforms.Resize((IMG_SIZE, IMG_SIZE)),\n", + " transforms.ToTensor(),\n", + "]) \n", + "print(f\"Manually created transforms: {manual_transforms}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "W4vWgIprDJau", + "outputId": "10423c3f-dc63-4e76-cd80-db5c6915662a" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "GmS5yuvxCpLp" - }, - "source": [ - "### Get data\n", - "\n", - "Want to download the data we've been using in PyTorch Model Deployment: https://www.learnpytorch.io/09_pytorch_model_deployment/#1-getting-data" + "data": { + "text/plain": [ + "(,\n", + " ,\n", + " ['pizza', 'steak', 'sushi'])" ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Set the batch size\n", + "BATCH_SIZE = 32 # this is lower than the ViT paper but it's because we're starting small\n", + "\n", + "# Create data loaders\n", + "train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(\n", + " train_dir=train_dir,\n", + " test_dir=test_dir,\n", + " transform=manual_transforms, # use manually created transforms\n", + " batch_size=BATCH_SIZE\n", + ")\n", + "\n", + "train_dataloader, test_dataloader, class_names" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "u7eLIFHyDJRr", + "outputId": 
"dfa3408c-0ef4-45ae-c5c7-88f8f92d0beb" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "dm772wqgCzN9", - "outputId": "ca47901f-5786-4d76-d768-58ad8349704c" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[INFO] data/pizza_steak_sushi_20_percent directory exists, skipping download.\n" - ] - }, - { - "data": { - "text/plain": [ - "PosixPath('data/pizza_steak_sushi_20_percent')" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Download pizza, steak, sushi images from GitHub\n", - "image_path = download_data(source=\"https://github.com/mrdbourke/pytorch-deep-learning/raw/main/data/pizza_steak_sushi_20_percent.zip\",\n", - " destination=\"pizza_steak_sushi_20_percent\")\n", - "image_path" + "data": { + "text/plain": [ + "(torch.Size([3, 224, 224]), tensor(2))" ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get a batch of images\n", + "image_batch, label_batch = next(iter(train_dataloader))\n", + "\n", + "# Get a single image from the batch\n", + "image, label = image_batch[0], label_batch[0]\n", + "\n", + "# View the batch shapes\n", + "image.shape, label" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 264 }, + "id": "2yyNHCmCDbSR", + "outputId": "a3d16804-3449-4105-e7e6-7da79bc495c2" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "id": "r1ML2c-dCzCi" - }, - "outputs": [], - "source": [ - "# Setup directory paths to train and test images\n", - "train_dir = image_path / \"train\"\n", - "test_dir = image_path / \"test\"" + "data": { + "image/png": "", + "text/plain": [ + "
" ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Plot image with matplotlib\n", + "plt.imshow(image.permute(1, 2, 0)) # rearrange image dimensions to suit matplotlib [color_channels, height, width] -> [height, width, color_channels]\n", + "plt.title(class_names[label])\n", + "plt.axis(False);" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nwmoMhW8IqSu" + }, + "source": [ + "## Exercise 1. Make and time predictions with both feature extractor models on the test dataset using the GPU (`device=\"cuda\"`). \n", + "\n", + "* Compare the model's prediction times on GPU vs CPU - does this close the gap between them? As in, does making predictions on the GPU make the ViT feature extractor prediction times closer to the EffNetB2 feature extractor prediction times?\n", + "* You'll find code to do these steps in [section 5. Making predictions with our trained models and timing them](https://www.learnpytorch.io/09_pytorch_model_deployment/#5-making-predictions-with-our-trained-models-and-timing-them) and [section 6. Comparing model results, prediction times and size](https://www.learnpytorch.io/09_pytorch_model_deployment/#6-comparing-model-results-prediction-times-and-size)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P8DX4FnUe0lp" + }, + "source": [ + "### Train two models on Pizza, Steak, Sushi data\n", + "\n", + "Need:\n", + "* Trained EffNetB2 feature extractor \n", + "* Trained ViT feature extractor" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W9gGOufOfD5l" + }, + "source": [ + "### EffNetB2 \n", + "\n", + "See function creation source here: https://www.learnpytorch.io/09_pytorch_model_deployment/#31-creating-a-function-to-make-an-effnetb2-feature-extractor" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "UR-P1QaBfFoZ" + }, + "outputs": [], + "source": [ + "def create_effnetb2_model(num_classes:int=3, \n", + " seed:int=42):\n", + " \"\"\"Creates an EfficientNetB2 feature extractor model and transforms.\n", + "\n", + " Args:\n", + " num_classes (int, optional): number of classes in the classifier head. \n", + " Defaults to 3.\n", + " seed (int, optional): random seed value. Defaults to 42.\n", + "\n", + " Returns:\n", + " model (torch.nn.Module): EffNetB2 feature extractor model. \n", + " transforms (torchvision.transforms): EffNetB2 image transforms.\n", + " \"\"\"\n", + " # 1, 2, 3. Create EffNetB2 pretrained weights, transforms and model\n", + " weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT\n", + " transforms = weights.transforms()\n", + " model = torchvision.models.efficientnet_b2(weights=weights)\n", + "\n", + " # 4. Freeze all layers in base model\n", + " for param in model.parameters():\n", + " param.requires_grad = False\n", + "\n", + " # 5. 
Change classifier head with random seed for reproducibility\n", + " torch.manual_seed(seed)\n", + " model.classifier = nn.Sequential(\n", + " nn.Dropout(p=0.3, inplace=True),\n", + " nn.Linear(in_features=1408, out_features=num_classes),\n", + " )\n", + " \n", + " return model, transforms" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "PcLxlyFrfUGJ", + "outputId": "47bf699d-c300-43e5-e26d-680e5518eb91" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "nNBZ_2h_Cy86" - }, - "source": [ - "### Preprocess data\n", - "\n", - "Turn images into tensors using same code as PyTorch Paper Replicating section 2.1 and 2.2: https://www.learnpytorch.io/08_pytorch_paper_replicating/#21-prepare-transforms-for-images" + "data": { + "text/plain": [ + "ImageClassification(\n", + " crop_size=[288]\n", + " resize_size=[288]\n", + " mean=[0.485, 0.456, 0.406]\n", + " std=[0.229, 0.224, 0.225]\n", + " interpolation=InterpolationMode.BICUBIC\n", + ")" ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "effnetb2, effnetb2_transforms = create_effnetb2_model()\n", + "# effnetb2\n", + "effnetb2_transforms" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "b8LqE-XMgZM6", + "outputId": "1f5b9547-8ce2-4b99-f4aa-8e90f0c7fb3d" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "mU0T4gP3DJdF", - "outputId": "00d58b26-a6cb-4c3f-b774-2414c36bbce9" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Manually created transforms: Compose(\n", - " Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=None)\n", - " ToTensor()\n", - ")\n" - ] - } - ], - "source": [ - "# Create image size (from 
Table 3 in the ViT paper) \n", - "IMG_SIZE = 224\n", - "\n", - "# Create transform pipeline manually\n", - "manual_transforms = transforms.Compose([\n", - " transforms.Resize((IMG_SIZE, IMG_SIZE)),\n", - " transforms.ToTensor(),\n", - "]) \n", - "print(f\"Manually created transforms: {manual_transforms}\")" + "data": { + "text/plain": [ + "(PosixPath('data/pizza_steak_sushi_20_percent/train'),\n", + " PosixPath('data/pizza_steak_sushi_20_percent/test'))" ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_dir, test_dir" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "IWMbHg7pf24Y", + "outputId": "39d2b3f8-7796-4723-9e02-293ca9c395f6" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "W4vWgIprDJau", - "outputId": "10423c3f-dc63-4e76-cd80-db5c6915662a" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(,\n", - " ,\n", - " ['pizza', 'steak', 'sushi'])" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Set the batch size\n", - "BATCH_SIZE = 32 # this is lower than the ViT paper but it's because we're starting small\n", - "\n", - "# Create data loaders\n", - "train_dataloader, test_dataloader, class_names = data_setup.create_dataloaders(\n", - " train_dir=train_dir,\n", - " test_dir=test_dir,\n", - " transform=manual_transforms, # use manually created transforms\n", - " batch_size=BATCH_SIZE\n", - ")\n", - "\n", - "train_dataloader, test_dataloader, class_names" + "data": { + "text/plain": [ + "(15, 5, ['pizza', 'steak', 'sushi'])" ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create dataloaders for EffNetB2 \n", + "from going_modular.going_modular import data_setup\n", + "\n", + 
"BATCH_SIZE = 32\n", + "train_dataloader_effnetb2, test_dataloader_effnetb2, class_names = data_setup.create_dataloaders(train_dir=train_dir,\n", + " test_dir=test_dir,\n", + " transform=effnetb2_transforms,\n", + " batch_size=BATCH_SIZE)\n", + "\n", + "len(train_dataloader_effnetb2), len(test_dataloader_effnetb2), class_names" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 227, + "referenced_widgets": [ + "198a73324ffa4478afc64c011df8368c", + "d9c166e1d7d9461eb3d9fe0fa2ced5b1", + "8025ad7772fb40f69b2fddfd7eab62e6", + "984c980048f24b2cbfb45df1dc3c9bd7", + "583f6c14fb3b4125a7c4486782fe7a2f", + "254d130afee243edb15dba9198e14f95", + "350d659062d34f90b0a61aca7f07b108", + "6f375d622ce84f95959f1ec00ab5b4fb", + "1b7118878ac04c4ab3a5555f59aade61", + "f7ddad2e2e604fd8b0240683f54ad8a3", + "afdc91f716ec416db36ef586ce623942" + ] }, + "id": "POcQESk6gulj", + "outputId": "2181b910-47c7-4e19-b574-607f400ef0bb" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "u7eLIFHyDJRr", - "outputId": "dfa3408c-0ef4-45ae-c5c7-88f8f92d0beb" + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "198a73324ffa4478afc64c011df8368c", + "version_major": 2, + "version_minor": 0 }, - "outputs": [ - { - "data": { - "text/plain": [ - "(torch.Size([3, 224, 224]), tensor(2))" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Get a batch of images\n", - "image_batch, label_batch = next(iter(train_dataloader))\n", - "\n", - "# Get a single image from the batch\n", - "image, label = image_batch[0], label_batch[0]\n", - "\n", - "# View the batch shapes\n", - "image.shape, label" + "text/plain": [ + " 0%| | 0/10 [00:00" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - 
"# Plot image with matplotlib\n", - "plt.imshow(image.permute(1, 2, 0)) # rearrange image dimensions to suit matplotlib [color_channels, height, width] -> [height, width, color_channels]\n", - "plt.title(class_names[label])\n", - "plt.axis(False);" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 1 | train_loss: 0.9856 | train_acc: 0.5604 | test_loss: 0.7408 | test_acc: 0.9347\n", + "Epoch: 2 | train_loss: 0.7175 | train_acc: 0.8438 | test_loss: 0.5869 | test_acc: 0.9409\n", + "Epoch: 3 | train_loss: 0.5876 | train_acc: 0.8917 | test_loss: 0.4909 | test_acc: 0.9500\n", + "Epoch: 4 | train_loss: 0.4474 | train_acc: 0.9062 | test_loss: 0.4355 | test_acc: 0.9409\n", + "Epoch: 5 | train_loss: 0.4290 | train_acc: 0.9104 | test_loss: 0.3915 | test_acc: 0.9443\n", + "Epoch: 6 | train_loss: 0.4381 | train_acc: 0.8896 | test_loss: 0.3512 | test_acc: 0.9688\n", + "Epoch: 7 | train_loss: 0.4245 | train_acc: 0.8771 | test_loss: 0.3268 | test_acc: 0.9563\n", + "Epoch: 8 | train_loss: 0.3897 | train_acc: 0.8958 | test_loss: 0.3457 | test_acc: 0.9381\n", + "Epoch: 9 | train_loss: 0.3749 | train_acc: 0.8812 | test_loss: 0.3129 | test_acc: 0.9131\n", + "Epoch: 10 | train_loss: 0.3757 | train_acc: 0.8604 | test_loss: 0.2813 | test_acc: 0.9688\n" + ] + } + ], + "source": [ + "# Train EffNetB2 feature extractor\n", + "from going_modular.going_modular import engine\n", + "\n", + "optimizer = torch.optim.Adam(params=effnetb2.parameters(), lr=1e-3)\n", + "\n", + "loss_fn = torch.nn.CrossEntropyLoss()\n", + "\n", + "set_seeds()\n", + "effnetb2_results = engine.train(model=effnetb2,\n", + " train_dataloader=train_dataloader_effnetb2,\n", + " test_dataloader=test_dataloader_effnetb2,\n", + " epochs=10,\n", + " optimizer=optimizer,\n", + " loss_fn=loss_fn,\n", + " device=device)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "dE5GngR5igju" + }, + "outputs": [], + "source": [ + "# With label_smoothing=0.1\n", + "# Epoch: 1 | 
train_loss: 1.0005 | train_acc: 0.5708 | test_loss: 0.7872 | test_acc: 0.9347\n", + "# Epoch: 2 | train_loss: 0.7704 | train_acc: 0.8500 | test_loss: 0.6603 | test_acc: 0.9409\n", + "# Epoch: 3 | train_loss: 0.6679 | train_acc: 0.8896 | test_loss: 0.5883 | test_acc: 0.9500\n", + "# Epoch: 4 | train_loss: 0.5608 | train_acc: 0.9146 | test_loss: 0.5522 | test_acc: 0.9318\n", + "# Epoch: 5 | train_loss: 0.5528 | train_acc: 0.9125 | test_loss: 0.5239 | test_acc: 0.9352\n", + "# Epoch: 6 | train_loss: 0.5718 | train_acc: 0.8875 | test_loss: 0.4973 | test_acc: 0.9597\n", + "# Epoch: 7 | train_loss: 0.5609 | train_acc: 0.8854 | test_loss: 0.4864 | test_acc: 0.9472\n", + "# Epoch: 8 | train_loss: 0.5457 | train_acc: 0.8958 | test_loss: 0.5050 | test_acc: 0.9318\n", + "# Epoch: 9 | train_loss: 0.5338 | train_acc: 0.8896 | test_loss: 0.4809 | test_acc: 0.9193\n", + "# Epoch: 10 | train_loss: 0.5417 | train_acc: 0.8500 | test_loss: 0.4654 | test_acc: 0.9625\n", + "\n", + "# Without label_smoothing=0.1\n", + "# Epoch: 1 | train_loss: 0.9856 | train_acc: 0.5604 | test_loss: 0.7408 | test_acc: 0.9347\n", + "# Epoch: 2 | train_loss: 0.7175 | train_acc: 0.8438 | test_loss: 0.5869 | test_acc: 0.9409\n", + "# Epoch: 3 | train_loss: 0.5876 | train_acc: 0.8917 | test_loss: 0.4909 | test_acc: 0.9500\n", + "# Epoch: 4 | train_loss: 0.4474 | train_acc: 0.9062 | test_loss: 0.4355 | test_acc: 0.9409\n", + "# Epoch: 5 | train_loss: 0.4290 | train_acc: 0.9104 | test_loss: 0.3915 | test_acc: 0.9443\n", + "# Epoch: 6 | train_loss: 0.4381 | train_acc: 0.8896 | test_loss: 0.3512 | test_acc: 0.9688\n", + "# Epoch: 7 | train_loss: 0.4245 | train_acc: 0.8771 | test_loss: 0.3268 | test_acc: 0.9563\n", + "# Epoch: 8 | train_loss: 0.3897 | train_acc: 0.8958 | test_loss: 0.3457 | test_acc: 0.9381\n", + "# Epoch: 9 | train_loss: 0.3749 | train_acc: 0.8812 | test_loss: 0.3129 | test_acc: 0.9131\n", + "# Epoch: 10 | train_loss: 0.3757 | train_acc: 0.8604 | test_loss: 0.2813 | test_acc: 0.9688" + ] + }, + 
{ + "cell_type": "code", + "execution_count": 16, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 458 }, + "id": "g73VUMNfi8yO", + "outputId": "d57b3bb4-5d56-4ddf-f415-6e6612811e57" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "nwmoMhW8IqSu" - }, - "source": [ - "## Exercise 1. Make and time predictions with both feature extractor models on the test dataset using the GPU (`device=\"cuda\"`). \n", - "\n", - "* Compare the model's prediction times on GPU vs CPU - does this close the gap between them? As in, does making predictions on the GPU make the ViT feature extractor prediction times closer to the EffNetB2 feature extractor prediction times?\n", - "* You'll find code to do these steps in [section 5. Making predictions with our trained models and timing them](https://www.learnpytorch.io/09_pytorch_model_deployment/#5-making-predictions-with-our-trained-models-and-timing-them) and [section 6. Comparing model results, prediction times and size](https://www.learnpytorch.io/09_pytorch_model_deployment/#6-comparing-model-results-prediction-times-and-size)." + "data": { + "image/png": "", + "text/plain": [ + "
" ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "from helper_functions import plot_loss_curves\n", + "\n", + "plot_loss_curves(effnetb2_results)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jk25LUPyioIe" + }, + "source": [ + "### Preparing and training ViT feature extractor" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "1Bp3kSv8i1B1" + }, + "outputs": [], + "source": [ + "def create_vit_model(num_classes:int=3, \n", + " seed:int=42):\n", + " \"\"\"Creates a ViT-B/16 feature extractor model and transforms.\n", + "\n", + " Args:\n", + " num_classes (int, optional): number of target classes. Defaults to 3.\n", + " seed (int, optional): random seed value for output layer. Defaults to 42.\n", + "\n", + " Returns:\n", + " model (torch.nn.Module): ViT-B/16 feature extractor model. \n", + " transforms (torchvision.transforms): ViT-B/16 image transforms.\n", + " \"\"\"\n", + " # Create ViT_B_16 pretrained weights, transforms and model\n", + " weights = torchvision.models.ViT_B_16_Weights.DEFAULT\n", + " transforms = weights.transforms()\n", + " model = torchvision.models.vit_b_16(weights=weights)\n", + "\n", + " # Freeze all layers in model\n", + " for param in model.parameters():\n", + " param.requires_grad = False\n", + "\n", + " # Change classifier head to suit our needs (this will be trainable)\n", + " torch.manual_seed(seed)\n", + " model.heads = nn.Sequential(nn.Linear(in_features=768, # keep this the same as original model\n", + " out_features=num_classes)) # update to reflect target number of classes\n", + " \n", + " return model, transforms" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "LNbhVc0BjR5X", + "outputId": "cff89102-6e62-4666-b22f-bf660eac5a65" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "P8DX4FnUe0lp" - 
}, - "source": [ - "### Train two models on Pizza, Steak, Sushi data\n", - "\n", - "Need:\n", - "* Trained EffNetB2 feature extractor \n", - "* Trained ViT feature extractor" + "data": { + "text/plain": [ + "ImageClassification(\n", + " crop_size=[224]\n", + " resize_size=[256]\n", + " mean=[0.485, 0.456, 0.406]\n", + " std=[0.229, 0.224, 0.225]\n", + " interpolation=InterpolationMode.BILINEAR\n", + ")" ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vit, vit_transforms = create_vit_model()\n", + "# vit\n", + "vit_transforms" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "yKqlWhjcjcGh", + "outputId": "5258c1ca-31f2-4404-c837-ddd1ccd045f7" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "W9gGOufOfD5l" - }, - "source": [ - "### EffNetB2 \n", - "\n", - "See function creation source here: https://www.learnpytorch.io/09_pytorch_model_deployment/#31-creating-a-function-to-make-an-effnetb2-feature-extractor" + "data": { + "text/plain": [ + "(15, 5, ['pizza', 'steak', 'sushi'])" ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create dataloaders for ViT\n", + "from going_modular.going_modular import data_setup\n", + "\n", + "BATCH_SIZE = 32\n", + "train_dataloader_vit, test_dataloader_vit, class_names = data_setup.create_dataloaders(train_dir=train_dir,\n", + " test_dir=test_dir,\n", + " transform=vit_transforms,\n", + " batch_size=BATCH_SIZE)\n", + "\n", + "len(train_dataloader_vit), len(test_dataloader_vit), class_names" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 227, + "referenced_widgets": [ + "aedc4452fd6c4e369c3b9a97f2dc7f4a", + "d80e97a6b93f4c2fb10f1dd68ef7d593", + "3e5d069cc92e4043bf5910b929fc33c5", + 
"fbe8e188bfb04a6686661761dc6d81d2", + "c104170bf4694edf925f9a1fbabe3f0c", + "3dc2eed246ea4996ad7671f71eb47394", + "187c5d5bf68643fbbe754554c5ed9143", + "f91d279664c74d53bc460d05f0af5d39", + "a3b71aabdf9e42fc9517aa924b114531", + "d2e3fd807351478abeb4fd9440310999", + "9a69b9bbcca740d6811e51bbf2c7e0c7" + ] }, + "id": "G28CNZNzjpoR", + "outputId": "970b0956-a0b1-4975-d796-16df917cadaf" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "id": "UR-P1QaBfFoZ" + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "aedc4452fd6c4e369c3b9a97f2dc7f4a", + "version_major": 2, + "version_minor": 0 }, - "outputs": [], - "source": [ - "def create_effnetb2_model(num_classes:int=3, \n", - " seed:int=42):\n", - " \"\"\"Creates an EfficientNetB2 feature extractor model and transforms.\n", - "\n", - " Args:\n", - " num_classes (int, optional): number of classes in the classifier head. \n", - " Defaults to 3.\n", - " seed (int, optional): random seed value. Defaults to 42.\n", - "\n", - " Returns:\n", - " model (torch.nn.Module): EffNetB2 feature extractor model. \n", - " transforms (torchvision.transforms): EffNetB2 image transforms.\n", - " \"\"\"\n", - " # 1, 2, 3. Create EffNetB2 pretrained weights, transforms and model\n", - " weights = torchvision.models.EfficientNet_B2_Weights.DEFAULT\n", - " transforms = weights.transforms()\n", - " model = torchvision.models.efficientnet_b2(weights=weights)\n", - "\n", - " # 4. Freeze all layers in base model\n", - " for param in model.parameters():\n", - " param.requires_grad = False\n", - "\n", - " # 5. 
Change classifier head with random seed for reproducibility\n", - " torch.manual_seed(seed)\n", - " model.classifier = nn.Sequential(\n", - " nn.Dropout(p=0.3, inplace=True),\n", - " nn.Linear(in_features=1408, out_features=num_classes),\n", - " )\n", - " \n", - " return model, transforms" + "text/plain": [ + " 0%| | 0/10 [00:00" ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plot_loss_curves(vit_results)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aDdDMVm8j5gm" + }, + "source": [ + "### Get all the images from the test path\n", + "\n", + "Want to make predictions across the test dataset images and time them on GPU to see if they're faster on GPU or CPU..." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "OlTkRkdyj8Fp", + "outputId": "21c5c064-4a9f-4286-d409-e4b6498b72ff" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "IWMbHg7pf24Y", - "outputId": "39d2b3f8-7796-4723-9e02-293ca9c395f6" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(15, 5, ['pizza', 'steak', 'sushi'])" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Create dataloaders for EffNetB2 \n", - "from going_modular.going_modular import data_setup\n", - "\n", - "BATCH_SIZE = 32\n", - "train_dataloader_effnetb2, test_dataloader_effnetb2, class_names = data_setup.create_dataloaders(train_dir=train_dir,\n", - " test_dir=test_dir,\n", - " transform=effnetb2_transforms,\n", - " batch_size=BATCH_SIZE)\n", - "\n", - "len(train_dataloader_effnetb2), len(test_dataloader_effnetb2), class_names" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 227, -
"referenced_widgets": [ - "198a73324ffa4478afc64c011df8368c", - "d9c166e1d7d9461eb3d9fe0fa2ced5b1", - "8025ad7772fb40f69b2fddfd7eab62e6", - "984c980048f24b2cbfb45df1dc3c9bd7", - "583f6c14fb3b4125a7c4486782fe7a2f", - "254d130afee243edb15dba9198e14f95", - "350d659062d34f90b0a61aca7f07b108", - "6f375d622ce84f95959f1ec00ab5b4fb", - "1b7118878ac04c4ab3a5555f59aade61", - "f7ddad2e2e604fd8b0240683f54ad8a3", - "afdc91f716ec416db36ef586ce623942" - ] - }, - "id": "POcQESk6gulj", - "outputId": "2181b910-47c7-4e19-b574-607f400ef0bb" - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "198a73324ffa4478afc64c011df8368c", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/10 [00:00" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "from helper_functions import plot_loss_curves\n", - "\n", - "plot_loss_curves(effnetb2_results)" + "data": { + "text/plain": [ + "['pizza', 'steak', 'sushi']" ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "class_names" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 36 }, + "id": "l6I_Dz5Ak8TW", + "outputId": "4106f906-7809-425d-91ec-8421982f50b2" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "jk25LUPyioIe" + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" }, - "source": [ - "### Preparing and training ViT feature extractor" + "text/plain": [ + "'cuda'" ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"cuda\" if torch.cuda.is_available() else \"cpu\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZT_GTmmTezW7" + }, + "outputs": [], + "source": [ + "import pathlib\n", + "import torch\n", + 
"\n", + "from PIL import Image\n", + "from timeit import default_timer as timer \n", + "from tqdm.auto import tqdm\n", + "from typing import List, Dict\n", + "\n", + "# 1. Create a function to return a list of dictionaries with sample, truth label, prediction, prediction probability and prediction time\n", + "def pred_and_store(paths: List[pathlib.Path], \n", + " model: torch.nn.Module,\n", + " transform: torchvision.transforms, \n", + " class_names: List[str], \n", + " device: str = \"cuda\" if torch.cuda.is_available() else \"cpu\") -> List[Dict]:\n", + " \n", + " # 2. Create an empty list to store prediction dictionaires\n", + " pred_list = []\n", + " \n", + " # 3. Loop through target paths\n", + " for path in tqdm(paths):\n", + " \n", + " # 4. Create empty dictionary to store prediction information for each sample\n", + " pred_dict = {}\n", + "\n", + " # 5. Get the sample path and ground truth class name\n", + " pred_dict[\"image_path\"] = path\n", + " class_name = path.parent.stem\n", + " pred_dict[\"class_name\"] = class_name\n", + " \n", + " # 6. Start the prediction timer\n", + " start_time = timer()\n", + " \n", + " # 7. Open image path\n", + " img = Image.open(path)\n", + " \n", + " # 8. Transform the image, add batch dimension and put image on target device\n", + " transformed_image = transform(img).unsqueeze(0).to(device) \n", + " \n", + " # 9. Prepare model for inference by sending it to target device and turning on eval() mode\n", + " model = model.to(device)\n", + " model.eval()\n", + " \n", + " # 10. 
Get prediction probability, predicition label and prediction class\n", + " with torch.inference_mode():\n", + " pred_logit = model(transformed_image) # perform inference on target sample \n", + " pred_prob = torch.softmax(pred_logit, dim=1) # turn logits into prediction probabilities\n", + " pred_label = torch.argmax(pred_prob, dim=1) # turn prediction probabilities into prediction label\n", + " pred_class = class_names[pred_label.cpu()] # hardcode prediction class to be on CPU\n", + "\n", + " # 11. Make sure things in the dictionary are on CPU (required for inspecting predictions later on) \n", + " pred_dict[\"pred_prob\"] = round(pred_prob.max().cpu().item(), 4)\n", + " pred_dict[\"pred_class\"] = pred_class\n", + " \n", + " # 12. End the timer and calculate time per pred\n", + " end_time = timer()\n", + " pred_dict[\"time_for_pred\"] = round(end_time-start_time, 4)\n", + "\n", + " # 13. Does the pred match the true label?\n", + " pred_dict[\"correct\"] = class_name == pred_class\n", + "\n", + " # 14. Add the dictionary to the list of preds\n", + " pred_list.append(pred_dict)\n", + " \n", + " # 15. Return list of prediction dictionaries\n", + " return pred_list" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "xORYjBaRlV-G", + "outputId": "c88c6ed0-6fb7-4470-c4fa-2b2639735c60" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "id": "1Bp3kSv8i1B1" - }, - "outputs": [], - "source": [ - "def create_vit_model(num_classes:int=3, \n", - " seed:int=42):\n", - " \"\"\"Creates a ViT-B/16 feature extractor model and transforms.\n", - "\n", - " Args:\n", - " num_classes (int, optional): number of target classes. Defaults to 3.\n", - " seed (int, optional): random seed value for output layer. Defaults to 42.\n", - "\n", - " Returns:\n", - " model (torch.nn.Module): ViT-B/16 feature extractor model. 
\n", - " transforms (torchvision.transforms): ViT-B/16 image transforms.\n", - " \"\"\"\n", - " # Create ViT_B_16 pretrained weights, transforms and model\n", - " weights = torchvision.models.ViT_B_16_Weights.DEFAULT\n", - " transforms = weights.transforms()\n", - " model = torchvision.models.vit_b_16(weights=weights)\n", - "\n", - " # Freeze all layers in model\n", - " for param in model.parameters():\n", - " param.requires_grad = False\n", - "\n", - " # Change classifier head to suit our needs (this will be trainable)\n", - " torch.manual_seed(seed)\n", - " model.heads = nn.Sequential(nn.Linear(in_features=768, # keep this the same as original model\n", - " out_features=num_classes)) # update to reflect target number of classes\n", - " \n", - " return model, transforms" + "data": { + "text/plain": [ + "ImageClassification(\n", + " crop_size=[288]\n", + " resize_size=[288]\n", + " mean=[0.485, 0.456, 0.406]\n", + " std=[0.229, 0.224, 0.225]\n", + " interpolation=InterpolationMode.BICUBIC\n", + ")" ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "effnetb2_transforms" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "btrxf2A3lg8i" + }, + "source": [ + "### Make and time predictions on CPU" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 49, + "referenced_widgets": [ + "1e97fcc5d26e4a8dbef0d4f341f39f46", + "3b4eb48ac44e4cdc9310fa04adae8e00", + "6b4a4941520f4181bff6cd8394cb039f", + "ef9785a565e24c559819d32c72aa3a82", + "16f48f6e2af44c45a6c8bf7c62feb80e", + "f90632431c9e4de7b359b868810c5a7d", + "ab373d07afb942819cd636173630ba58", + "a051a4276dc34b12815cd37518213d81", + "e7bfad97ff2f4042800d5938f5d05113", + "a9c96bdd52a64bd187368d93885efdfb", + "3b703b1d4ce24f949fbc49870af349bf" + ] }, + "id": "pmDd_YZ7VSrL", + "outputId": "729ce303-0fad-4332-e421-29efe8f00192" + }, + "outputs": [ { - "cell_type": 
"code", - "execution_count": 18, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "LNbhVc0BjR5X", - "outputId": "cff89102-6e62-4666-b22f-bf660eac5a65" + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1e97fcc5d26e4a8dbef0d4f341f39f46", + "version_major": 2, + "version_minor": 0 }, - "outputs": [ - { - "data": { - "text/plain": [ - "ImageClassification(\n", - " crop_size=[224]\n", - " resize_size=[256]\n", - " mean=[0.485, 0.456, 0.406]\n", - " std=[0.229, 0.224, 0.225]\n", - " interpolation=InterpolationMode.BILINEAR\n", - ")" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "vit, vit_transforms = create_vit_model()\n", - "# vit\n", - "vit_transforms" + "text/plain": [ + " 0%| | 0/150 [00:00" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plot_loss_curves(vit_results)" + "text/plain": [ + " 0%| | 0/150 [00:00\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
effnetb2_cpuvit_cpueffnetb2_gpuvit_gpu
00.2328740.5389530.0362630.019143
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + " \n", + " " ], - "source": [ - "from pathlib import Path\n", - "test_image_paths = list(Path(test_dir).glob(\"*/*.jpg\"))\n", - "len(test_image_paths)" + "text/plain": [ + " effnetb2_cpu vit_cpu effnetb2_gpu vit_gpu\n", + "0 0.232874 0.538953 0.036263 0.019143" ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "effnetb2_gpu_pred_time = get_mean_pred_time(effnetb2_preds_on_gpu)\n", + "vit_cpu_pred_time = get_mean_pred_time(vit_preds_on_cpu)\n", + "vit_gpu_pred_time = get_mean_pred_time(vit_preds_on_gpu)\n", + "\n", + "pred_times = {\"effnetb2_cpu\": effnetb2_cpu_pred_time,\n", + " \"vit_cpu\": vit_cpu_pred_time,\n", + " \"effnetb2_gpu\": effnetb2_gpu_pred_time,\n", + " \"vit_gpu\": vit_gpu_pred_time}\n", + "\n", + "pred_times_df = pd.DataFrame(pred_times, index=[0])\n", + "pred_times_df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xt7IzAngnPh_" + }, + "source": [ + "It looks like the predictions on the GPU are much faster than the CPU overall.\n", + "\n", + "And it looks like the ViT model is faster than EffNetB2 on the GPU as well.\n", + "\n", + "So potentially if we had access to a GPU in deployment, a ViT model would be better due to having lower latency (prediction time) as well as better performance.\n", + "\n", + "But if we're focused on deploying to CPU, EffNetB2 wins because of good performance + faster inference time." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MBWnDZao9w_5" + }, + "source": [ + "## Exercise 2. The ViT feature extractor seems to have more learning capacity (due to more parameters) than EffNetB2, how does it go on the larger 20% split of the entire Food101 dataset?\n", + "\n", + "* Train a ViT feature extractor on the 20% Food101 dataset for 5 epochs, just like we did with EffNetB2 in section [10. 
Creating FoodVision Big](https://www.learnpytorch.io/09_pytorch_model_deployment/#10-creating-foodvision-big).\n", + "\n", + "Want to download and split whole Food101 dataset into 20% dataset.\n", + "\n", + "E.g. instead of training on all ~100,000 images in Food101, only train and test on ~20,000 (to save time experimenting)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AdtXlrposFRt" + }, + "source": [ + "### Create ViT feature extractor for Food101\n", + "\n", + "Need to get a ViT model capable of fitting on Food101 data (freeze the base layers and update the output layers to work with 101 classes)." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "TZsxj0F3qYZj", + "outputId": "4f4a702e-908c-4bf3-d492-9eede2ec5101" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 23, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "MFs4wubEksKd", - "outputId": "6765bd64-9283-492e-e9eb-6af889ffb9a5" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[PosixPath('data/pizza_steak_sushi_20_percent/test/sushi/715227.jpg'),\n", - " PosixPath('data/pizza_steak_sushi_20_percent/test/sushi/3401466.jpg'),\n", - " PosixPath('data/pizza_steak_sushi_20_percent/test/sushi/2948087.jpg'),\n", - " PosixPath('data/pizza_steak_sushi_20_percent/test/sushi/1203702.jpg'),\n", - " PosixPath('data/pizza_steak_sushi_20_percent/test/sushi/511818.jpg')]" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test_image_paths[:5]" + "data": { + "text/plain": [ + "======================================================================================================================================================\n", + "Layer (type (var_name)) Input Shape Output Shape Param # Trainable\n", + 
"======================================================================================================================================================\n", + "VisionTransformer (VisionTransformer) [1, 3, 224, 224] [1, 101] 768 Partial\n", + "├─Conv2d (conv_proj) [1, 3, 224, 224] [1, 768, 14, 14] (590,592) False\n", + "├─Encoder (encoder) [1, 197, 768] [1, 197, 768] 151,296 False\n", + "│ └─Dropout (dropout) [1, 197, 768] [1, 197, 768] -- --\n", + "│ └─Sequential (layers) [1, 197, 768] [1, 197, 768] -- False\n", + "│ │ └─EncoderBlock (encoder_layer_0) [1, 197, 768] [1, 197, 768] (7,087,872) False\n", + "│ │ └─EncoderBlock (encoder_layer_1) [1, 197, 768] [1, 197, 768] (7,087,872) False\n", + "│ │ └─EncoderBlock (encoder_layer_2) [1, 197, 768] [1, 197, 768] (7,087,872) False\n", + "│ │ └─EncoderBlock (encoder_layer_3) [1, 197, 768] [1, 197, 768] (7,087,872) False\n", + "│ │ └─EncoderBlock (encoder_layer_4) [1, 197, 768] [1, 197, 768] (7,087,872) False\n", + "│ │ └─EncoderBlock (encoder_layer_5) [1, 197, 768] [1, 197, 768] (7,087,872) False\n", + "│ │ └─EncoderBlock (encoder_layer_6) [1, 197, 768] [1, 197, 768] (7,087,872) False\n", + "│ │ └─EncoderBlock (encoder_layer_7) [1, 197, 768] [1, 197, 768] (7,087,872) False\n", + "│ │ └─EncoderBlock (encoder_layer_8) [1, 197, 768] [1, 197, 768] (7,087,872) False\n", + "│ │ └─EncoderBlock (encoder_layer_9) [1, 197, 768] [1, 197, 768] (7,087,872) False\n", + "│ │ └─EncoderBlock (encoder_layer_10) [1, 197, 768] [1, 197, 768] (7,087,872) False\n", + "│ │ └─EncoderBlock (encoder_layer_11) [1, 197, 768] [1, 197, 768] (7,087,872) False\n", + "│ └─LayerNorm (ln) [1, 197, 768] [1, 197, 768] (1,536) False\n", + "├─Sequential (heads) [1, 768] [1, 101] -- True\n", + "│ └─Linear (0) [1, 768] [1, 101] 77,669 True\n", + "======================================================================================================================================================\n", + "Total params: 85,876,325\n", + "Trainable params: 77,669\n", + 
"Non-trainable params: 85,798,656\n", + "Total mult-adds (M): 172.54\n", + "======================================================================================================================================================\n", + "Input size (MB): 0.60\n", + "Forward/backward pass size (MB): 104.09\n", + "Params size (MB): 257.85\n", + "Estimated Total Size (MB): 362.54\n", + "======================================================================================================================================================" ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create another ViT feature model instance\n", + "vit_food101_20_percent, vit_transforms = create_vit_model(num_classes=101)\n", + "\n", + "# Print ViT model summary (uncomment for full output) \n", + "from torchinfo import summary\n", + "summary(vit_food101_20_percent, \n", + " input_size=(1, 3, 224, 224),\n", + " col_names=[\"input_size\", \"output_size\", \"num_params\", \"trainable\"],\n", + " col_width=20,\n", + " row_settings=[\"var_names\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yfhMWkQur5EI" + }, + "source": [ + "### Create Food101 data transforms \n", + "\n", + "Because of the large amount of data, going to use data augmentation to (hopefully) prevent overfitting.\n", + "\n", + "See here: https://www.learnpytorch.io/04_pytorch_custom_datasets/#81-how-to-deal-with-overfitting" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "6pyYhCpsn8bI", + "outputId": "09ac890d-90e1-43c0-96c0-5265a96bb56d" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "AoZQAC1ZfGBO" - }, - "source": [ - "### Get function for making predictions and timing them\n", - "\n", - "See the source here: 
https://www.learnpytorch.io/09_pytorch_model_deployment/#51-creating-a-function-to-make-predictions-across-the-test-dataset" + "data": { + "text/plain": [ + "Compose(\n", + " TrivialAugmentWide(num_magnitude_bins=31, interpolation=InterpolationMode.NEAREST, fill=None)\n", + " ImageClassification(\n", + " crop_size=[224]\n", + " resize_size=[256]\n", + " mean=[0.485, 0.456, 0.406]\n", + " std=[0.229, 0.224, 0.225]\n", + " interpolation=InterpolationMode.BILINEAR\n", + ")\n", + ")" ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create Food101 training data transforms (only perform data augmentation on the training images)\n", + "food101_train_transforms = torchvision.transforms.Compose([\n", + " torchvision.transforms.TrivialAugmentWide(),\n", + " vit_transforms,\n", + "])\n", + "\n", + "food101_train_transforms" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "id": "NFXVZNCzVYgV" + }, + "outputs": [], + "source": [ + "from torchvision import datasets\n", + "\n", + "# Setup data directory\n", + "from pathlib import Path\n", + "data_dir = Path(\"data\")\n", + "\n", + "# Get training data (~750 images x 101 food classes)\n", + "train_data = datasets.Food101(root=data_dir, # path to download data to\n", + " split=\"train\", # dataset split to get\n", + " transform=food101_train_transforms, # perform data augmentation on training data\n", + " download=True) # want to download?\n", + "\n", + "# Get testing data (~250 images x 101 food classes)\n", + "test_data = datasets.Food101(root=data_dir,\n", + " split=\"test\",\n", + " transform=vit_transforms, # perform normal ViT transforms on test data\n", + " download=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "kmuPigfPoOl5", + "outputId": "20dd7ada-4b6e-49fa-f663-9e9fc556268f" + }, + "outputs": [ { - "cell_type": "code", - 
"execution_count": 24, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "DdbrRRfKk4FZ", - "outputId": "9c7eb41b-45aa-4327-82dc-9991d7226368" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "['pizza', 'steak', 'sushi']" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "class_names" + "data": { + "text/plain": [ + "['apple_pie',\n", + " 'baby_back_ribs',\n", + " 'baklava',\n", + " 'beef_carpaccio',\n", + " 'beef_tartare',\n", + " 'beet_salad',\n", + " 'beignets',\n", + " 'bibimbap',\n", + " 'bread_pudding',\n", + " 'breakfast_burrito']" ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get Food101 class names\n", + "food101_class_names = train_data.classes\n", + "\n", + "# View the first 10\n", + "food101_class_names[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9XRxmce1rt1O" + }, + "source": [ + "### Create Food101 20% data splits\n", + "\n", + "Want to split whole Food101 dataset into: \n", + "* Train set: 20% of whole original Food101 train dataset\n", + "* Test set: 20% of whole original Food101 test dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "id": "SSJrk250olsV" + }, + "outputs": [], + "source": [ + "def split_dataset(dataset:torchvision.datasets, split_size:float=0.2, seed:int=42):\n", + " \"\"\"Randomly splits a given dataset into two proportions based on split_size and seed.\n", + "\n", + " Args:\n", + " dataset (torchvision.datasets): A PyTorch Dataset, typically one from torchvision.datasets.\n", + " split_size (float, optional): How much of the dataset should be split? \n", + " E.g. split_size=0.2 means there will be a 20% split and an 80% split. Defaults to 0.2.\n", + " seed (int, optional): Seed for random generator. 
Defaults to 42.\n", + "\n", + " Returns:\n", + " tuple: (random_split_1, random_split_2) where random_split_1 is of size split_size*len(dataset) and \n", + " random_split_2 is of size (1-split_size)*len(dataset).\n", + " \"\"\"\n", + " # Create split lengths based on original dataset length\n", + " length_1 = int(len(dataset) * split_size) # desired length\n", + " length_2 = len(dataset) - length_1 # remaining length\n", + " \n", + " # Print out info\n", + " print(f\"[INFO] Splitting dataset of length {len(dataset)} into splits of size: {length_1} ({int(split_size*100)}%), {length_2} ({int((1-split_size)*100)}%)\")\n", + " \n", + " # Create splits with given random seed\n", + " random_split_1, random_split_2 = torch.utils.data.random_split(dataset, \n", + " lengths=[length_1, length_2],\n", + " generator=torch.manual_seed(seed)) # set the random seed for reproducible splits\n", + " return random_split_1, random_split_2" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "yve-zgQDpFci", + "outputId": "bfaf02cd-2847-42d3-80c5-6dfd4f657c51" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 25, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 36 - }, - "id": "l6I_Dz5Ak8TW", - "outputId": "4106f906-7809-425d-91ec-8421982f50b2" - }, - "outputs": [ - { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "string" - }, - "text/plain": [ - "'cuda'" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\"cuda\" if torch.cuda.is_available() else \"cpu\"" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "[INFO] Splitting dataset of length 75750 into splits of size: 15150 (20%), 60600 (80%)\n", + "[INFO] Splitting dataset of length 25250 into splits of size: 5050 (20%), 20200 (80%)\n" + ] }, { - "cell_type": "code", - "execution_count": 26, - 
"metadata": { - "id": "ZT_GTmmTezW7" - }, - "outputs": [], - "source": [ - "import pathlib\n", - "import torch\n", - "\n", - "from PIL import Image\n", - "from timeit import default_timer as timer \n", - "from tqdm.auto import tqdm\n", - "from typing import List, Dict\n", - "\n", - "# 1. Create a function to return a list of dictionaries with sample, truth label, prediction, prediction probability and prediction time\n", - "def pred_and_store(paths: List[pathlib.Path], \n", - " model: torch.nn.Module,\n", - " transform: torchvision.transforms, \n", - " class_names: List[str], \n", - " device: str = \"cuda\" if torch.cuda.is_available() else \"cpu\") -> List[Dict]:\n", - " \n", - " # 2. Create an empty list to store prediction dictionaires\n", - " pred_list = []\n", - " \n", - " # 3. Loop through target paths\n", - " for path in tqdm(paths):\n", - " \n", - " # 4. Create empty dictionary to store prediction information for each sample\n", - " pred_dict = {}\n", - "\n", - " # 5. Get the sample path and ground truth class name\n", - " pred_dict[\"image_path\"] = path\n", - " class_name = path.parent.stem\n", - " pred_dict[\"class_name\"] = class_name\n", - " \n", - " # 6. Start the prediction timer\n", - " start_time = timer()\n", - " \n", - " # 7. Open image path\n", - " img = Image.open(path)\n", - " \n", - " # 8. Transform the image, add batch dimension and put image on target device\n", - " transformed_image = transform(img).unsqueeze(0).to(device) \n", - " \n", - " # 9. Prepare model for inference by sending it to target device and turning on eval() mode\n", - " model = model.to(device)\n", - " model.eval()\n", - " \n", - " # 10. 
Get prediction probability, predicition label and prediction class\n", - " with torch.inference_mode():\n", - " pred_logit = model(transformed_image) # perform inference on target sample \n", - " pred_prob = torch.softmax(pred_logit, dim=1) # turn logits into prediction probabilities\n", - " pred_label = torch.argmax(pred_prob, dim=1) # turn prediction probabilities into prediction label\n", - " pred_class = class_names[pred_label.cpu()] # hardcode prediction class to be on CPU\n", - "\n", - " # 11. Make sure things in the dictionary are on CPU (required for inspecting predictions later on) \n", - " pred_dict[\"pred_prob\"] = round(pred_prob.unsqueeze(0).max().cpu().item(), 4)\n", - " pred_dict[\"pred_class\"] = pred_class\n", - " \n", - " # 12. End the timer and calculate time per pred\n", - " end_time = timer()\n", - " pred_dict[\"time_for_pred\"] = round(end_time-start_time, 4)\n", - "\n", - " # 13. Does the pred match the true label?\n", - " pred_dict[\"correct\"] = class_name == pred_class\n", - "\n", - " # 14. Add the dictionary to the list of preds\n", - " pred_list.append(pred_dict)\n", - " \n", - " # 15. 
Return list of prediction dictionaries\n", - " return pred_list" + "data": { + "text/plain": [ + "(15150, 5050)" ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create training 20% split of Food101\n", + "train_data_food101_20_percent, _ = split_dataset(dataset=train_data,\n", + " split_size=0.2)\n", + "\n", + "# Create testing 20% split of Food101\n", + "test_data_food101_20_percent, _ = split_dataset(dataset=test_data,\n", + " split_size=0.2)\n", + "\n", + "len(train_data_food101_20_percent), len(test_data_food101_20_percent)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vUv8YLfnrqoZ" + }, + "source": [ + "### Create DataLoaders for Food101 20 percent data" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "tXThBOJfpROq", + "outputId": "47f541c0-278e-4579-fc97-e14de76a5a9c" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 27, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "xORYjBaRlV-G", - "outputId": "c88c6ed0-6fb7-4470-c4fa-2b2639735c60" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "ImageClassification(\n", - " crop_size=[288]\n", - " resize_size=[288]\n", - " mean=[0.485, 0.456, 0.406]\n", - " std=[0.229, 0.224, 0.225]\n", - " interpolation=InterpolationMode.BICUBIC\n", - ")" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "effnetb2_transforms" + "data": { + "text/plain": [ + "(474, 158)" ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Turn torch Datasets into DataLoaders\n", + "from torch.utils.data import DataLoader\n", + "\n", + "BATCH_SIZE = 32\n", + "NUM_WORKERS = 2\n", + "train_dataloader_food101 = DataLoader(train_data_food101_20_percent,\n", + " batch_size=BATCH_SIZE,\n", + " shuffle=True,\n", 
+ " num_workers=NUM_WORKERS)\n", + "\n", + "test_dataloader_food101 = DataLoader(test_data_food101_20_percent,\n", + " batch_size=BATCH_SIZE,\n", + " shuffle=False,\n", + " num_workers=NUM_WORKERS)\n", + "\n", + "len(train_dataloader_food101), len(test_dataloader_food101)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3ED6j6HgqMYy" + }, + "source": [ + "### Train ViT feature extractor on 20% of Food101 data\n", + "\n", + "**Note:** The cell below may take 15 mins to run on Google Colab (due to ~15,000 training images and ~5000 testing images)." + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 138, + "referenced_widgets": [ + "1650ba7f3c7a4bab89245cadb89937aa", + "e2efe6b3123e489988806acd74d20f0a", + "aed7142a2eee4822b96b65e38f703a97", + "41cbdb7ebcc8475981b9183190470fbf", + "3bb210096b4a43a7abcaf4ed911d6e00", + "1578fb055b2b409a9a66b07471aa21c3", + "98988f14959f4ca49d7af3d9b5929a0c", + "67080ebd1ca04cf7bdc642a9ea3b0697", + "9c2aa13be594439f97e7fea01fbb0afb", + "4cb5a209da54421082b58fa407bc26e9", + "5c7f60cde457493fa6d8f914accb00c1" + ] }, + "id": "d08a7xh1qPGd", + "outputId": "63f439b6-4a4a-46d7-8ca5-0f90a3a9697a" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "btrxf2A3lg8i" + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "1650ba7f3c7a4bab89245cadb89937aa", + "version_major": 2, + "version_minor": 0 }, - "source": [ - "### Make and time predictions on CPU" + "text/plain": [ + " 0%| | 0/5 [00:00\n", + "Traceback (most recent call last):\n", + " File \"/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py\", line 1510, in __del__\n", + " self._shutdown_workers()\n", + " File \"/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py\", line 1493, in _shutdown_workers\n", + " if w.is_alive():\n", + " File \"/usr/lib/python3.7/multiprocessing/process.py\", line 151, in 
is_alive\n", + " assert self._parent_pid == os.getpid(), 'can only test a child process'\n", + "AssertionError: can only test a child process\n", + "Exception ignored in: \n", + "Traceback (most recent call last):\n", + " File \"/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py\", line 1510, in __del__\n", + " self._shutdown_workers()\n", + " File \"/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py\", line 1493, in _shutdown_workers\n", + " if w.is_alive():\n", + " File \"/usr/lib/python3.7/multiprocessing/process.py\", line 151, in is_alive\n", + " assert self._parent_pid == os.getpid(), 'can only test a child process'\n", + "AssertionError: can only test a child process\n" + ] + } + ], + "source": [ + "# Loop through test DataLoader (with batch size 1)\n", + "# Make prediction with model\n", + "# Store prediction and prediction probability to dictionary\n", + "# Append dictionary to list\n", + "# Inspect list\n", + "\n", + "# Easy way: set up batch size of 1 of test data loader - from Sali1997s\n", + "# Create batch size of 1 (predict on 1 image at a time)\n", + "test_dataloader_food101_batch_size_1 = DataLoader(test_data_food101_20_percent,\n", + " batch_size=1,\n", + " shuffle=False,\n", + " num_workers=NUM_WORKERS)\n", + "\n", + "# Prepare model (do this outside the loop)\n", + "vit_food101_20_percent = vit_food101_20_percent.to(device)\n", + "vit_food101_20_percent.eval()\n", + "\n", + "# Loop through test DataLoader with batch size 1 and make predictions on each image\n", + "# store predictions and truth values to a dictionary and then append dictionary to list for inspection later\n", + "vit_food101_pred_list = []\n", + "for X, y in tqdm(test_dataloader_food101_batch_size_1):\n", + " # Send data to target device\n", + " X, y = X.to(device), y.to(device)\n", + " \n", + " # Create empty prediction dictionary (each sample gets a dictionary)\n", + " pred_dict = {} \n", + "\n", + " # Make predictions\n", + " with 
torch.inference_mode():\n", + " pred_probs = torch.softmax(vit_food101_20_percent(X), dim=1)\n", + " pred_labels = torch.argmax(pred_probs, dim=1)\n", + " pred_dict[\"pred_prob\"] = torch.max(pred_probs).cpu().numpy()\n", + " pred_dict[\"pred_label\"] = pred_labels.cpu().numpy()[0]\n", + " pred_dict[\"label\"] = y.cpu().numpy()[0]\n", + " \n", + " vit_food101_pred_list.append(pred_dict)" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "aggsGHYF2Wrv", + "outputId": "e89209d6-4d74-46a7-a068-344615623344" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 30, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 81, - "referenced_widgets": [ - "95eacc9dbddd4a56a882071b1b96a73d", - "21362da1989e4a4aa4c84dbac552115f", - "41b025a486c64cd390339bcfe21fd62a", - "8fe414cbc5a7495180415c0754c526ec", - "3f2f13e0559a4fafb4d759970be46bd6", - "694e8f3018e6426ebe598917a920a3e8", - "dfe279159de44764b1f52f7ba6c161af", - "e55a11a7724e475a8249e578083cd3de", - "20235d0a25b942cbbe3a43b0e79b084e", - "54ca6b2cd1ac478cb3aa9e09eb87e413", - "32fc7b9eff2f463a9e28b3aa7369500f", - "b891ed053df248d1865c95eeaf13fff1", - "23223f72b27f4f85bc580f5732ae97c0", - "d86109f9c9e643a3bcf15486c305f635", - "e857367fa306441ca7ca9579e906e066", - "ba94ef6625db42208129c71d5f77f950", - "87046b1dec5f4189b52f517d5aed0614", - "696eb95e9f224eefa80e8d34841ac798", - "3f1d000380fd4a2e9cd823acf803883e", - "272fd9fb794347ab855f544acc3a720c", - "a70381f756144655b998a514ae83bd38", - "d88fef0654ba494c840cef7f23c86651" - ] - }, - "id": "qBAGto93lpe4", - "outputId": "fa9d41f5-2121-4d6c-ede8-fd2ffb1ebe94" - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "95eacc9dbddd4a56a882071b1b96a73d", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/150 [00:00\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pred_probpred_labellabelis_correctpred_classlabel_class
00.79484357272Truepancakespancakes
10.61369125757Truehummushummus
20.192828738080Truepulled_pork_sandwichpulled_pork_sandwich
30.581697645151Trueguacamoleguacamole
40.8230461511Truebaby_back_ribsbaby_back_ribs
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + " \n", + " " + ], + "text/plain": [ + " pred_prob pred_label label is_correct pred_class \\\n", + "0 0.7948435 72 72 True pancakes \n", + "1 0.6136912 57 57 True hummus \n", + "2 0.19282873 80 80 True pulled_pork_sandwich \n", + "3 0.58169764 51 51 True guacamole \n", + "4 0.82304615 1 1 True baby_back_ribs \n", + "\n", + " label_class \n", + "0 pancakes \n", + "1 hummus \n", + "2 pulled_pork_sandwich \n", + "3 guacamole \n", + "4 baby_back_ribs " ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create DataFrame with various columns\n", + "pred_df_20_percent = pd.DataFrame(vit_food101_pred_list)\n", + "\n", + "# Create a column for if the prediction is correct\n", + "pred_df_20_percent[\"is_correct\"] = pred_df_20_percent[\"pred_label\"] == pred_df_20_percent[\"label\"]\n", + "\n", + "# Create class name columns (e.g. pred_label=72 -> pred_class=\"pancakes\")\n", + "pred_df_20_percent[\"pred_class\"] = pred_df_20_percent[\"pred_label\"].apply(lambda x: food101_class_names[x])\n", + "pred_df_20_percent[\"label_class\"] = pred_df_20_percent[\"label\"].apply(lambda x: food101_class_names[x])\n", + "pred_df_20_percent.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 }, + "id": "2M3SUAsg2NRj", + "outputId": "faa3253c-663e-40fe-f937-857a6e4954da" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 31, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "5eUw-OM6mNei", - "outputId": "97b3aa32-5f49-4833-cf20-9cfe19e3bc62" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "0.23287399999999997" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pred_probpred_labellabelis_correctpred_classlabel_class
31070.98356385232Falsegyozadumplings
13060.94270467144Falsepaellafried_rice
28480.9298615418Falsefrench_onion_soupbread_pudding
7080.90341496912Falsebreakfast_burritocannoli
17260.8848796843Falseonion_ringsfried_calamari
34360.87869584558Falsefrozen_yogurtice_cream
22870.858544473828Falsefish_and_chipscroque_madame
32090.84964484936Falsebreakfast_burritofalafel
18720.849399037937Falseprime_ribfilet_mignon
13380.84500131551Falsecevicheguacamole
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " ], - "source": [ - "import pandas as pd\n", - "\n", - "def get_mean_pred_time(input):\n", - " df = pd.DataFrame(input)\n", - " return df.time_for_pred.mean()\n", - "\n", - "effnetb2_cpu_pred_time = get_mean_pred_time(effnetb2_preds_on_cpu)\n", - "effnetb2_cpu_pred_time" + "text/plain": [ + " pred_prob pred_label label is_correct pred_class \\\n", + "3107 0.9835638 52 32 False gyoza \n", + "1306 0.9427046 71 44 False paella \n", + "2848 0.9298615 41 8 False french_onion_soup \n", + "708 0.90341496 9 12 False breakfast_burrito \n", + "1726 0.884879 68 43 False onion_rings \n", + "3436 0.8786958 45 58 False frozen_yogurt \n", + "2287 0.85854447 38 28 False fish_and_chips \n", + "3209 0.84964484 9 36 False breakfast_burrito \n", + "1872 0.84939903 79 37 False prime_rib \n", + "1338 0.8450013 15 51 False ceviche \n", + "\n", + " label_class \n", + "3107 dumplings \n", + "1306 fried_rice \n", + "2848 bread_pudding \n", + "708 cannoli \n", + "1726 fried_calamari \n", + "3436 ice_cream \n", + "2287 croque_madame \n", + "3209 falafel \n", + "1872 filet_mignon \n", + "1338 guacamole " ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Get top 10 samples that are \"most wrong\", e.g. highest pred_prob but wrong prediction - why??\n", + "pred_df_20_percent[pred_df_20_percent[\"is_correct\"] == False].sort_values(\"pred_prob\", ascending=False)[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xx6wDwHZ45r_" + }, + "source": [ + "Comparing the \"pred_class\" to the \"label_class\" the model is often wrong on samples that are visually similar.\n", + "\n", + "For example, gyoza and dumplings look quite the same.\n", + "\n", + "The same as paella and fried rice.\n", + "\n", + "And onion rings and fried calamari.\n", + "\n", + "The model is getting confused on similar looking classes and thus predictions are in the right \"space\" but not necessarily correct compared to the ground truth." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LH-vHr3m9_oH" + }, + "source": [ + "## Exercise 4. Evaluate the ViT feature extractor across the whole Food101 test dataset rather than just the 20% version, how does it perform?\n", + "* Does it beat the original Food101 paper's best result of 56.4% accuracy?" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "k-oDd365w4A1", + "outputId": "6bfad8ba-a61c-43a5-fa17-d1c4ec5481aa" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 32, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 81 - }, - "id": "_Bnjfb4ymyLP", - "outputId": "daa59808-446a-4f85-b8fa-21d71c36c12c" - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
effnetb2_cpuvit_cpueffnetb2_gpuvit_gpu
00.2328740.5389530.0362630.019143
\n", - "
\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "
\n", - "
\n", - " " - ], - "text/plain": [ - " effnetb2_cpu vit_cpu effnetb2_gpu vit_gpu\n", - "0 0.232874 0.538953 0.036263 0.019143" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "effnetb2_gpu_pred_time = get_mean_pred_time(effnetb2_preds_on_gpu)\n", - "vit_cpu_pred_time = get_mean_pred_time(vit_preds_on_cpu)\n", - "vit_gpu_pred_time = get_mean_pred_time(vit_preds_on_gpu)\n", - "\n", - "pred_times = {\"effnetb2_cpu\": effnetb2_cpu_pred_time,\n", - " \"vit_cpu\": vit_cpu_pred_time,\n", - " \"effnetb2_gpu\": effnetb2_gpu_pred_time,\n", - " \"vit_gpu\": vit_gpu_pred_time}\n", - "\n", - "pred_times_df = pd.DataFrame(pred_times, index=[0])\n", - "pred_times_df" + "data": { + "text/plain": [ + "25250" ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check length of Food101 test data\n", + "len(test_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "czLfR8Pjw9gb", + "outputId": "9916f6a5-1107-42f9-d582-19d84bf67da4" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "xt7IzAngnPh_" - }, - "source": [ - "It looks like the predictions on the GPU are much faster than the CPU overall.\n", - "\n", - "And it looks like the ViT model is faster than EffNetB2 on the GPU as well.\n", - "\n", - "So potentially if we had access to a GPU in deployment, a ViT model would be better due to having lower latency (prediction time) as well as better performance.\n", - "\n", - "But if we're focused on deploying to CPU, EffNetB2 wins because of good performance + faster inference time." 
+ "data": { + "text/plain": [ + "25250" ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Turn Food101 test data into DataLoader\n", + "# Easy way: set up batch size of 1 of test data loader - from Sali1997s\n", + "# Create batch size of 1 (predict on 1 image at a time)\n", + "test_dataloader_food101_all_data_batch_size_1 = DataLoader(test_data,\n", + " batch_size=1,\n", + " shuffle=False,\n", + " num_workers=NUM_WORKERS)\n", + "\n", + "len(test_dataloader_food101_all_data_batch_size_1)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 49, + "referenced_widgets": [ + "b972c74283244c4580489025f2306921", + "62c29ae67a5c4f658f74f94caf75abd8", + "fdaf41692b1b4205bbe2b8570051062f", + "80e05c26a564402da7e3b52fddc8845f", + "5b5b28d04c76476fb2bfa4a23bd241cb", + "d2686785dee340b489781070fd0e4ab8", + "5d717bf09ecb441e8e61e57d92a623df", + "eb61d02241d3438fbf44a3e629adfcb6", + "ba54748f87e840ba9a3fcc8b2f30c135", + "26bcaf624dd24d68aa15aef3a0866d2d", + "c7bbdad837be48179a660a2aea18005d" + ] }, + "id": "dWxceTz3VmeB", + "outputId": "64b278ab-60e3-40ab-c02d-380a7ab19ce5" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "MBWnDZao9w_5" + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b972c74283244c4580489025f2306921", + "version_major": 2, + "version_minor": 0 }, - "source": [ - "## Exercise 2. The ViT feature extractor seems to have more learning capacity (due to more parameters) than EffNetB2, how does it go on the larger 20% split of the entire Food101 dataset?\n", - "\n", - "* Train a ViT feature extractor on the 20% Food101 dataset for 5 epochs, just like we did with EffNetB2 in section [10. 
Creating FoodVision Big](https://www.learnpytorch.io/09_pytorch_model_deployment/#10-creating-foodvision-big).\n", - "\n", - "Want to download and split whole Food101 dataset into 20% dataset.\n", - "\n", - "E.g. instead of training on all ~100,000 images in Food101, only train and test on ~20,000 (to save time experimenting)." + "text/plain": [ + " 0%| | 0/25250 [00:00\n", - "Traceback (most recent call last):\n", - " File \"/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py\", line 1510, in __del__\n", - " self._shutdown_workers()\n", - " File \"/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py\", line 1493, in _shutdown_workers\n", - " if w.is_alive():\n", - " File \"/usr/lib/python3.7/multiprocessing/process.py\", line 151, in is_alive\n", - " assert self._parent_pid == os.getpid(), 'can only test a child process'\n", - "AssertionError: can only test a child process\n", - "Exception ignored in: \n", - "Traceback (most recent call last):\n", - " File \"/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py\", line 1510, in __del__\n", - " self._shutdown_workers()\n", - " File \"/usr/local/lib/python3.7/dist-packages/torch/utils/data/dataloader.py\", line 1493, in _shutdown_workers\n", - " if w.is_alive():\n", - " File \"/usr/lib/python3.7/multiprocessing/process.py\", line 151, in is_alive\n", - " assert self._parent_pid == os.getpid(), 'can only test a child process'\n", - "AssertionError: can only test a child process\n" - ] - } - ], - "source": [ - "# Loop through test DataLoader (with batch size 1)\n", - "# Make prediction with model\n", - "# Store prediction and prediction probability to dictionary\n", - "# Append dictionary to list\n", - "# Inspect list\n", - "\n", - "# Easy way: set up batch size of 1 of test data loader - from Sali1997s\n", - "# Create batch size of 1 (predict on 1 image at a time)\n", - "test_dataloader_food101_batch_size_1 = DataLoader(test_data_food101_20_percent,\n", - " 
batch_size=1,\n", - " shuffle=False,\n", - " num_workers=NUM_WORKERS)\n", - "\n", - "# Prepare model (do this outside the loop)\n", - "vit_food101_20_percent = vit_food101_20_percent.to(device)\n", - "vit_food101_20_percent.eval()\n", - "\n", - "# Loop through test DataLoader with batch size 1 and make predictions on each image\n", - "# store predictions and truth values to a dictionary and then append dictionary to list for inspection later\n", - "vit_food101_pred_list = []\n", - "for X, y in tqdm(test_dataloader_food101_batch_size_1):\n", - " # Send data to target device\n", - " X, y = X.to(device), y.to(device)\n", - " \n", - " # Create empty prediction dictionary (each sample gets a dictionary)\n", - " pred_dict = {} \n", - "\n", - " # Make predictions\n", - " with torch.inference_mode():\n", - " pred_probs = torch.softmax(vit_food101_20_percent(X), dim=1)\n", - " pred_labels = torch.argmax(pred_probs, dim=1)\n", - " pred_dict[\"pred_prob\"] = torch.max(pred_probs).cpu().numpy()\n", - " pred_dict[\"pred_label\"] = pred_labels.cpu().numpy()[0]\n", - " pred_dict[\"label\"] = y.cpu().numpy()[0]\n", - " \n", - " vit_food101_pred_list.append(pred_dict)" - ] + "34b0ac29e4744ec5866c3cf31c2cab57": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": 
null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "aggsGHYF2Wrv", - "outputId": "e89209d6-4d74-46a7-a068-344615623344" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "['apple_pie', 'baby_back_ribs', 'baklava', 'beef_carpaccio', 'beef_tartare']" - ] - }, - "execution_count": 70, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "food101_class_names[:5]" - ] + "350d659062d34f90b0a61aca7f07b108": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 206 - }, - "id": "oF2kZZTG0KNo", - "outputId": "d8ea3694-7b41-4d4c-a1fa-980b6589e269" - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
pred_probpred_labellabelis_correctpred_classlabel_class
00.79484357272Truepancakespancakes
10.61369125757Truehummushummus
20.192828738080Truepulled_pork_sandwichpulled_pork_sandwich
30.581697645151Trueguacamoleguacamole
40.8230461511Truebaby_back_ribsbaby_back_ribs
\n", - "
\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "
\n", - "
\n", - " " - ], - "text/plain": [ - " pred_prob pred_label label is_correct pred_class \\\n", - "0 0.7948435 72 72 True pancakes \n", - "1 0.6136912 57 57 True hummus \n", - "2 0.19282873 80 80 True pulled_pork_sandwich \n", - "3 0.58169764 51 51 True guacamole \n", - "4 0.82304615 1 1 True baby_back_ribs \n", - "\n", - " label_class \n", - "0 pancakes \n", - "1 hummus \n", - "2 pulled_pork_sandwich \n", - "3 guacamole \n", - "4 baby_back_ribs " - ] - }, - "execution_count": 71, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Create DataFrame with various columns\n", - "pred_df_20_percent = pd.DataFrame(vit_food101_pred_list)\n", - "\n", - "# Create a column for if the prediction is correct\n", - "pred_df_20_percent[\"is_correct\"] = pred_df_20_percent[\"pred_label\"] == pred_df_20_percent[\"label\"]\n", - "\n", - "# Create class name columns (e.g. pred_label=72 -> pred_class=\"pancakes\")\n", - "pred_df_20_percent[\"pred_class\"] = pred_df_20_percent[\"pred_label\"].apply(lambda x: food101_class_names[x])\n", - "pred_df_20_percent[\"label_class\"] = pred_df_20_percent[\"label\"].apply(lambda x: food101_class_names[x])\n", - "pred_df_20_percent.head()" - ] + "3b4eb48ac44e4cdc9310fa04adae8e00": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f90632431c9e4de7b359b868810c5a7d", + "placeholder": "​", + "style": "IPY_MODEL_ab373d07afb942819cd636173630ba58", + "value": "100%" + } }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 363 - }, - 
"id": "2M3SUAsg2NRj", - "outputId": "faa3253c-663e-40fe-f937-857a6e4954da" - }, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "
\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
pred_probpred_labellabelis_correctpred_classlabel_class
31070.98356385232Falsegyozadumplings
13060.94270467144Falsepaellafried_rice
28480.9298615418Falsefrench_onion_soupbread_pudding
7080.90341496912Falsebreakfast_burritocannoli
17260.8848796843Falseonion_ringsfried_calamari
34360.87869584558Falsefrozen_yogurtice_cream
22870.858544473828Falsefish_and_chipscroque_madame
32090.84964484936Falsebreakfast_burritofalafel
18720.849399037937Falseprime_ribfilet_mignon
13380.84500131551Falsecevicheguacamole
\n", - "
\n", - " \n", - " \n", - " \n", - "\n", - " \n", - "
\n", - "
\n", - " " - ], - "text/plain": [ - " pred_prob pred_label label is_correct pred_class \\\n", - "3107 0.9835638 52 32 False gyoza \n", - "1306 0.9427046 71 44 False paella \n", - "2848 0.9298615 41 8 False french_onion_soup \n", - "708 0.90341496 9 12 False breakfast_burrito \n", - "1726 0.884879 68 43 False onion_rings \n", - "3436 0.8786958 45 58 False frozen_yogurt \n", - "2287 0.85854447 38 28 False fish_and_chips \n", - "3209 0.84964484 9 36 False breakfast_burrito \n", - "1872 0.84939903 79 37 False prime_rib \n", - "1338 0.8450013 15 51 False ceviche \n", - "\n", - " label_class \n", - "3107 dumplings \n", - "1306 fried_rice \n", - "2848 bread_pudding \n", - "708 cannoli \n", - "1726 fried_calamari \n", - "3436 ice_cream \n", - "2287 croque_madame \n", - "3209 falafel \n", - "1872 filet_mignon \n", - "1338 guacamole " - ] - }, - "execution_count": 73, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Get top 10 samples that are \"most wrong\", e.g. highest pred_prob but wrong prediction - why??\n", - "pred_df_20_percent[pred_df_20_percent[\"is_correct\"] == False].sort_values(\"pred_prob\", ascending=False)[:10]" - ] + "3b703b1d4ce24f949fbc49870af349bf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "xx6wDwHZ45r_" - }, - "source": [ - "Comparing the \"pred_class\" to the \"label_class\" the model is often wrong on samples that are visually similar.\n", - "\n", - "For example, gyoza and dumplings look quite the same.\n", - "\n", - "The same as paella and fried rice.\n", - "\n", - "And onion rings and 
fried calamari.\n", - "\n", - "The model is getting confused on similar looking classes and thus predictions are in the right \"space\" but not necessarily correct compared to the ground truth." - ] + "3bb210096b4a43a7abcaf4ed911d6e00": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } }, - { - "cell_type": "markdown", - "metadata": { - "id": "LH-vHr3m9_oH" - }, - "source": [ - "## Exercise 4. Evaluate the ViT feature extractor across the whole Food101 test dataset rather than just the 20% version, how does it perform?\n", - "* Does it beat the original Food101 paper's best result of 56.4% accuracy?" 
- ] + "3cafb6d12ee149e3acedbfa568346e33": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "k-oDd365w4A1", - "outputId": "6bfad8ba-a61c-43a5-fa17-d1c4ec5481aa" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "25250" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } + "3dc2eed246ea4996ad7671f71eb47394": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": 
null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3e5d069cc92e4043bf5910b929fc33c5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f91d279664c74d53bc460d05f0af5d39", + "max": 10, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_a3b71aabdf9e42fc9517aa924b114531", + "value": 10 + } + }, + "3f1d000380fd4a2e9cd823acf803883e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + 
"top": null, + "visibility": null, + "width": null + } + }, + "3f2f13e0559a4fafb4d759970be46bd6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "41b025a486c64cd390339bcfe21fd62a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e55a11a7724e475a8249e578083cd3de", + "max": 150, + "min": 0, + "orientation": "horizontal", + "style": 
"IPY_MODEL_20235d0a25b942cbbe3a43b0e79b084e", + "value": 150 + } + }, + "41cbdb7ebcc8475981b9183190470fbf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4cb5a209da54421082b58fa407bc26e9", + "placeholder": "​", + "style": "IPY_MODEL_5c7f60cde457493fa6d8f914accb00c1", + "value": " 5/5 [15:31<00:00, 183.71s/it]" + } + }, + "4a57bee54360480f98bd88aaa8549d0b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c1364b3347f24575a7726165e145a054", + "placeholder": "​", + "style": "IPY_MODEL_8cc7a775f8e142a6b45317cd705c036a", + "value": "100%" + } + }, + "4a79c15365174feea8dc1658133756c5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + 
"grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4cb5a209da54421082b58fa407bc26e9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "52c543fb5696491f955f285bc4187b8d": { + "model_module": "@jupyter-widgets/base", + 
"model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "543117542e0c470c8b38b177669e3dd0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": 
null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "54ca6b2cd1ac478cb3aa9e09eb87e413": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "583f6c14fb3b4125a7c4486782fe7a2f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5b5b28d04c76476fb2bfa4a23bd241cb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": 
null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5c7f60cde457493fa6d8f914accb00c1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5d717bf09ecb441e8e61e57d92a623df": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "607c1f043b7346edb799452c68c9e7b1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + 
"margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "62c29ae67a5c4f658f74f94caf75abd8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d2686785dee340b489781070fd0e4ab8", + "placeholder": "​", + "style": "IPY_MODEL_5d717bf09ecb441e8e61e57d92a623df", + "value": "100%" + } + }, + "656d68087a7b4e2397ae787e863ee428": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "67080ebd1ca04cf7bdc642a9ea3b0697": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + 
"flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "694e8f3018e6426ebe598917a920a3e8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "696eb95e9f224eefa80e8d34841ac798": { + 
"model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "6b4a4941520f4181bff6cd8394cb039f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a051a4276dc34b12815cd37518213d81", + "max": 150, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_e7bfad97ff2f4042800d5938f5d05113", + "value": 150 + } + }, + "6f375d622ce84f95959f1ec00ab5b4fb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + 
"justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "76a9c2e5ee364f6ab19cf7ddcd1ef310": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8025ad7772fb40f69b2fddfd7eab62e6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_6f375d622ce84f95959f1ec00ab5b4fb", + "max": 10, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_1b7118878ac04c4ab3a5555f59aade61", + "value": 10 + } + }, + "80e05c26a564402da7e3b52fddc8845f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_26bcaf624dd24d68aa15aef3a0866d2d", + "placeholder": "​", + "style": "IPY_MODEL_c7bbdad837be48179a660a2aea18005d", + "value": " 25250/25250 [05:38<00:00, 77.53it/s]" + } + }, + "87046b1dec5f4189b52f517d5aed0614": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + 
"max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8cc7a775f8e142a6b45317cd705c036a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8fe414cbc5a7495180415c0754c526ec": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_54ca6b2cd1ac478cb3aa9e09eb87e413", + "placeholder": "​", + "style": "IPY_MODEL_32fc7b9eff2f463a9e28b3aa7369500f", + "value": " 150/150 [00:05<00:00, 37.37it/s]" + } + }, + "95eacc9dbddd4a56a882071b1b96a73d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_21362da1989e4a4aa4c84dbac552115f", + 
"IPY_MODEL_41b025a486c64cd390339bcfe21fd62a", + "IPY_MODEL_8fe414cbc5a7495180415c0754c526ec" ], - "source": [ - "# Check length of Food101 test data\n", - "len(test_data)" - ] + "layout": "IPY_MODEL_3f2f13e0559a4fafb4d759970be46bd6" + } }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "czLfR8Pjw9gb", - "outputId": "9916f6a5-1107-42f9-d582-19d84bf67da4" - }, - "outputs": [ - { - "data": { - "text/plain": [ - "25250" - ] - }, - "execution_count": 74, - "metadata": {}, - "output_type": "execute_result" - } + "984c980048f24b2cbfb45df1dc3c9bd7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f7ddad2e2e604fd8b0240683f54ad8a3", + "placeholder": "​", + "style": "IPY_MODEL_afdc91f716ec416db36ef586ce623942", + "value": " 10/10 [01:10<00:00, 5.99s/it]" + } + }, + "98988f14959f4ca49d7af3d9b5929a0c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9a69b9bbcca740d6811e51bbf2c7e0c7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", 
+ "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "9c2aa13be594439f97e7fea01fbb0afb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a051a4276dc34b12815cd37518213d81": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a152e63878824439bb7f4fc916e80f5b": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_52c543fb5696491f955f285bc4187b8d", + "placeholder": "​", + "style": "IPY_MODEL_f908c3aa76b9414795e0a7345ffaa7ca", + "value": " 5050/5050 [01:07<00:00, 40.17it/s]" + } + }, + "a3b71aabdf9e42fc9517aa924b114531": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "a70381f756144655b998a514ae83bd38": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, 
+ "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a8bab731fc1b41ffb526072144e661cf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4a79c15365174feea8dc1658133756c5", + "max": 150, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_3cafb6d12ee149e3acedbfa568346e33", + "value": 150 + } + }, + "a9c96bdd52a64bd187368d93885efdfb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + 
"margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ab373d07afb942819cd636173630ba58": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "aed7142a2eee4822b96b65e38f703a97": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_67080ebd1ca04cf7bdc642a9ea3b0697", + "max": 5, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_9c2aa13be594439f97e7fea01fbb0afb", + "value": 5 + } + }, + "aedc4452fd6c4e369c3b9a97f2dc7f4a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + 
"IPY_MODEL_d80e97a6b93f4c2fb10f1dd68ef7d593", + "IPY_MODEL_3e5d069cc92e4043bf5910b929fc33c5", + "IPY_MODEL_fbe8e188bfb04a6686661761dc6d81d2" ], - "source": [ - "# Turn Food101 test data into DataLoader\n", - "# Easy way: set up batch size of 1 of test data loader - from Sali1997s\n", - "# Create batch size of 1 (predict on 1 image at a time)\n", - "test_dataloader_food101_all_data_batch_size_1 = DataLoader(test_data,\n", - " batch_size=1,\n", - " shuffle=False,\n", - " num_workers=NUM_WORKERS)\n", - "\n", - "len(test_dataloader_food101_all_data_batch_size_1)" - ] + "layout": "IPY_MODEL_c104170bf4694edf925f9a1fbabe3f0c" + } }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 49, - "referenced_widgets": [ - "b972c74283244c4580489025f2306921", - "62c29ae67a5c4f658f74f94caf75abd8", - "fdaf41692b1b4205bbe2b8570051062f", - "80e05c26a564402da7e3b52fddc8845f", - "5b5b28d04c76476fb2bfa4a23bd241cb", - "d2686785dee340b489781070fd0e4ab8", - "5d717bf09ecb441e8e61e57d92a623df", - "eb61d02241d3438fbf44a3e629adfcb6", - "ba54748f87e840ba9a3fcc8b2f30c135", - "26bcaf624dd24d68aa15aef3a0866d2d", - "c7bbdad837be48179a660a2aea18005d" - ] - }, - "id": "dWxceTz3VmeB", - "outputId": "64b278ab-60e3-40ab-c02d-380a7ab19ce5" - }, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "b972c74283244c4580489025f2306921", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/25250 [00:00