Merge pull request #9 from Giskard-AI/GSK-2248

[GSK-2248] TestResult to dict
Giskard-AI · Dec 14, 2023 · 35adb52 · 35adb52
2 parents 0f76a9f + 1821a0a
commit 35adb52
Show file tree

Hide file tree

Showing 7 changed files with 182 additions and 155 deletions.
diff --git a/examples/ex5_models_comparison.ipynb b/examples/ex5_models_comparison.ipynb
@@ -11,7 +11,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -30,18 +30,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
     "dl = DataLoader300W(dir_path=\"300W/sample\")\n",
     "chosen_idx = 4\n",
-    "image, ground_truth_landmarks, _ = ds[chosen_idx]"
+    "image, ground_truth_landmarks, _ = dl[chosen_idx]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -50,7 +50,7 @@
        "'cpu'"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -64,15 +64,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "File exists\n",
-      "File exists\n",
       "loading data from : lbfmodel.yaml\n"
      ]
     }
@@ -84,109 +82,60 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "\n",
-       "               <h4><span style=\"color:green;\">✓</span> Test \"NME_mean\" succeeded</h4>\n",
-       "               <p>Description: Mean of normalised mean Euclidean distances across images</p>\n",
-       "               <p>Metric: <b>0.0414</b> (threshold = 1)</p>\n",
-       "               \n",
-       "               <p>Prediction time: 0.54 s.</p>\n",
-       "               "
-      ],
-      "text/plain": [
-       "\n",
-       "               Test \"NME_mean\" succeeded\n",
-       "               Description: Mean of normalised mean Euclidean distances across images\n",
-       "               Metric: 0.0414 (threshold = 1)\n",
-       "               \n",
-       "               Prediction time: 0.54 s.\n",
-       "               "
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "Test(metric=NMEMean, threshold=1).run(model=opencv_model, dataloader=dl)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "text/html": [
-       "\n",
-       "               <h4><span style=\"color:green;\">✓</span> Test \"NME_mean\" succeeded</h4>\n",
-       "               <p>Description: Mean of normalised mean Euclidean distances across images</p>\n",
-       "               <p>Metric: <b>0.0623</b> (threshold = 1)</p>\n",
-       "               \n",
-       "               <p>Prediction time: 37.83 s.</p>\n",
-       "               "
-      ],
       "text/plain": [
-       "\n",
-       "               Test \"NME_mean\" succeeded\n",
-       "               Description: Mean of normalised mean Euclidean distances across images\n",
-       "               Metric: 0.0623 (threshold = 1)\n",
-       "               \n",
-       "               Prediction time: 37.83 s.\n",
-       "               "
+       "{'test_name': 'Test',\n",
+       " 'metric_name': 'NME_mean',\n",
+       " 'metric_value': 0.04136279942306024,\n",
+       " 'threshold': 1,\n",
+       " 'passed': True,\n",
+       " 'facial_part': 'entire face',\n",
+       " 'model_name': 'OpenCV',\n",
+       " 'dataloader_name': '300W'}"
       ]
      },
-     "execution_count": 14,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "Test(metric=NMEMean, threshold=1).run(model=facealignment_model, dataloader=dl)"
+    "test = Test(metric=NMEMean, threshold=1).run(model=opencv_model, dataloader=dl)\n",
+    "\n",
+    "test.to_dict()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/home/bazire/loreal-poc/loreal_poc/tests/performance.py:47: RuntimeWarning: Mean of empty slice\n",
+      "/Users/rak/Documents/loreal-poc/loreal_poc/tests/performance.py:47: RuntimeWarning: Mean of empty slice\n",
       "  mes = np.nanmean(es, axis=1)\n"
      ]
     },
     {
      "data": {
-      "text/html": [
-       "\n",
-       "               <h4><span style=\"color:green;\">✓</span> Test \"NME_mean\" succeeded</h4>\n",
-       "               <p>Description: Mean of normalised mean Euclidean distances across images</p>\n",
-       "               <p>Metric: <b>0.6623</b> (threshold = 1)</p>\n",
-       "               Prediction fail rate: 0.4\n",
-       "               <p>Prediction time: 0.54 s.</p>\n",
-       "               "
-      ],
       "text/plain": [
-       "\n",
-       "               Test \"NME_mean\" succeeded\n",
-       "               Description: Mean of normalised mean Euclidean distances across images\n",
-       "               Metric: 0.6623 (threshold = 1)\n",
-       "               Prediction fail rate: 0.4\n",
-       "               Prediction time: 0.54 s.\n",
-       "               "
+       "{'test_name': 'TestDiff',\n",
+       " 'metric_name': 'NME_mean',\n",
+       " 'metric_value': 0.3984345106345805,\n",
+       " 'threshold': 1,\n",
+       " 'passed': True,\n",
+       " 'facial_part': 'left half',\n",
+       " 'model_name': 'OpenCV',\n",
+       " 'dataloader_name': '300W'}"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -196,87 +145,130 @@
     "\n",
     "facial_part = FacialParts.left_half\n",
     "\n",
-    "TestDiff(metric=NMEMean, threshold=1).run(\n",
+    "test_diff = TestDiff(metric=NMEMean, threshold=1).run(\n",
     "    model=opencv_model, dataloader=dl, dataloader_ref=CroppedDataLoader(dl, part=facial_part), facial_part=facial_part\n",
-    ")"
+    ")\n",
+    "test_diff.to_dict()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/home/bazire/loreal-poc/loreal_poc/tests/performance.py:85: RuntimeWarning: Mean of empty slice\n",
-      "  return np.nanmean(NMEs.get(prediction_result, marks))\n",
-      "/home/bazire/loreal-poc/.venv/lib/python3.10/site-packages/face_alignment/api.py:147: UserWarning: No faces were detected.\n",
-      "  warnings.warn(\"No faces were detected.\")\n"
+      "/Users/rak/Documents/loreal-poc/loreal_poc/tests/performance.py:85: RuntimeWarning: Mean of empty slice\n",
+      "  return np.nanmean(NMEs.get(prediction_result, marks))\n"
      ]
     }
    ],
    "source": [
-    "from dataclasses import fields\n",
+    "def report(models, dataloader, tests, facial_parts):\n",
+    "    \"\"\"Run every test class on every (model, facial part) pair and collect the results.\n",
+    "\n",
+    "    Each test class in ``tests`` is instantiated with ``metric=NMEMean, threshold=1`` and run\n",
+    "    with ``dataloader`` wrapped in a ``CroppedDataLoader`` for the facial part, using the\n",
+    "    ``dataloader`` argument itself as ``dataloader_ref``.\n",
+    "\n",
+    "    Returns a list holding one ``to_dict()`` record per (model, facial part, test) combination,\n",
+    "    in iteration order.\n",
+    "    \"\"\"\n",
+    "    results = []\n",
+    "    for model in models:\n",
+    "        for facial_part in facial_parts:\n",
+    "            for test_cls in tests:\n",
+    "                test_result = test_cls(metric=NMEMean, threshold=1).run(\n",
+    "                    model=model,\n",
+    "                    dataloader=CroppedDataLoader(dataloader, part=facial_part),\n",
+    "                    # Use the function argument, not the notebook-global `dl`, so the reference\n",
+    "                    # always matches the loader that is being cropped.\n",
+    "                    dataloader_ref=dataloader,\n",
+    "                    facial_part=facial_part,\n",
+    "                )\n",
+    "                results.append(test_result.to_dict())\n",
+    "    return results\n",
+    "\n",
     "\n",
-    "results = {\"OpenCV\": {\"model\": opencv_model}, \"FaceAlignment\": {\"model\": facealignment_model}}\n",
-    "for model_name, _model in results.items():\n",
-    "    model = _model[\"model\"]\n",
-    "    for __facial_part in fields(FacialParts):\n",
-    "        _facial_part = __facial_part.default\n",
-    "        test = TestDiff(metric=NMEMean, threshold=1).run(\n",
-    "            model=model,\n",
-    "            dataloader=dl,\n",
-    "            dataloader_ref=CroppedDataLoader(dl, part=_facial_part),\n",
-    "            facial_part=_facial_part,\n",
-    "        )\n",
-    "        results[model_name][_facial_part.name] = test.metric"
+    "report = report([opencv_model], dl, [TestDiff], [FacialParts.bottom_half, FacialParts.upper_half])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
      "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>test_name</th>\n",
+       "      <th>metric_name</th>\n",
+       "      <th>metric_value</th>\n",
+       "      <th>threshold</th>\n",
+       "      <th>passed</th>\n",
+       "      <th>facial_part</th>\n",
+       "      <th>model_name</th>\n",
+       "      <th>dataloader_name</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>TestDiff</td>\n",
+       "      <td>NME_mean</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1</td>\n",
+       "      <td>False</td>\n",
+       "      <td>bottom half</td>\n",
+       "      <td>OpenCV</td>\n",
+       "      <td>300W cropped on bottom half</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>TestDiff</td>\n",
+       "      <td>NME_mean</td>\n",
+       "      <td>0.040216</td>\n",
+       "      <td>1</td>\n",
+       "      <td>True</td>\n",
+       "      <td>upper half</td>\n",
+       "      <td>OpenCV</td>\n",
+       "      <td>300W cropped on upper half</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
       "text/plain": [
-       "{'OpenCV': {'model': <loreal_poc.models.wrappers.OpenCVWrapper at 0x7f75e93bf910>,\n",
-       "  'entire face': 0.031593074968095824,\n",
-       "  'face contour': 0.0524161528719839,\n",
-       "  'left eyebrow': nan,\n",
-       "  'right eyebrow': nan,\n",
-       "  'nose': nan,\n",
-       "  'left eye': nan,\n",
-       "  'right eye': nan,\n",
-       "  'mouth': nan,\n",
-       "  'bottom half': nan,\n",
-       "  'upper half': 0.04021610184657019,\n",
-       "  'left half': 0.662329401666445,\n",
-       "  'right half': 0.9675771331137769},\n",
-       " 'FaceAlignment': {'model': <loreal_poc.models.wrappers.FaceAlignmentWrapper at 0x7f75e93bc370>,\n",
-       "  'entire face': 0.06824182183674955,\n",
-       "  'face contour': 0.13120804359950317,\n",
-       "  'left eyebrow': nan,\n",
-       "  'right eyebrow': nan,\n",
-       "  'nose': nan,\n",
-       "  'left eye': 2.2704133968712634,\n",
-       "  'right eye': nan,\n",
-       "  'mouth': nan,\n",
-       "  'bottom half': 8.182222629236117e-05,\n",
-       "  'upper half': 0.22317472213175207,\n",
-       "  'left half': 0.6372027526669263,\n",
-       "  'right half': 0.3839864768052418}}"
+       "  test_name metric_name  metric_value  threshold  passed  facial_part  \\\n",
+       "0  TestDiff    NME_mean           NaN          1   False  bottom half   \n",
+       "1  TestDiff    NME_mean      0.040216          1    True   upper half   \n",
+       "\n",
+       "  model_name              dataloader_name  \n",
+       "0     OpenCV  300W cropped on bottom half  \n",
+       "1     OpenCV   300W cropped on upper half  "
       ]
      },
-     "execution_count": 10,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "results"
+    "import pandas as pd\n",
+    "\n",
+    "pd.DataFrame(report)"
    ]
   }
  ],
@@ -296,7 +288,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.11.6"
   }
  },
  "nbformat": 4,