From bc72174686f8b2f3d594269a1b8c3c8930fb995f Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 9 Jul 2024 14:00:36 +0100
Subject: [PATCH 01/81] add duckdb dependency

---
 poetry.lock    | 74 ++++++++++++++++++++++++++++++++++++++++++++++++--
 pyproject.toml |  1 +
 2 files changed, 73 insertions(+), 2 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index ac94a1f0b..6bf283c30 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
 
 [[package]]
 name = "airium"
@@ -642,6 +642,61 @@ files = [
     {file = "docutils-0.18.1.tar.gz", hash = "sha256:679987caf361a7539d76e584cbeddc311e3aee937877c87346f31debc63e9d06"},
 ]
 
+[[package]]
+name = "duckdb"
+version = "1.0.0"
+description = "DuckDB in-process database"
+optional = false
+python-versions = ">=3.7.0"
+files = [
+    {file = "duckdb-1.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:4a8ce2d1f9e1c23b9bab3ae4ca7997e9822e21563ff8f646992663f66d050211"},
+    {file = "duckdb-1.0.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:19797670f20f430196e48d25d082a264b66150c264c1e8eae8e22c64c2c5f3f5"},
+    {file = "duckdb-1.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:b71c342090fe117b35d866a91ad6bffce61cd6ff3e0cff4003f93fc1506da0d8"},
+    {file = "duckdb-1.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25dd69f44ad212c35ae2ea736b0e643ea2b70f204b8dff483af1491b0e2a4cec"},
+    {file = "duckdb-1.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8da5f293ecb4f99daa9a9352c5fd1312a6ab02b464653a0c3a25ab7065c45d4d"},
+    {file = "duckdb-1.0.0-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3207936da9967ddbb60644ec291eb934d5819b08169bc35d08b2dedbe7068c60"},
+    {file = "duckdb-1.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1128d6c9c33e883b1f5df6b57c1eb46b7ab1baf2650912d77ee769aaa05111f9"},
+    {file = "duckdb-1.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:02310d263474d0ac238646677feff47190ffb82544c018b2ff732a4cb462c6ef"},
+    {file = "duckdb-1.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:75586791ab2702719c284157b65ecefe12d0cca9041da474391896ddd9aa71a4"},
+    {file = "duckdb-1.0.0-cp311-cp311-macosx_12_0_universal2.whl", hash = "sha256:83bb415fc7994e641344f3489e40430ce083b78963cb1057bf714ac3a58da3ba"},
+    {file = "duckdb-1.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:bee2e0b415074e84c5a2cefd91f6b5ebeb4283e7196ba4ef65175a7cef298b57"},
+    {file = "duckdb-1.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fa5a4110d2a499312609544ad0be61e85a5cdad90e5b6d75ad16b300bf075b90"},
+    {file = "duckdb-1.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fa389e6a382d4707b5f3d1bc2087895925ebb92b77e9fe3bfb23c9b98372fdc"},
+    {file = "duckdb-1.0.0-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7ede6f5277dd851f1a4586b0c78dc93f6c26da45e12b23ee0e88c76519cbdbe0"},
+    {file = "duckdb-1.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0b88cdbc0d5c3e3d7545a341784dc6cafd90fc035f17b2f04bf1e870c68456e5"},
+    {file = "duckdb-1.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:fd1693cdd15375156f7fff4745debc14e5c54928589f67b87fb8eace9880c370"},
+    {file = "duckdb-1.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:c65a7fe8a8ce21b985356ee3ec0c3d3b3b2234e288e64b4cfb03356dbe6e5583"},
+    {file = "duckdb-1.0.0-cp312-cp312-macosx_12_0_universal2.whl", hash = "sha256:e5a8eda554379b3a43b07bad00968acc14dd3e518c9fbe8f128b484cf95e3d16"},
+    {file = "duckdb-1.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:a1b6acdd54c4a7b43bd7cb584975a1b2ff88ea1a31607a2b734b17960e7d3088"},
+    {file = "duckdb-1.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a677bb1b6a8e7cab4a19874249d8144296e6e39dae38fce66a80f26d15e670df"},
+    {file = "duckdb-1.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:752e9d412b0a2871bf615a2ede54be494c6dc289d076974eefbf3af28129c759"},
+    {file = "duckdb-1.0.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3aadb99d098c5e32d00dc09421bc63a47134a6a0de9d7cd6abf21780b678663c"},
+    {file = "duckdb-1.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83b7091d4da3e9301c4f9378833f5ffe934fb1ad2b387b439ee067b2c10c8bb0"},
+    {file = "duckdb-1.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:6a8058d0148b544694cb5ea331db44f6c2a00a7b03776cc4dd1470735c3d5ff7"},
+    {file = "duckdb-1.0.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e40cb20e5ee19d44bc66ec99969af791702a049079dc5f248c33b1c56af055f4"},
+    {file = "duckdb-1.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7bce1bc0de9af9f47328e24e6e7e39da30093179b1c031897c042dd94a59c8e"},
+    {file = "duckdb-1.0.0-cp37-cp37m-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8355507f7a04bc0a3666958f4414a58e06141d603e91c0fa5a7c50e49867fb6d"},
+    {file = "duckdb-1.0.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:39f1a46f5a45ad2886dc9b02ce5b484f437f90de66c327f86606d9ba4479d475"},
+    {file = "duckdb-1.0.0-cp37-cp37m-win_amd64.whl", hash = "sha256:a6d29ba477b27ae41676b62c8fae8d04ee7cbe458127a44f6049888231ca58fa"},
+    {file = "duckdb-1.0.0-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:1bea713c1925918714328da76e79a1f7651b2b503511498ccf5e007a7e67d49e"},
+    {file = "duckdb-1.0.0-cp38-cp38-macosx_12_0_universal2.whl", hash = "sha256:bfe67f3bcf181edbf6f918b8c963eb060e6aa26697d86590da4edc5707205450"},
+    {file = "duckdb-1.0.0-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:dbc6093a75242f002be1d96a6ace3fdf1d002c813e67baff52112e899de9292f"},
+    {file = "duckdb-1.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba1881a2b11c507cee18f8fd9ef10100be066fddaa2c20fba1f9a664245cd6d8"},
+    {file = "duckdb-1.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:445d0bb35087c522705c724a75f9f1c13f1eb017305b694d2686218d653c8142"},
+    {file = "duckdb-1.0.0-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:224553432e84432ffb9684f33206572477049b371ce68cc313a01e214f2fbdda"},
+    {file = "duckdb-1.0.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:d3914032e47c4e76636ad986d466b63fdea65e37be8a6dfc484ed3f462c4fde4"},
+    {file = "duckdb-1.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:af9128a2eb7e1bb50cd2c2020d825fb2946fdad0a2558920cd5411d998999334"},
+    {file = "duckdb-1.0.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:dd2659a5dbc0df0de68f617a605bf12fe4da85ba24f67c08730984a0892087e8"},
+    {file = "duckdb-1.0.0-cp39-cp39-macosx_12_0_universal2.whl", hash = "sha256:ac5a4afb0bc20725e734e0b2c17e99a274de4801aff0d4e765d276b99dad6d90"},
+    {file = "duckdb-1.0.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:2c5a53bee3668d6e84c0536164589d5127b23d298e4c443d83f55e4150fafe61"},
+    {file = "duckdb-1.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b980713244d7708b25ee0a73de0c65f0e5521c47a0e907f5e1b933d79d972ef6"},
+    {file = "duckdb-1.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21cbd4f9fe7b7a56eff96c3f4d6778770dd370469ca2212eddbae5dd63749db5"},
+    {file = "duckdb-1.0.0-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ed228167c5d49888c5ef36f6f9cbf65011c2daf9dcb53ea8aa7a041ce567b3e4"},
+    {file = "duckdb-1.0.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:46d8395fbcea7231fd5032a250b673cc99352fef349b718a23dea2c0dd2b8dec"},
+    {file = "duckdb-1.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:6ad1fc1a4d57e7616944166a5f9417bdbca1ea65c490797e3786e3a42e162d8a"},
+    {file = "duckdb-1.0.0.tar.gz", hash = "sha256:a2a059b77bc7d5b76ae9d88e267372deff19c291048d59450c431e166233d453"},
+]
+
 [[package]]
 name = "editorconfig"
 version = "0.12.3"
@@ -872,6 +927,7 @@ files = [
     {file = "greenlet-2.0.2-cp27-cp27m-win32.whl", hash = "sha256:6c3acb79b0bfd4fe733dff8bc62695283b57949ebcca05ae5c129eb606ff2d74"},
     {file = "greenlet-2.0.2-cp27-cp27m-win_amd64.whl", hash = "sha256:283737e0da3f08bd637b5ad058507e578dd462db259f7f6e4c5c365ba4ee9343"},
     {file = "greenlet-2.0.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:d27ec7509b9c18b6d73f2f5ede2622441de812e7b1a80bbd446cb0633bd3d5ae"},
+    {file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d967650d3f56af314b72df7089d96cda1083a7fc2da05b375d2bc48c82ab3f3c"},
     {file = "greenlet-2.0.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:30bcf80dda7f15ac77ba5af2b961bdd9dbc77fd4ac6105cee85b0d0a5fcf74df"},
     {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26fbfce90728d82bc9e6c38ea4d038cba20b7faf8a0ca53a9c07b67318d46088"},
     {file = "greenlet-2.0.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9190f09060ea4debddd24665d6804b995a9c122ef5917ab26e1566dcc712ceeb"},
@@ -880,6 +936,7 @@ files = [
     {file = "greenlet-2.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:76ae285c8104046b3a7f06b42f29c7b73f77683df18c49ab5af7983994c2dd91"},
     {file = "greenlet-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:2d4686f195e32d36b4d7cf2d166857dbd0ee9f3d20ae349b6bf8afc8485b3645"},
     {file = "greenlet-2.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c4302695ad8027363e96311df24ee28978162cdcdd2006476c43970b384a244c"},
+    {file = "greenlet-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d4606a527e30548153be1a9f155f4e283d109ffba663a15856089fb55f933e47"},
     {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c48f54ef8e05f04d6eff74b8233f6063cb1ed960243eacc474ee73a2ea8573ca"},
     {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a1846f1b999e78e13837c93c778dcfc3365902cfb8d1bdb7dd73ead37059f0d0"},
     {file = "greenlet-2.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a06ad5312349fec0ab944664b01d26f8d1f05009566339ac6f63f56589bc1a2"},
@@ -909,6 +966,7 @@ files = [
     {file = "greenlet-2.0.2-cp37-cp37m-win32.whl", hash = "sha256:3f6ea9bd35eb450837a3d80e77b517ea5bc56b4647f5502cd28de13675ee12f7"},
     {file = "greenlet-2.0.2-cp37-cp37m-win_amd64.whl", hash = "sha256:7492e2b7bd7c9b9916388d9df23fa49d9b88ac0640db0a5b4ecc2b653bf451e3"},
     {file = "greenlet-2.0.2-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b864ba53912b6c3ab6bcb2beb19f19edd01a6bfcbdfe1f37ddd1778abfe75a30"},
+    {file = "greenlet-2.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1087300cf9700bbf455b1b97e24db18f2f77b55302a68272c56209d5587c12d1"},
     {file = "greenlet-2.0.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:ba2956617f1c42598a308a84c6cf021a90ff3862eddafd20c3333d50f0edb45b"},
     {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc3a569657468b6f3fb60587e48356fe512c1754ca05a564f11366ac9e306526"},
     {file = "greenlet-2.0.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8eab883b3b2a38cc1e050819ef06a7e6344d4a990d24d45bc6f2cf959045a45b"},
@@ -917,6 +975,7 @@ files = [
     {file = "greenlet-2.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b0ef99cdbe2b682b9ccbb964743a6aca37905fda5e0452e5ee239b1654d37f2a"},
     {file = "greenlet-2.0.2-cp38-cp38-win32.whl", hash = "sha256:b80f600eddddce72320dbbc8e3784d16bd3fb7b517e82476d8da921f27d4b249"},
     {file = "greenlet-2.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:4d2e11331fc0c02b6e84b0d28ece3a36e0548ee1a1ce9ddde03752d9b79bba40"},
+    {file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:8512a0c38cfd4e66a858ddd1b17705587900dd760c6003998e9472b77b56d417"},
     {file = "greenlet-2.0.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:88d9ab96491d38a5ab7c56dd7a3cc37d83336ecc564e4e8816dbed12e5aaefc8"},
     {file = "greenlet-2.0.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:561091a7be172ab497a3527602d467e2b3fbe75f9e783d8b8ce403fa414f71a6"},
     {file = "greenlet-2.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:971ce5e14dc5e73715755d0ca2975ac88cfdaefcaab078a284fea6cfabf866df"},
@@ -2990,6 +3049,7 @@ description = "A pure Python implementation of the trie data structure."
 optional = false
 python-versions = "*"
 files = [
+    {file = "PyTrie-0.4.0-py3-none-any.whl", hash = "sha256:f687c224ee8c66cda8e8628a903011b692635ffbb08d4b39c5f92b18eb78c950"},
     {file = "PyTrie-0.4.0.tar.gz", hash = "sha256:8f4488f402d3465993fb6b6efa09866849ed8cda7903b50647b7d0342b805379"},
 ]
 
@@ -4164,6 +4224,16 @@ files = [
     {file = "wrapt-1.14.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8ad85f7f4e20964db4daadcab70b47ab05c7c1cf2a7c1e51087bfaa83831854c"},
     {file = "wrapt-1.14.1-cp310-cp310-win32.whl", hash = "sha256:a9a52172be0b5aae932bef82a79ec0a0ce87288c7d132946d645eba03f0ad8a8"},
     {file = "wrapt-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:6d323e1554b3d22cfc03cd3243b5bb815a51f5249fdcbb86fda4bf62bab9e164"},
+    {file = "wrapt-1.14.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ecee4132c6cd2ce5308e21672015ddfed1ff975ad0ac8d27168ea82e71413f55"},
+    {file = "wrapt-1.14.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2020f391008ef874c6d9e208b24f28e31bcb85ccff4f335f15a3251d222b92d9"},
+    {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2feecf86e1f7a86517cab34ae6c2f081fd2d0dac860cb0c0ded96d799d20b335"},
+    {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:240b1686f38ae665d1b15475966fe0472f78e71b1b4903c143a842659c8e4cb9"},
+    {file = "wrapt-1.14.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9008dad07d71f68487c91e96579c8567c98ca4c3881b9b113bc7b33e9fd78b8"},
+    {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6447e9f3ba72f8e2b985a1da758767698efa72723d5b59accefd716e9e8272bf"},
+    {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:acae32e13a4153809db37405f5eba5bac5fbe2e2ba61ab227926a22901051c0a"},
+    {file = "wrapt-1.14.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49ef582b7a1152ae2766557f0550a9fcbf7bbd76f43fbdc94dd3bf07cc7168be"},
+    {file = "wrapt-1.14.1-cp311-cp311-win32.whl", hash = "sha256:358fe87cc899c6bb0ddc185bf3dbfa4ba646f05b1b0b9b5a27c2cb92c2cea204"},
+    {file = "wrapt-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:26046cd03936ae745a502abf44dac702a5e6880b2b01c29aea8ddf3353b68224"},
     {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:43ca3bbbe97af00f49efb06e352eae40434ca9d915906f77def219b88e85d907"},
     {file = "wrapt-1.14.1-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:6b1a564e6cb69922c7fe3a678b9f9a3c54e72b469875aa8018f18b4d1dd1adf3"},
     {file = "wrapt-1.14.1-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:00b6d4ea20a906c0ca56d84f93065b398ab74b927a7a3dbd470f6fc503f95dc3"},
@@ -4229,4 +4299,4 @@ testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools"
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<4.0.0"
-content-hash = "9e41c6e64e516afe77dbb394618907a361bce96d84fd03155bf269e843060613"
+content-hash = "8b066ac09631ffaa8c30d15b0656dcd8801e0bd15d4f4041833016571f456daa"
diff --git a/pyproject.toml b/pyproject.toml
index 02f163f43..8d3baf914 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,6 +27,7 @@ matplotlib = "^3.7.0"
 pyserde = "^0.9.8"
 polars = "^0.19.15"
 scikit-learn = "^1.4.0"
+duckdb = "^1.0.0"
 
 [tool.poetry.dev-dependencies]
 pytest = "^7.2.0"

From b2fc7790e0d37d81567b5ae1661ce65f9ecabf14 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 9 Jul 2024 14:06:04 +0100
Subject: [PATCH 02/81] move methods to retrieve variants/genes/diseases from
 phenopackets

---
 .../disease_prioritisation_analysis.py        | 18 +------
 .../analyse/gene_prioritisation_analysis.py   | 17 +------
 src/pheval/analyse/parse_corpus.py            | 49 +++++++++++++++++++
 .../variant_prioritisation_analysis.py        | 18 +------
 4 files changed, 55 insertions(+), 47 deletions(-)
 create mode 100644 src/pheval/analyse/parse_corpus.py

diff --git a/src/pheval/analyse/disease_prioritisation_analysis.py b/src/pheval/analyse/disease_prioritisation_analysis.py
index aa5715f82..8041c4063 100644
--- a/src/pheval/analyse/disease_prioritisation_analysis.py
+++ b/src/pheval/analyse/disease_prioritisation_analysis.py
@@ -4,6 +4,7 @@
 
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
+from pheval.analyse.parse_corpus import _obtain_causative_diseases
 from pheval.analyse.parse_pheval_result import parse_pheval_result, read_standardised_result
 from pheval.analyse.prioritisation_rank_recorder import PrioritisationRankRecorder
 from pheval.analyse.prioritisation_result_types import DiseasePrioritisationResult
@@ -11,7 +12,7 @@
 from pheval.analyse.run_data_parser import TrackInputOutputDirectories
 from pheval.post_processing.post_processing import RankedPhEvalDiseaseResult
 from pheval.utils.file_utils import all_files
-from pheval.utils.phenopacket_utils import PhenopacketUtil, ProbandDisease, phenopacket_reader
+from pheval.utils.phenopacket_utils import ProbandDisease
 
 
 class AssessDiseasePrioritisation:
@@ -197,21 +198,6 @@ def assess_disease_prioritisation(
         )
 
 
-def _obtain_causative_diseases(phenopacket_path: Path) -> List[ProbandDisease]:
-    """
-    Obtain known diseases from a Phenopacket.
-    Args:
-       phenopacket_path (Path): Path to the Phenopacket file.
-
-    Returns:
-       List[ProbandDisease]: A list of known diseases associated with the proband,
-       extracted from the Phenopacket.
-    """
-    phenopacket = phenopacket_reader(phenopacket_path)
-    phenopacket_util = PhenopacketUtil(phenopacket)
-    return phenopacket_util.diagnoses()
-
-
 def assess_phenopacket_disease_prioritisation(
     phenopacket_path: Path,
     score_order: str,
diff --git a/src/pheval/analyse/gene_prioritisation_analysis.py b/src/pheval/analyse/gene_prioritisation_analysis.py
index 7d962e9de..dfa5f8f2b 100644
--- a/src/pheval/analyse/gene_prioritisation_analysis.py
+++ b/src/pheval/analyse/gene_prioritisation_analysis.py
@@ -6,6 +6,7 @@
 
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
+from pheval.analyse.parse_corpus import _obtain_causative_genes
 from pheval.analyse.parse_pheval_result import parse_pheval_result, read_standardised_result
 from pheval.analyse.prioritisation_rank_recorder import PrioritisationRankRecorder
 from pheval.analyse.prioritisation_result_types import GenePrioritisationResult
@@ -13,7 +14,7 @@
 from pheval.analyse.run_data_parser import TrackInputOutputDirectories
 from pheval.post_processing.post_processing import RankedPhEvalGeneResult
 from pheval.utils.file_utils import all_files
-from pheval.utils.phenopacket_utils import PhenopacketUtil, ProbandCausativeGene, phenopacket_reader
+from pheval.utils.phenopacket_utils import ProbandCausativeGene
 
 
 class AssessGenePrioritisation:
@@ -222,20 +223,6 @@ def assess_gene_prioritisation(
         )
 
 
-def _obtain_causative_genes(phenopacket_path: Path) -> List[ProbandCausativeGene]:
-    """
-    Obtain known genes from a Phenopacket.
-    Args:
-       phenopacket_path (Path): Path to the Phenopacket file.
-    Returns:
-       List[ProbandCausativeGene]: A list of known genes associated with the proband,
-       extracted from the Phenopacket.
-    """
-    phenopacket = phenopacket_reader(phenopacket_path)
-    phenopacket_util = PhenopacketUtil(phenopacket)
-    return phenopacket_util.diagnosed_genes()
-
-
 def assess_phenopacket_gene_prioritisation(
     phenopacket_path: Path,
     score_order: str,
diff --git a/src/pheval/analyse/parse_corpus.py b/src/pheval/analyse/parse_corpus.py
new file mode 100644
index 000000000..3ac15a478
--- /dev/null
+++ b/src/pheval/analyse/parse_corpus.py
@@ -0,0 +1,49 @@
+from pathlib import Path
+from typing import List
+
+from pheval.utils.phenopacket_utils import GenomicVariant, ProbandCausativeGene, phenopacket_reader, PhenopacketUtil, \
+    ProbandDisease
+
+
+def _obtain_causative_diseases(phenopacket_path: Path) -> List[ProbandDisease]:
+    """
+    Obtain known diseases from a Phenopacket.
+    Args:
+       phenopacket_path (Path): Path to the Phenopacket file.
+
+    Returns:
+       List[ProbandDisease]: A list of known diseases associated with the proband,
+       extracted from the Phenopacket.
+    """
+    phenopacket = phenopacket_reader(phenopacket_path)
+    phenopacket_util = PhenopacketUtil(phenopacket)
+    return phenopacket_util.diagnoses()
+
+
+def _obtain_causative_variants(phenopacket_path: Path) -> List[GenomicVariant]:
+    """
+    Obtain known variants from a Phenopacket.
+    Args:
+       phenopacket_path (Path): Path to the Phenopacket file.
+
+    Returns:
+       List[GenomicVariant]: A list of known variants associated with the proband,
+       extracted from the Phenopacket.
+    """
+    phenopacket = phenopacket_reader(phenopacket_path)
+    phenopacket_util = PhenopacketUtil(phenopacket)
+    return phenopacket_util.diagnosed_variants()
+
+
+def _obtain_causative_genes(phenopacket_path: Path) -> List[ProbandCausativeGene]:
+    """
+    Obtain known genes from a Phenopacket.
+    Args:
+       phenopacket_path (Path): Path to the Phenopacket file.
+    Returns:
+       List[ProbandCausativeGene]: A list of known genes associated with the proband,
+       extracted from the Phenopacket.
+    """
+    phenopacket = phenopacket_reader(phenopacket_path)
+    phenopacket_util = PhenopacketUtil(phenopacket)
+    return phenopacket_util.diagnosed_genes()
diff --git a/src/pheval/analyse/variant_prioritisation_analysis.py b/src/pheval/analyse/variant_prioritisation_analysis.py
index 6c87d2f19..2aecc4a67 100644
--- a/src/pheval/analyse/variant_prioritisation_analysis.py
+++ b/src/pheval/analyse/variant_prioritisation_analysis.py
@@ -4,6 +4,7 @@
 
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
+from pheval.analyse.parse_corpus import _obtain_causative_variants
 from pheval.analyse.parse_pheval_result import parse_pheval_result, read_standardised_result
 from pheval.analyse.prioritisation_rank_recorder import PrioritisationRankRecorder
 from pheval.analyse.prioritisation_result_types import VariantPrioritisationResult
@@ -11,7 +12,7 @@
 from pheval.analyse.run_data_parser import TrackInputOutputDirectories
 from pheval.post_processing.post_processing import RankedPhEvalVariantResult
 from pheval.utils.file_utils import all_files
-from pheval.utils.phenopacket_utils import GenomicVariant, PhenopacketUtil, phenopacket_reader
+from pheval.utils.phenopacket_utils import GenomicVariant
 
 
 class AssessVariantPrioritisation:
@@ -191,21 +192,6 @@ def assess_variant_prioritisation(
         )
 
 
-def _obtain_causative_variants(phenopacket_path: Path) -> List[GenomicVariant]:
-    """
-    Obtain known variants from a Phenopacket.
-    Args:
-       phenopacket_path (Path): Path to the Phenopacket file.
-
-    Returns:
-       List[GenomicVariant]: A list of known variants associated with the proband,
-       extracted from the Phenopacket.
-    """
-    phenopacket = phenopacket_reader(phenopacket_path)
-    phenopacket_util = PhenopacketUtil(phenopacket)
-    return phenopacket_util.diagnosed_variants()
-
-
 def assess_phenopacket_variant_prioritisation(
     phenopacket_path: Path,
     score_order: str,

From c0ce36e2ab0aff24498e1f743a5ffe0f8758db8f Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 9 Jul 2024 14:45:43 +0100
Subject: [PATCH 03/81] add `CorpusParser` class to parse phenopacket corpus
 and record known genes/diseases/variants in tables

---
 src/pheval/analyse/parse_corpus.py | 168 ++++++++++++++++++++++++++++-
 1 file changed, 167 insertions(+), 1 deletion(-)

diff --git a/src/pheval/analyse/parse_corpus.py b/src/pheval/analyse/parse_corpus.py
index 3ac15a478..e6a1ad2e4 100644
--- a/src/pheval/analyse/parse_corpus.py
+++ b/src/pheval/analyse/parse_corpus.py
@@ -1,6 +1,10 @@
 from pathlib import Path
-from typing import List
+from typing import List, Union
 
+from pheval.analyse.benchmark_generator import GeneBenchmarkRunOutputGenerator, VariantBenchmarkRunOutputGenerator, \
+    DiseaseBenchmarkRunOutputGenerator
+from pheval.get_connection import get_connection
+from pheval.utils.file_utils import all_files
 from pheval.utils.phenopacket_utils import GenomicVariant, ProbandCausativeGene, phenopacket_reader, PhenopacketUtil, \
     ProbandDisease
 
@@ -47,3 +51,165 @@ def _obtain_causative_genes(phenopacket_path: Path) -> List[ProbandCausativeGene
     phenopacket = phenopacket_reader(phenopacket_path)
     phenopacket_util = PhenopacketUtil(phenopacket)
     return phenopacket_util.diagnosed_genes()
+
+class CorpusParser:
+    """ Class for parsing phenopacket corpus and retrieving known variants/genes/diseases."""
+    def __init__(self, phenopacket_dir: Path) -> None:
+        """
+        Initialise the CorpusParser class.
+        Args:
+            phenopacket_dir (Path): Path to the Phenopacket directory.
+        """
+        self.phenopacket_dir = phenopacket_dir
+        self.conn = get_connection()
+        self.table_name = phenopacket_dir.parents[0].name
+
+    def _create_gene_table(self) -> None:
+        """
+        Create the Gene benchmarking table if it doesn't already exist.
+        """
+        self.conn.execute(
+            f"""
+                    CREATE TABLE IF NOT EXISTS {self.table_name}_gene (
+                        identifier VARCHAR(255) PRIMARY KEY,
+                        phenopacket VARCHAR,
+                        gene_symbol VARCHAR,
+                        gene_identifier VARCHAR
+                    )
+                    """
+        )
+
+    def _create_variant_table(self) -> None:
+        """
+        Create the Variant benchmarking table if it doesn't already exist.'
+        """
+        self.conn.execute(
+            f"""
+                    CREATE TABLE IF NOT EXISTS {self.table_name}_variant (
+                        identifier VARCHAR(255) PRIMARY KEY,
+                        phenopacket VARCHAR,
+                        chrom VARCHAR,
+                        pos INTEGER,
+                        reference VARCHAR,
+                        alt VARCHAR
+                    )
+                    """
+        )
+
+    def _create_disease_table(self):
+        """
+        Create the Disease benchmarking table if it doesn't already exist.'
+        """
+        self.conn.execute(
+            f"""
+                    CREATE TABLE IF NOT EXISTS {self.table_name}_disease (
+                        identifier VARCHAR(255) PRIMARY KEY,
+                        phenopacket VARCHAR,
+                        disease_identifier VARCHAR,
+                        disease_name VARCHAR
+                    )
+                    """
+        )
+
+    def _create_tables(self, benchmark_generator: Union[
+        GeneBenchmarkRunOutputGenerator, VariantBenchmarkRunOutputGenerator, DiseaseBenchmarkRunOutputGenerator]) -> None:
+        """
+        Create tables based on the benchmarking analysis specified.
+        Args:
+            benchmark_generator
+            (Union[GeneBenchmarkRunOutputGenerator, VariantBenchmarkRunOutputGenerator,
+            DiseaseBenchmarkRunOutputGenerator]: Class instance of the benchmark generator type.
+        """
+
+        if isinstance(benchmark_generator, GeneBenchmarkRunOutputGenerator):
+            self._create_gene_table()
+        if isinstance(benchmark_generator, VariantBenchmarkRunOutputGenerator):
+            self._create_variant_table()
+        if isinstance(benchmark_generator, DiseaseBenchmarkRunOutputGenerator):
+            self._create_disease_table()
+
+    def _insert_genes(self, phenopacket_path: Path, genes: List[ProbandCausativeGene]) -> None:
+        """
+        Insert known disease-causing genes into the Gene benchmarking table.
+        Args:
+            phenopacket_path (Path): Path to the Phenopacket file.
+            genes (List[ProbandCausativeGene]): List of known genes associated with the proband.
+        """
+        for gene in genes:
+            identifier = f"{phenopacket_path.name}-{gene.gene_symbol}"
+            self.conn.execute(
+                f"""
+                INSERT OR IGNORE INTO {self.table_name}_gene (identifier, phenopacket, gene_symbol, gene_identifier)
+                VALUES (?, ?, ?, ?)
+                """,
+                (identifier, phenopacket_path.name, gene.gene_symbol, gene.gene_identifier),
+            )
+
+    def _insert_variants(self, phenopacket_path: Path, variants: List[GenomicVariant]) -> None:
+        """
+        Insert known variants into the Variant benchmarking table.
+        Args:
+            phenopacket_path (Path): Path to the Phenopacket file.
+            variants (List[GenomicVariant]): List of known variants associated with the proband.
+        """
+        for variant in variants:
+            identifier = (
+                f"{phenopacket_path.name}-{variant.chrom}-{variant.pos}-{variant.ref}-{variant.alt}"
+            )
+            self.conn.execute(
+                f"""
+                INSERT OR IGNORE INTO {self.table_name}_variant (identifier, phenopacket, chrom, pos, reference, alt)
+                VALUES (?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    identifier,
+                    phenopacket_path.name,
+                    variant.chrom,
+                    variant.pos,
+                    variant.ref,
+                    variant.alt,
+                ),
+            )
+
+    def _insert_diseases(self, phenopacket_path: Path, diseases: List[ProbandDisease]) -> None:
+        """
+        Insert known diseases into the Disease benchmarking table, ignoring rows whose identifier already exists.
+        Args:
+            phenopacket_path (Path): Path to the Phenopacket file.
+            diseases (List[ProbandDisease]): List of known diseases associated with the proband.
+        """
+        for disease in diseases:
+            identifier = f"{phenopacket_path.name}-{disease.disease_identifier}"  # primary key of the row
+            self.conn.execute(
+                f"""
+                INSERT OR IGNORE INTO {self.table_name}_disease (identifier, phenopacket, disease_identifier, disease_name)
+                VALUES (?, ?, ?, ?)
+                """,
+                (
+                    identifier,
+                    phenopacket_path.name,
+                    disease.disease_identifier,
+                    disease.disease_name,
+                ),
+            )
+
+    def parse_corpus(self, benchmark_generator: Union[GeneBenchmarkRunOutputGenerator, VariantBenchmarkRunOutputGenerator, DiseaseBenchmarkRunOutputGenerator]) -> None:
+        """
+        Parse the phenopacket corpus and add known genes/variants/diseases to relevant benchmarking tables.
+        Args:
+            benchmark_generator
+            (Union[GeneBenchmarkRunOutputGenerator, VariantBenchmarkRunOutputGenerator,
+            DiseaseBenchmarkRunOutputGenerator]): Class instance of the benchmark generator type.
+        """
+        self._create_tables(benchmark_generator)
+        for phenopacket_path in all_files(self.phenopacket_dir):
+            if isinstance(benchmark_generator, GeneBenchmarkRunOutputGenerator):
+                genes = _obtain_causative_genes(phenopacket_path)
+                self._insert_genes(phenopacket_path, genes)
+            if isinstance(benchmark_generator, VariantBenchmarkRunOutputGenerator):
+                variants = _obtain_causative_variants(phenopacket_path)
+                self._insert_variants(phenopacket_path, variants)
+            if isinstance(benchmark_generator, DiseaseBenchmarkRunOutputGenerator):
+                diseases = _obtain_causative_diseases(phenopacket_path)
+                self._insert_diseases(phenopacket_path, diseases)
+        self.conn.close()

From 8baef5a5956d9bdc14785eb35564d2fe04e162b3 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 9 Jul 2024 15:15:23 +0100
Subject: [PATCH 04/81] change typing to `BenchmarkRunOutputGenerator`

---
 src/pheval/analyse/parse_corpus.py | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/src/pheval/analyse/parse_corpus.py b/src/pheval/analyse/parse_corpus.py
index e6a1ad2e4..a7cbdc001 100644
--- a/src/pheval/analyse/parse_corpus.py
+++ b/src/pheval/analyse/parse_corpus.py
@@ -1,8 +1,8 @@
 from pathlib import Path
-from typing import List, Union
+from typing import List
 
 from pheval.analyse.benchmark_generator import GeneBenchmarkRunOutputGenerator, VariantBenchmarkRunOutputGenerator, \
-    DiseaseBenchmarkRunOutputGenerator
+    DiseaseBenchmarkRunOutputGenerator, BenchmarkRunOutputGenerator
 from pheval.get_connection import get_connection
 from pheval.utils.file_utils import all_files
 from pheval.utils.phenopacket_utils import GenomicVariant, ProbandCausativeGene, phenopacket_reader, PhenopacketUtil, \
@@ -52,8 +52,10 @@ def _obtain_causative_genes(phenopacket_path: Path) -> List[ProbandCausativeGene
     phenopacket_util = PhenopacketUtil(phenopacket)
     return phenopacket_util.diagnosed_genes()
 
+
 class CorpusParser:
     """ Class for parsing phenopacket corpus and retrieving known variants/genes/diseases."""
+
     def __init__(self, phenopacket_dir: Path) -> None:
         """
         Initialise the CorpusParser class.
@@ -81,7 +83,7 @@ def _create_gene_table(self) -> None:
 
     def _create_variant_table(self) -> None:
         """
-        Create the Variant benchmarking table if it doesn't already exist.'
+        Create the Variant benchmarking table if it doesn't already exist.
         """
         self.conn.execute(
             f"""
@@ -98,7 +100,7 @@ def _create_variant_table(self) -> None:
 
     def _create_disease_table(self):
         """
-        Create the Disease benchmarking table if it doesn't already exist.'
+        Create the Disease benchmarking table if it doesn't already exist.
         """
         self.conn.execute(
             f"""
@@ -111,14 +113,11 @@ def _create_disease_table(self):
                     """
         )
 
-    def _create_tables(self, benchmark_generator: Union[
-        GeneBenchmarkRunOutputGenerator, VariantBenchmarkRunOutputGenerator, DiseaseBenchmarkRunOutputGenerator]) -> None:
+    def _create_tables(self, benchmark_generator: BenchmarkRunOutputGenerator) -> None:
         """
         Create tables based on the benchmarking analysis specified.
         Args:
-            benchmark_generator
-            (Union[GeneBenchmarkRunOutputGenerator, VariantBenchmarkRunOutputGenerator,
-            DiseaseBenchmarkRunOutputGenerator]: Class instance of the benchmark generator type.
+            benchmark_generator (BenchmarkRunOutputGenerator): Class instance of the benchmark generator type.
         """
 
         if isinstance(benchmark_generator, GeneBenchmarkRunOutputGenerator):
@@ -193,13 +192,11 @@ def _insert_diseases(self, phenopacket_path: Path, diseases: List[ProbandDisease
                 ),
             )
 
-    def parse_corpus(self, benchmark_generator: Union[GeneBenchmarkRunOutputGenerator, VariantBenchmarkRunOutputGenerator, DiseaseBenchmarkRunOutputGenerator]) -> None:
+    def parse_corpus(self, benchmark_generator: BenchmarkRunOutputGenerator) -> None:
         """
         Parse the phenopacket corpus and add known genes/variants/diseases to relevant benchmarking tables.
         Args:
-            benchmark_generator
-            (Union[GeneBenchmarkRunOutputGenerator, VariantBenchmarkRunOutputGenerator,
-            DiseaseBenchmarkRunOutputGenerator]): Class instance of the benchmark generator type.
+            benchmark_generator (BenchmarkRunOutputGenerator): Class instance of the benchmark generator type.
         """
         self._create_tables(benchmark_generator)
         for phenopacket_path in all_files(self.phenopacket_dir):

From 85e19d1d647ec118f26ae552f1ea195fa69bb7cc Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 9 Jul 2024 15:16:10 +0100
Subject: [PATCH 05/81] implement `CorpusParser().parse_corpus()` method

---
 src/pheval/analyse/analysis.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/pheval/analyse/analysis.py b/src/pheval/analyse/analysis.py
index 0fe69b36a..2258127db 100644
--- a/src/pheval/analyse/analysis.py
+++ b/src/pheval/analyse/analysis.py
@@ -12,6 +12,7 @@
     generate_benchmark_comparison_output,
     generate_benchmark_output,
 )
+from pheval.analyse.parse_corpus import CorpusParser
 from pheval.analyse.rank_stats import RankStatsWriter
 from pheval.analyse.run_data_parser import TrackInputOutputDirectories
 
@@ -34,6 +35,7 @@ def _run_benchmark(
         plot_type (str): Type of plot for benchmark visualisation.
         benchmark_generator (BenchmarkRunOutputGenerator): Generator for benchmark run output.
     """
+    CorpusParser(results_dir_and_input.phenopacket_dir).parse_corpus(benchmark_generator)
     stats_writer = RankStatsWriter(
         Path(output_prefix + benchmark_generator.stats_comparison_file_suffix)
     )
@@ -125,6 +127,9 @@ def _run_benchmark_comparison(
     stats_writer = RankStatsWriter(
         Path(output_prefix + benchmark_generator.stats_comparison_file_suffix)
     )
+    unique_test_corpora_directories = set([result.phenopacket_dir for result in results_directories])
+    [CorpusParser(test_corpora_directory).parse_corpus(benchmark_generator) for test_corpora_directory in
+     unique_test_corpora_directories]
     benchmarking_results = []
     for results_dir_and_input in results_directories:
         rank_comparison = defaultdict(dict)

From 07e3458929788861d539462227eee64d8ee865fe Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 9 Jul 2024 15:21:27 +0100
Subject: [PATCH 06/81] add `get_connection()` method to connect to the
 benchmarking db

---
 src/pheval/analyse/get_connection.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 src/pheval/analyse/get_connection.py

diff --git a/src/pheval/analyse/get_connection.py b/src/pheval/analyse/get_connection.py
new file mode 100644
index 000000000..4e8c936d3
--- /dev/null
+++ b/src/pheval/analyse/get_connection.py
@@ -0,0 +1,17 @@
+import duckdb
+from duckdb import DuckDBPyConnection
+
+
+def dict_factory(cursor, row):
+    """
+    Map each column name (first field of every `cursor.description` entry) to the value at the same index of `row`.
+    """
+    return {column[0]: row[position] for position, column in enumerate(cursor.description)}
+
+
+def get_connection() -> DuckDBPyConnection:
+    """
+    Open the benchmarking results database ("analysis.db") and return the connection.
+    Closing the returned connection is left to the caller.
+    """
+    return duckdb.connect("analysis.db")

From bf2b6138ed3a75be9cfc0cf50dabc4859c1cf3af Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 9 Jul 2024 15:54:34 +0100
Subject: [PATCH 07/81] record matched results in db rather than using
 dictionaries

---
 .../analyse/gene_prioritisation_analysis.py   | 207 +++++++-----------
 1 file changed, 75 insertions(+), 132 deletions(-)

diff --git a/src/pheval/analyse/gene_prioritisation_analysis.py b/src/pheval/analyse/gene_prioritisation_analysis.py
index dfa5f8f2b..df231a8bc 100644
--- a/src/pheval/analyse/gene_prioritisation_analysis.py
+++ b/src/pheval/analyse/gene_prioritisation_analysis.py
@@ -4,30 +4,29 @@
 from pathlib import Path
 from typing import List, Union
 
+import duckdb
+
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
-from pheval.analyse.parse_corpus import _obtain_causative_genes
 from pheval.analyse.parse_pheval_result import parse_pheval_result, read_standardised_result
-from pheval.analyse.prioritisation_rank_recorder import PrioritisationRankRecorder
 from pheval.analyse.prioritisation_result_types import GenePrioritisationResult
 from pheval.analyse.rank_stats import RankStats
 from pheval.analyse.run_data_parser import TrackInputOutputDirectories
+from pheval.analyse.get_connection import get_connection
 from pheval.post_processing.post_processing import RankedPhEvalGeneResult
 from pheval.utils.file_utils import all_files
-from pheval.utils.phenopacket_utils import ProbandCausativeGene
 
 
 class AssessGenePrioritisation:
     """Class for assessing gene prioritisation based on thresholds and scoring orders."""
 
     def __init__(
-        self,
-        phenopacket_path: Path,
-        results_dir: Path,
-        standardised_gene_results: List[RankedPhEvalGeneResult],
-        threshold: float,
-        score_order: str,
-        proband_causative_genes: List[ProbandCausativeGene],
+            self,
+            phenopacket_path: Path,
+            results_dir: Path,
+            standardised_gene_results: List[RankedPhEvalGeneResult],
+            threshold: float,
+            score_order: str,
     ):
         """
         Initialise AssessGenePrioritisation class.
@@ -38,108 +37,79 @@ def __init__(
             standardised_gene_results (List[RankedPhEvalGeneResult]): List of ranked PhEval gene results
             threshold (float): Threshold for scores
             score_order (str): Score order for results, either ascending or descending
-            proband_causative_genes (List[ProbandCausativeGene]): List of proband causative genes
         """
         self.phenopacket_path = phenopacket_path
         self.results_dir = results_dir
         self.standardised_gene_results = standardised_gene_results
         self.threshold = threshold
         self.score_order = score_order
-        self.proband_causative_genes = proband_causative_genes
-
-    def _record_gene_prioritisation_match(
-        self,
-        gene: ProbandCausativeGene,
-        result_entry: RankedPhEvalGeneResult,
-        rank_stats: RankStats,
-    ) -> GenePrioritisationResult:
-        """
-        Record the gene prioritisation rank if found within the results
-
-        Args:
-            gene (ProbandCausativeGene): Diagnosed proband gene
-            result_entry (RankedPhEvalGeneResult): Ranked PhEval gene result entry
-            rank_stats (RankStats): RankStats class instance
-
-        Returns:
-            GenePrioritisationResult: Recorded correct gene prioritisation rank result
-        """
-        rank = result_entry.rank
-        rank_stats.add_rank(rank)
-        return GenePrioritisationResult(self.phenopacket_path, gene.gene_symbol, rank)
+        self.conn = get_connection()
+        self.new_col = str(self.results_dir.parents[0])
+        self.table_name = f"{phenopacket_path.parents[1].name}_gene"
+        try:
+            self.conn.execute(
+                f'ALTER TABLE {self.table_name} ADD COLUMN "{self.new_col}" INTEGER DEFAULT 0'
+            )
+            self.conn.execute(f'UPDATE {self.table_name} SET "{self.new_col}" = 0')
+            self.conn.commit()
+        except duckdb.CatalogException:
+            pass
 
     def _assess_gene_with_threshold_ascending_order(
-        self,
-        result_entry: RankedPhEvalGeneResult,
-        gene: ProbandCausativeGene,
-        rank_stats: RankStats,
-    ) -> GenePrioritisationResult:
+            self,
+            result_entry: RankedPhEvalGeneResult,
+    ) -> int:
         """
         Record the gene prioritisation rank if it meets the ascending order threshold.
-
-        This method checks if the gene prioritisation rank meets the ascending order threshold.
-        If the score of the result entry is less than the threshold, it records the gene rank.
-
         Args:
             result_entry (RankedPhEvalGeneResult): Ranked PhEval gene result entry
-            gene (ProbandCausativeGene): Diagnosed proband gene
-            rank_stats (RankStats): RankStats class instance
         Returns:
-            GenePrioritisationResult: Recorded correct gene prioritisation rank result
+            int: Recorded gene prioritisation rank.
         """
         if float(self.threshold) > float(result_entry.score):
-            return self._record_gene_prioritisation_match(gene, result_entry, rank_stats)
+            return result_entry.rank
+        else:
+            return 0
 
     def _assess_gene_with_threshold(
-        self,
-        result_entry: RankedPhEvalGeneResult,
-        gene: ProbandCausativeGene,
-        rank_stats: RankStats,
-    ) -> GenePrioritisationResult:
+            self,
+            result_entry: RankedPhEvalGeneResult,
+    ) -> int:
         """
         Record the gene prioritisation rank if it meets the score threshold.
-        This method checks if the gene prioritisation rank meets the score threshold.
-        If the score of the result entry is greater than the threshold, it records the gene rank.
-
         Args:
             result_entry (RankedPhEvalResult): Ranked PhEval gene result entry
-            gene (ProbandCausativeGene): Diagnosed proband gene
-            rank_stats (RankStats): RankStats class instance
 
         Returns:
-            GenePrioritisationResult: Recorded correct gene prioritisation rank result
+            int: Recorded correct gene prioritisation rank.
         """
         if float(self.threshold) < float(result_entry.score):
-            return self._record_gene_prioritisation_match(gene, result_entry, rank_stats)
+            return result_entry.rank
+        else:
+            return 0
 
     def _record_matched_gene(
-        self,
-        gene: ProbandCausativeGene,
-        rank_stats: RankStats,
-        standardised_gene_result: RankedPhEvalGeneResult,
-    ) -> GenePrioritisationResult:
+            self,
+            standardised_gene_result: RankedPhEvalGeneResult,
+    ) -> int:
         """
         Return the gene rank result - handling the specification of a threshold.
         This method determines and returns the gene rank result based on the specified threshold
         and score order. If the threshold is 0.0, it records the gene rank directly.
         Otherwise, it assesses the gene with the threshold based on the score order.
         Args:
-            gene (ProbandCausativeGene): Diagnosed proband gene
-            rank_stats (RankStats): RankStats class instance
             standardised_gene_result (RankedPhEvalGeneResult): Ranked PhEval gene result entry
         Returns:
             GenePrioritisationResult: Recorded correct gene prioritisation rank result
         """
         if float(self.threshold) == 0.0:
-            return self._record_gene_prioritisation_match(
-                gene, standardised_gene_result, rank_stats
-            )
+            return standardised_gene_result.rank
         else:
             return (
-                self._assess_gene_with_threshold(standardised_gene_result, gene, rank_stats)
+                self._assess_gene_with_threshold(standardised_gene_result)
                 if self.score_order != "ascending"
                 else self._assess_gene_with_threshold_ascending_order(
-                    standardised_gene_result, gene, rank_stats
+                    standardised_gene_result,
                 )
             )
 
@@ -162,10 +132,8 @@ def _check_string_representation(entity: str) -> Union[List[str], str]:
             return entity
 
     def assess_gene_prioritisation(
-        self,
-        rank_stats: RankStats,
-        rank_records: defaultdict,
-        binary_classification_stats: BinaryClassificationStats,
+            self,
+            binary_classification_stats: BinaryClassificationStats,
     ) -> None:
         """
         Assess gene prioritisation.
@@ -173,64 +141,45 @@ def assess_gene_prioritisation(
         and records ranks using a PrioritisationRankRecorder.
 
         Args:
-            rank_stats (RankStats): RankStats class instance
-            rank_records (defaultdict): A defaultdict to store the correct ranked results.
             binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
         """
         relevant_ranks = []
-        for gene in self.proband_causative_genes:
-            rank_stats.total += 1
-            gene_match = GenePrioritisationResult(self.phenopacket_path, gene.gene_symbol)
-            for standardised_gene_result in self.standardised_gene_results:
-                gene_identifier = self._check_string_representation(
-                    standardised_gene_result.gene_identifier
+        df = self.conn.execute(
+            f"""SELECT * FROM {self.table_name} WHERE phenopacket = '{self.phenopacket_path.name}'""").fetchdf()
+        for i, row in df.iterrows():
+            generated_matches = list(
+                result for result in self.standardised_gene_results
+                if (
+                        isinstance(self._check_string_representation(result.gene_identifier), list)
+                        and row["gene_identifier"] in self._check_string_representation(result.gene_identifier)
+                        or isinstance(self._check_string_representation(result.gene_identifier), str)
+                        and row["gene_identifier"] == self._check_string_representation(result.gene_identifier)
+                        or isinstance(self._check_string_representation(result.gene_symbol), list)
+                        and row["gene_symbol"] in self._check_string_representation(result.gene_symbol)
+                        or isinstance(self._check_string_representation(result.gene_symbol), str)
+                        and row["gene_symbol"] == self._check_string_representation(result.gene_symbol)
                 )
-                gene_symbol = self._check_string_representation(
-                    standardised_gene_result.gene_symbol
+            )
+            if len(generated_matches) > 0:
+                gene_match = self._record_matched_gene(generated_matches[0])
+                relevant_ranks.append(gene_match)
+                primary_key = f"{self.phenopacket_path.name}-{row['gene_symbol']}"
+                self.conn.execute(
+                    f'UPDATE {self.table_name} SET "{self.new_col}" = ? WHERE identifier = ?',
+                    (gene_match, primary_key),
                 )
-                if (
-                    isinstance(gene_identifier, list)
-                    and gene.gene_identifier in gene_identifier
-                    or isinstance(gene_identifier, str)
-                    and gene.gene_identifier == str
-                    or isinstance(gene_symbol, list)
-                    and gene.gene_symbol in gene_symbol
-                    or isinstance(gene_symbol, str)
-                    and gene.gene_symbol == gene_symbol
-                ):
-                    gene_match = self._record_matched_gene(
-                        gene, rank_stats, standardised_gene_result
-                    )
-                    (
-                        relevant_ranks.append(gene_match.rank)
-                        if gene_match
-                        else relevant_ranks.append(0)
-                    )
-                    break
-            PrioritisationRankRecorder(
-                rank_stats.total,
-                self.results_dir,
-                (
-                    GenePrioritisationResult(self.phenopacket_path, gene.gene_symbol)
-                    if gene_match is None
-                    else gene_match
-                ),
-                rank_records,
-            ).record_rank()
-        rank_stats.relevant_result_ranks.append(relevant_ranks)
         binary_classification_stats.add_classification(
             pheval_results=self.standardised_gene_results, relevant_ranks=relevant_ranks
         )
+        self.conn.close()
 
 
 def assess_phenopacket_gene_prioritisation(
-    phenopacket_path: Path,
-    score_order: str,
-    results_dir_and_input: TrackInputOutputDirectories,
-    threshold: float,
-    gene_rank_stats: RankStats,
-    gene_rank_comparison: defaultdict,
-    gene_binary_classification_stats: BinaryClassificationStats,
+        phenopacket_path: Path,
+        score_order: str,
+        results_dir_and_input: TrackInputOutputDirectories,
+        threshold: float,
+        gene_binary_classification_stats: BinaryClassificationStats,
 ) -> None:
     """
     Assess gene prioritisation for a Phenopacket by comparing PhEval standardised gene results
@@ -241,32 +190,28 @@ def assess_phenopacket_gene_prioritisation(
         score_order (str): The order in which scores are arranged, either ascending or descending.
         results_dir_and_input (TrackInputOutputDirectories): Input and output directories.
         threshold (float): Threshold for assessment.
-        gene_rank_stats (RankStats): RankStats class instance.
-        gene_rank_comparison (defaultdict): Default dictionary for gene rank comparisons.
         gene_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
     """
     standardised_gene_result = results_dir_and_input.results_dir.joinpath(
         f"pheval_gene_results/{phenopacket_path.stem}-pheval_gene_result.tsv"
     )
     pheval_gene_result = read_standardised_result(standardised_gene_result)
-    proband_causative_genes = _obtain_causative_genes(phenopacket_path)
     AssessGenePrioritisation(
         phenopacket_path,
         results_dir_and_input.results_dir.joinpath("pheval_gene_results/"),
         parse_pheval_result(RankedPhEvalGeneResult, pheval_gene_result),
         threshold,
         score_order,
-        proband_causative_genes,
     ).assess_gene_prioritisation(
-        gene_rank_stats, gene_rank_comparison, gene_binary_classification_stats
+       gene_binary_classification_stats
     )
 
 
 def benchmark_gene_prioritisation(
-    results_directory_and_input: TrackInputOutputDirectories,
-    score_order: str,
-    threshold: float,
-    gene_rank_comparison: defaultdict,
+        results_directory_and_input: TrackInputOutputDirectories,
+        score_order: str,
+        threshold: float,
+        gene_rank_comparison: defaultdict,
 ) -> BenchmarkRunResults:
     """
     Benchmark a directory based on gene prioritisation results.
@@ -287,8 +232,6 @@ def benchmark_gene_prioritisation(
             score_order,
             results_directory_and_input,
             threshold,
-            gene_rank_stats,
-            gene_rank_comparison,
             gene_binary_classification_stats,
         )
     return BenchmarkRunResults(

From d79d55da033c9bc43ddbeae966a3a48ae02c4d97 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 9 Jul 2024 16:11:10 +0100
Subject: [PATCH 08/81] replace `RankStats` and `RankStatsWriter` methods that
 focus on writing TSV with using the db to calculate stats & add stats to
 table

---
 src/pheval/analyse/rank_stats.py | 288 +++++++++++++++----------------
 1 file changed, 139 insertions(+), 149 deletions(-)

diff --git a/src/pheval/analyse/rank_stats.py b/src/pheval/analyse/rank_stats.py
index e25cb98e7..27e19c3df 100644
--- a/src/pheval/analyse/rank_stats.py
+++ b/src/pheval/analyse/rank_stats.py
@@ -1,13 +1,14 @@
-import csv
 from dataclasses import dataclass, field
 from pathlib import Path
 from statistics import mean
 from typing import List
 
 import numpy as np
+from duckdb import DuckDBPyConnection
 from sklearn.metrics import ndcg_score
 
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
+from pheval.analyse.get_connection import get_connection
 
 
 @dataclass
@@ -36,29 +37,32 @@ class RankStats:
     relevant_result_ranks: List[List[int]] = field(default_factory=list)
     mrr: float = None
 
-    def add_rank(self, rank: int) -> None:
-        """
-        Add rank for matched result.
+    def add_ranks(self, table_name: str, column_name: str):
+        """Load rank counts and rank lists for `column_name` of `table_name`; the connection is always closed."""
+        conn = get_connection()
+        try:
+            for attribute, condition in (("top", " = 1"), ("top3", " BETWEEN 1 AND 3"), ("top5", " BETWEEN 1 AND 5"),
+                                         ("top10", " BETWEEN 1 AND 10"), ("found", " > 0"), ("total", " >= 0")):
+                setattr(self, attribute, self._execute_count_query(conn, table_name, column_name, condition))
+            self.reciprocal_ranks = self._fetch_reciprocal_ranks(conn, table_name, column_name)
+            self.relevant_result_ranks = self._fetch_relevant_ranks(conn, table_name, column_name)
+        finally:  # release the connection even when one of the queries above raised
+            conn.close()
 
-        Args:
-            rank (int): The rank value to be added.
-
-        Notes:
-            This method updates the internal attributes of the RankStats object based on the provided rank value.
-            It calculates various statistics such as the count of top ranks (1, 3, 5, and 10),
-            the total number of ranks found,and the reciprocal rank.
-            This function modifies the object's state by updating the internal attributes.
-        """
-        self.reciprocal_ranks.append(1 / rank)
-        self.found += 1
-        if rank == 1:
-            self.top += 1
-        if rank != "" and rank <= 3:
-            self.top3 += 1
-        if rank != "" and rank <= 5:
-            self.top5 += 1
-        if rank != "" and rank <= 10:
-            self.top10 += 1
+    @staticmethod
+    def _execute_count_query(conn: DuckDBPyConnection, table_name: str, column_name: str, condition: str) -> int:
+        """Count the rows of `table_name` whose `column_name` value satisfies the SQL fragment `condition`."""
+        return conn.execute(f'SELECT COUNT(*) FROM {table_name} WHERE "{column_name}" {condition}').fetchone()[0]
+
+    @staticmethod
+    def _fetch_reciprocal_ranks(conn: DuckDBPyConnection, table_name: str, column_name: str) -> List[float]:
+        stored_ranks = conn.execute(f'SELECT "{column_name}" FROM {table_name}').fetchall()  # single-column rows
+        return [1 / rank if rank > 0 else 0 for (rank,) in stored_ranks]  # a non-positive rank contributes 0
+
+    @staticmethod
+    def _fetch_relevant_ranks(conn: DuckDBPyConnection, table_name: str, column_name: str) -> List[List[int]]:
+        grouped_sql = f'SELECT LIST("{column_name}") as values_list FROM {table_name} GROUP BY phenopacket'
+        return [ranks for (ranks,) in conn.execute(grouped_sql).fetchall()]  # one rank list per phenopacket
 
     def percentage_rank(self, value: int) -> float:
         """
@@ -183,7 +187,7 @@ def precision_at_k(self, k: int) -> float:
 
     @staticmethod
     def _average_precision_at_k(
-        number_of_relevant_entities_at_k: int, precision_at_k: float
+            number_of_relevant_entities_at_k: int, precision_at_k: float
     ) -> float:
         """
         Calculate the Average Precision at k.
@@ -280,135 +284,121 @@ def mean_normalised_discounted_cumulative_gain(self, k: int) -> float:
 class RankStatsWriter:
     """Class for writing the rank stats to a file."""
 
-    def __init__(self, file: Path):
+    def __init__(self, table_name: str):
         """
         Initialise the RankStatsWriter class
         Args:
-            file (Path): Path to the file where rank stats will be written
-        """
-        self.file = open(file, "w")
-        self.writer = csv.writer(self.file, delimiter="\t")
-        self.writer.writerow(
-            [
-                "results_directory_path",
-                "top",
-                "top3",
-                "top5",
-                "top10",
-                "found",
-                "total",
-                "mean_reciprocal_rank",
-                "percentage_top",
-                "percentage_top3",
-                "percentage_top5",
-                "percentage_top10",
-                "percentage_found",
-                "precision@1",
-                "precision@3",
-                "precision@5",
-                "precision@10",
-                "MAP@1",
-                "MAP@3",
-                "MAP@5",
-                "MAP@10",
-                "f_beta_score@1",
-                "f_beta_score@3",
-                "f_beta_score@5",
-                "f_beta_score@10",
-                "NDCG@3",
-                "NDCG@5",
-                "NDCG@10",
-                "true_positives",
-                "false_positives",
-                "true_negatives",
-                "false_negatives",
-                "sensitivity",
-                "specificity",
-                "precision",
-                "negative_predictive_value",
-                "false_positive_rate",
-                "false_discovery_rate",
-                "false_negative_rate",
-                "accuracy",
-                "f1_score",
-                "matthews_correlation_coefficient",
-            ]
+            table_name (str): Name of table to add statistics.
+        """
+
+        self.table_name = table_name
+        conn = get_connection()
+        conn.execute(
+            f"""
+                    CREATE TABLE IF NOT EXISTS "{self.table_name}" (
+                        results_directory_path VARCHAR,
+                        top INT,
+                        top3 INT,
+                        top5 INT,
+                        top10 INT,
+                        "found" INT,
+                        total INT,
+                        mean_reciprocal_rank FLOAT,
+                        percentage_top FLOAT,
+                        percentage_top3 FLOAT,
+                        percentage_top5 FLOAT,
+                        percentage_top10 FLOAT,
+                        percentage_found FLOAT,
+                        "precision@1" FLOAT,
+                        "precision@3" FLOAT,
+                        "precision@5" FLOAT,
+                        "precision@10" FLOAT,
+                        "MAP@1" FLOAT,
+                        "MAP@3" FLOAT,
+                        "MAP@5" FLOAT,
+                        "MAP@10" FLOAT,
+                        "f_beta_score@1" FLOAT,
+                        "f_beta_score@3"FLOAT,
+                        "f_beta_score@5" FLOAT,
+                        "f_beta_score@10" FLOAT,
+                        "NDCG@3" FLOAT,
+                        "NDCG@5" FLOAT,
+                        "NDCG@10" FLOAT,
+                        true_positives INT,
+                        false_positives INT,
+                        true_negatives INT,
+                        false_negatives INT,
+                        sensitivity FLOAT,
+                        specificity FLOAT,
+                        "precision" FLOAT,
+                        negative_predictive_value FLOAT,
+                        false_positive_rate FLOAT,
+                        false_discovery_rate FLOAT,
+                        false_negative_rate FLOAT,
+                        accuracy FLOAT,
+                        f1_score FLOAT,
+                        matthews_correlation_coefficient FLOAT,
+                        
+                    )
+                    """
         )
+        conn.close()
 
-    def write_row(
-        self,
-        directory: Path,
-        rank_stats: RankStats,
-        binary_classification: BinaryClassificationStats,
-    ) -> None:
+    def add_statistics_entry(self,
+                             directory_path: Path,
+                             rank_stats: RankStats,
+                             binary_classification: BinaryClassificationStats):
         """
-        Write summary rank statistics row for a run to the file.
-
+        Add statistics row to table for a run.
         Args:
-            directory (Path): Path to the results directory corresponding to the run
-            rank_stats (RankStats): RankStats instance containing rank statistics corresponding to the run
-
-        Raises:
-            IOError: If there is an error writing to the file.
-        """
-        try:
-            self.writer.writerow(
-                [
-                    directory,
-                    rank_stats.top,
-                    rank_stats.top3,
-                    rank_stats.top5,
-                    rank_stats.top10,
-                    rank_stats.found,
-                    rank_stats.total,
-                    rank_stats.mean_reciprocal_rank(),
-                    rank_stats.percentage_top(),
-                    rank_stats.percentage_top3(),
-                    rank_stats.percentage_top5(),
-                    rank_stats.percentage_top10(),
-                    rank_stats.percentage_found(),
-                    rank_stats.precision_at_k(1),
-                    rank_stats.precision_at_k(3),
-                    rank_stats.precision_at_k(5),
-                    rank_stats.precision_at_k(10),
-                    rank_stats.mean_average_precision_at_k(1),
-                    rank_stats.mean_average_precision_at_k(3),
-                    rank_stats.mean_average_precision_at_k(5),
-                    rank_stats.mean_average_precision_at_k(10),
-                    rank_stats.f_beta_score_at_k(rank_stats.percentage_top(), 1),
-                    rank_stats.f_beta_score_at_k(rank_stats.percentage_top3(), 3),
-                    rank_stats.f_beta_score_at_k(rank_stats.percentage_top5(), 5),
-                    rank_stats.f_beta_score_at_k(rank_stats.percentage_top10(), 10),
-                    rank_stats.mean_normalised_discounted_cumulative_gain(3),
-                    rank_stats.mean_normalised_discounted_cumulative_gain(5),
-                    rank_stats.mean_normalised_discounted_cumulative_gain(10),
-                    binary_classification.true_positives,
-                    binary_classification.false_positives,
-                    binary_classification.true_negatives,
-                    binary_classification.false_negatives,
-                    binary_classification.sensitivity(),
-                    binary_classification.specificity(),
-                    binary_classification.precision(),
-                    binary_classification.negative_predictive_value(),
-                    binary_classification.false_positive_rate(),
-                    binary_classification.false_discovery_rate(),
-                    binary_classification.false_negative_rate(),
-                    binary_classification.accuracy(),
-                    binary_classification.f1_score(),
-                    binary_classification.matthews_correlation_coefficient(),
-                ]
-            )
-        except IOError:
-            print("Error writing ", self.file)
-
-    def close(self) -> None:
-        """
-        Close the file used for writing rank statistics.
-
-        Raises:
-            IOError: If there's an error while closing the file.
-        """
-        try:
-            self.file.close()
-        except IOError:
-            print("Error closing ", self.file)
+            directory_path (Path): Path to the results directory associated with the run.
+            rank_stats (RankStats): RankStats object for the run.
+            binary_classification (BinaryClassificationStats): BinaryClassificationStats object for the run.
+        """
+        conn = get_connection()
+        conn.execute(f"""
+                INSERT INTO "{self.table_name}" VALUES 
+                (
+                '{directory_path}',
+                {rank_stats.top},
+                {rank_stats.top3},
+                {rank_stats.top5},
+                {rank_stats.top10},
+                {rank_stats.found},
+                {rank_stats.total},
+                {rank_stats.mean_reciprocal_rank()},
+                {rank_stats.percentage_top()},
+                {rank_stats.percentage_top3()},
+                {rank_stats.percentage_top5()},
+                {rank_stats.percentage_top10()},
+                {rank_stats.percentage_found()},
+                {rank_stats.precision_at_k(1)},
+                {rank_stats.precision_at_k(3)},
+                {rank_stats.precision_at_k(5)},
+                {rank_stats.precision_at_k(10)},
+                {rank_stats.mean_average_precision_at_k(1)},
+                {rank_stats.mean_average_precision_at_k(3)},
+                {rank_stats.mean_average_precision_at_k(5)},
+                {rank_stats.mean_average_precision_at_k(10)},
+                {rank_stats.f_beta_score_at_k(rank_stats.percentage_top(), 1)},
+                {rank_stats.f_beta_score_at_k(rank_stats.percentage_top3(), 3)},
+                {rank_stats.f_beta_score_at_k(rank_stats.percentage_top5(), 5)},
+                {rank_stats.f_beta_score_at_k(rank_stats.percentage_top10(), 10)},
+                {rank_stats.mean_normalised_discounted_cumulative_gain(3)},
+                {rank_stats.mean_normalised_discounted_cumulative_gain(5)},
+                {rank_stats.mean_normalised_discounted_cumulative_gain(10)},
+                {binary_classification.true_positives},
+                {binary_classification.false_positives},
+                {binary_classification.true_negatives},
+                {binary_classification.false_negatives},
+                {binary_classification.sensitivity()},
+                {binary_classification.specificity()},
+                {binary_classification.precision()},
+                {binary_classification.negative_predictive_value()},
+                {binary_classification.false_positive_rate()},
+                {binary_classification.false_discovery_rate()},
+                {binary_classification.false_negative_rate()},
+                {binary_classification.accuracy()},
+                {binary_classification.f1_score()},
+                {binary_classification.matthews_correlation_coefficient()},)""")
+        conn.close()

From 5d2b0012394057399b311356c34ead61a694407e Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 9 Jul 2024 18:31:47 +0100
Subject: [PATCH 09/81] add `phenopacket_dir` variable to `BenchmarkRunResults`

---
 src/pheval/analyse/benchmarking_data.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/pheval/analyse/benchmarking_data.py b/src/pheval/analyse/benchmarking_data.py
index 7afd76736..dc3de55c8 100644
--- a/src/pheval/analyse/benchmarking_data.py
+++ b/src/pheval/analyse/benchmarking_data.py
@@ -11,14 +11,15 @@ class BenchmarkRunResults:
     Benchmarking results for a run.
 
     Attributes:
-        ranks (dict): Dictionary containing recorded ranks for samples.
         rank_stats (RankStats): Statistics related to benchmark.
+        binary_classification_stats (BinaryClassificationStats): Binary statistics related to benchmark.
         results_dir (Path, optional): Path to the result directory. Defaults to None.
         benchmark_name (str, optional): Name of the benchmark run. Defaults to None.
+        phenopacket_dir (Path, optional): Path to the phenopacket directory. Defaults to None.
     """
 
-    ranks: dict
     rank_stats: RankStats
     binary_classification_stats: BinaryClassificationStats
     results_dir: Path = None
     benchmark_name: str = None
+    phenopacket_dir: Path = None

From cffd41b5ba95d249ed41b6523750120eb21bcc04 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 9 Jul 2024 18:32:19 +0100
Subject: [PATCH 10/81] calculate rank stats

---
 src/pheval/analyse/gene_prioritisation_analysis.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/pheval/analyse/gene_prioritisation_analysis.py b/src/pheval/analyse/gene_prioritisation_analysis.py
index df231a8bc..7334b090c 100644
--- a/src/pheval/analyse/gene_prioritisation_analysis.py
+++ b/src/pheval/analyse/gene_prioritisation_analysis.py
@@ -1,6 +1,5 @@
 import ast
 import re
-from collections import defaultdict
 from pathlib import Path
 from typing import List, Union
 
@@ -203,7 +202,7 @@ def assess_phenopacket_gene_prioritisation(
         threshold,
         score_order,
     ).assess_gene_prioritisation(
-       gene_binary_classification_stats
+        gene_binary_classification_stats
     )
 
 
@@ -211,7 +210,6 @@ def benchmark_gene_prioritisation(
         results_directory_and_input: TrackInputOutputDirectories,
         score_order: str,
         threshold: float,
-        gene_rank_comparison: defaultdict,
 ) -> BenchmarkRunResults:
     """
     Benchmark a directory based on gene prioritisation results.
@@ -219,12 +217,10 @@ def benchmark_gene_prioritisation(
          results_directory_and_input (TrackInputOutputDirectories): Input and output directories.
          score_order (str): The order in which scores are arranged.
          threshold (float): Threshold for assessment.
-         gene_rank_comparison (defaultdict): Default dictionary for gene rank comparisons.
      Returns:
          BenchmarkRunResults: An object containing benchmarking results for gene prioritisation,
          including ranks and rank statistics for the benchmarked directory.
     """
-    gene_rank_stats = RankStats()
     gene_binary_classification_stats = BinaryClassificationStats()
     for phenopacket_path in all_files(results_directory_and_input.phenopacket_dir):
         assess_phenopacket_gene_prioritisation(
@@ -234,9 +230,13 @@ def benchmark_gene_prioritisation(
             threshold,
             gene_binary_classification_stats,
         )
+    gene_rank_stats = RankStats()
+    gene_rank_stats.add_ranks(
+        table_name=f'{results_directory_and_input.phenopacket_dir.parents[0].name}_gene',
+        column_name=str(results_directory_and_input.results_dir))
     return BenchmarkRunResults(
-        results_dir=results_directory_and_input.results_dir,
-        ranks=gene_rank_comparison,
         rank_stats=gene_rank_stats,
+        results_dir=results_directory_and_input.results_dir,
         binary_classification_stats=gene_binary_classification_stats,
+        phenopacket_dir=results_directory_and_input.phenopacket_dir,
     )

From 745d51b5f350f1dda4a2f1ea71978db73db80e5e Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 9 Jul 2024 18:39:28 +0100
Subject: [PATCH 11/81] refactor constant variable names

---
 src/pheval/constants.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/pheval/constants.py b/src/pheval/constants.py
index 102d7e28a..0c054462b 100644
--- a/src/pheval/constants.py
+++ b/src/pheval/constants.py
@@ -1,8 +1,8 @@
 PHEVAL_RESULTS_DIRECTORY_SUFFIX = "_results"
-GENE_PLOT_FILE_PREFIX = "gene"
+GENE_PRIORITISATION_TYPE_STR = "gene"
 GENE_PLOT_Y_LABEL = "Disease-causing genes (%)"
-VARIANT_PLOT_FILE_PREFIX = "variant"
+VARIANT_PRIORITISATION_TYPE_STR = "variant"
 VARIANT_PLOT_Y_LABEL = "Disease-causing variants (%)"
-DISEASE_PLOT_FILE_PREFIX = "disease"
+DISEASE_PRIORITISATION_TYPE_STR = "disease"
 DISEASE_PLOT_Y_LABEL = "Known diseases (%)"
 RANK_COMPARISON_FILE_SUFFIX = "_rank_comparison.tsv"

From 4cd2b06a7d232017f3fde4683032cff3283c2465 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 9 Jul 2024 19:29:07 +0100
Subject: [PATCH 12/81] refactor structure to connect to DB once when
 benchmarking whole directory

---
 .../analyse/gene_prioritisation_analysis.py   | 63 ++++++++-----------
 1 file changed, 26 insertions(+), 37 deletions(-)

diff --git a/src/pheval/analyse/gene_prioritisation_analysis.py b/src/pheval/analyse/gene_prioritisation_analysis.py
index 7334b090c..3286e7d7d 100644
--- a/src/pheval/analyse/gene_prioritisation_analysis.py
+++ b/src/pheval/analyse/gene_prioritisation_analysis.py
@@ -3,7 +3,6 @@
 from pathlib import Path
 from typing import List, Union
 
-import duckdb
 
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
@@ -11,7 +10,7 @@
 from pheval.analyse.prioritisation_result_types import GenePrioritisationResult
 from pheval.analyse.rank_stats import RankStats
 from pheval.analyse.run_data_parser import TrackInputOutputDirectories
-from pheval.analyse.get_connection import get_connection
+from pheval.analyse.get_connection import DBConnector
 from pheval.post_processing.post_processing import RankedPhEvalGeneResult
 from pheval.utils.file_utils import all_files
 
@@ -21,9 +20,9 @@ class AssessGenePrioritisation:
 
     def __init__(
             self,
-            phenopacket_path: Path,
+            db_connection: DBConnector,
+            table_name: str,
             results_dir: Path,
-            standardised_gene_results: List[RankedPhEvalGeneResult],
             threshold: float,
             score_order: str,
     ):
@@ -31,28 +30,17 @@ def __init__(
         Initialise AssessGenePrioritisation class.
 
         Args:
-            phenopacket_path (Path): Path to the phenopacket file
             results_dir (Path): Path to the results directory
-            standardised_gene_results (List[RankedPhEvalGeneResult]): List of ranked PhEval gene results
             threshold (float): Threshold for scores
             score_order (str): Score order for results, either ascending or descending
         """
-        self.phenopacket_path = phenopacket_path
         self.results_dir = results_dir
-        self.standardised_gene_results = standardised_gene_results
         self.threshold = threshold
         self.score_order = score_order
-        self.conn = get_connection()
-        self.new_col = str(self.results_dir.parents[0])
-        self.table_name = f"{phenopacket_path.parents[1].name}_gene"
-        try:
-            self.conn.execute(
-                f'ALTER TABLE {self.table_name} ADD COLUMN "{self.new_col}" INTEGER DEFAULT 0'
-            )
-            self.conn.execute(f'UPDATE {self.table_name} SET "{self.new_col}" = 0')
-            self.conn.commit()
-        except duckdb.CatalogException:
-            pass
+        self.conn = db_connection.conn
+        self.column = str(self.results_dir.parents[0])
+        self.table_name = table_name
+        db_connection.add_column(table_name=table_name, column=self.column, default=0)
 
     def _assess_gene_with_threshold_ascending_order(
             self,
@@ -132,6 +120,8 @@ def _check_string_representation(entity: str) -> Union[List[str], str]:
 
     def assess_gene_prioritisation(
             self,
+            standardised_gene_results: List[RankedPhEvalGeneResult],
+            phenopacket_path: Path,
             binary_classification_stats: BinaryClassificationStats,
     ) -> None:
         """
@@ -144,10 +134,10 @@ def assess_gene_prioritisation(
         """
         relevant_ranks = []
         df = self.conn.execute(
-            f"""SELECT * FROM {self.table_name} WHERE phenopacket = '{self.phenopacket_path.name}'""").fetchdf()
+            f"""SELECT * FROM {self.table_name} WHERE phenopacket = '{phenopacket_path.name}'""").fetchdf()
         for i, row in df.iterrows():
             generated_matches = list(
-                result for result in self.standardised_gene_results
+                result for result in standardised_gene_results
                 if (
                         isinstance(self._check_string_representation(result.gene_identifier), list)
                         and row["gene_identifier"] in self._check_string_representation(result.gene_identifier)
@@ -162,23 +152,21 @@ def assess_gene_prioritisation(
             if len(generated_matches) > 0:
                 gene_match = self._record_matched_gene(generated_matches[0])
                 relevant_ranks.append(gene_match)
-                primary_key = f"{self.phenopacket_path.name}-{row['gene_symbol']}"
+                primary_key = f"{phenopacket_path.name}-{row['gene_symbol']}"
                 self.conn.execute(
-                    f'UPDATE {self.table_name} SET "{self.new_col}" = ? WHERE identifier = ?',
+                    f'UPDATE {self.table_name} SET "{self.column}" = ? WHERE identifier = ?',
                     (gene_match, primary_key),
                 )
         binary_classification_stats.add_classification(
-            pheval_results=self.standardised_gene_results, relevant_ranks=relevant_ranks
+            pheval_results=standardised_gene_results, relevant_ranks=relevant_ranks
         )
-        self.conn.close()
 
 
 def assess_phenopacket_gene_prioritisation(
         phenopacket_path: Path,
-        score_order: str,
         results_dir_and_input: TrackInputOutputDirectories,
-        threshold: float,
         gene_binary_classification_stats: BinaryClassificationStats,
+        gene_benchmarker: AssessGenePrioritisation,
 ) -> None:
     """
     Assess gene prioritisation for a Phenopacket by comparing PhEval standardised gene results
@@ -186,22 +174,16 @@ def assess_phenopacket_gene_prioritisation(
 
     Args:
         phenopacket_path (Path): Path to the Phenopacket.
-        score_order (str): The order in which scores are arranged, either ascending or descending.
         results_dir_and_input (TrackInputOutputDirectories): Input and output directories.
-        threshold (float): Threshold for assessment.
         gene_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
     """
     standardised_gene_result = results_dir_and_input.results_dir.joinpath(
         f"pheval_gene_results/{phenopacket_path.stem}-pheval_gene_result.tsv"
     )
     pheval_gene_result = read_standardised_result(standardised_gene_result)
-    AssessGenePrioritisation(
-        phenopacket_path,
-        results_dir_and_input.results_dir.joinpath("pheval_gene_results/"),
+    gene_benchmarker.assess_gene_prioritisation(
         parse_pheval_result(RankedPhEvalGeneResult, pheval_gene_result),
-        threshold,
-        score_order,
-    ).assess_gene_prioritisation(
+        phenopacket_path,
         gene_binary_classification_stats
     )
 
@@ -222,14 +204,21 @@ def benchmark_gene_prioritisation(
          including ranks and rank statistics for the benchmarked directory.
     """
     gene_binary_classification_stats = BinaryClassificationStats()
+    db_connection = DBConnector()
+    gene_benchmarker = AssessGenePrioritisation(db_connection=db_connection,
+                                                table_name=f"{results_directory_and_input.phenopacket_dir.parents[0].name}_gene",
+                                                results_dir=results_directory_and_input.results_dir.joinpath("pheval_gene_results/"),
+                                                threshold=threshold,
+                                                score_order=score_order
+                                                )
     for phenopacket_path in all_files(results_directory_and_input.phenopacket_dir):
         assess_phenopacket_gene_prioritisation(
             phenopacket_path,
-            score_order,
             results_directory_and_input,
-            threshold,
             gene_binary_classification_stats,
+            gene_benchmarker
         )
+    db_connection.close()
     gene_rank_stats = RankStats()
     gene_rank_stats.add_ranks(
         table_name=f'{results_directory_and_input.phenopacket_dir.parents[0].name}_gene',

From 6ba558019ffcafedcc60e8bd3f2bdf9f2bc8ddb1 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 9 Jul 2024 19:32:43 +0100
Subject: [PATCH 13/81] refactor db connection

---
 src/pheval/analyse/rank_stats.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/pheval/analyse/rank_stats.py b/src/pheval/analyse/rank_stats.py
index 27e19c3df..0ccc23506 100644
--- a/src/pheval/analyse/rank_stats.py
+++ b/src/pheval/analyse/rank_stats.py
@@ -8,7 +8,7 @@
 from sklearn.metrics import ndcg_score
 
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
-from pheval.analyse.get_connection import get_connection
+from pheval.analyse.get_connection import DBConnector
 
 
 @dataclass
@@ -38,7 +38,7 @@ class RankStats:
     mrr: float = None
 
     def add_ranks(self, table_name: str, column_name: str):
-        conn = get_connection()
+        conn = DBConnector().conn
         self.top = self._execute_count_query(conn, table_name, column_name, " = 1")
         self.top3 = self._execute_count_query(conn, table_name, column_name, " BETWEEN 1 AND 3")
         self.top5 = self._execute_count_query(conn, table_name, column_name, " BETWEEN 1 AND 5")
@@ -292,7 +292,7 @@ def __init__(self, table_name: str):
         """
 
         self.table_name = table_name
-        conn = get_connection()
+        conn = DBConnector().conn
         conn.execute(
             f"""
                     CREATE TABLE IF NOT EXISTS "{self.table_name}" (
@@ -355,7 +355,7 @@ def add_statistics_entry(self,
             rank_stats (RankStats): RankStats object for the run.
             binary_classification (BinaryClassificationStats): BinaryClassificationStats object for the run.
         """
-        conn = get_connection()
+        conn = DBConnector().conn
         conn.execute(f"""
                 INSERT INTO "{self.table_name}" VALUES 
                 (

From 81a8e494cdc015bd5b2a353d1f0b4979c0f255e7 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 9 Jul 2024 19:32:48 +0100
Subject: [PATCH 14/81] refactor db connection

---
 src/pheval/analyse/parse_corpus.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pheval/analyse/parse_corpus.py b/src/pheval/analyse/parse_corpus.py
index a7cbdc001..f63766434 100644
--- a/src/pheval/analyse/parse_corpus.py
+++ b/src/pheval/analyse/parse_corpus.py
@@ -3,7 +3,7 @@
 
 from pheval.analyse.benchmark_generator import GeneBenchmarkRunOutputGenerator, VariantBenchmarkRunOutputGenerator, \
     DiseaseBenchmarkRunOutputGenerator, BenchmarkRunOutputGenerator
-from pheval.get_connection import get_connection
+from pheval.analyse.get_connection import DBConnector
 from pheval.utils.file_utils import all_files
 from pheval.utils.phenopacket_utils import GenomicVariant, ProbandCausativeGene, phenopacket_reader, PhenopacketUtil, \
     ProbandDisease
@@ -63,7 +63,7 @@ def __init__(self, phenopacket_dir: Path) -> None:
             phenopacket_dir (Path): Path to the Phenopacket directory.
         """
         self.phenopacket_dir = phenopacket_dir
-        self.conn = get_connection()
+        self.conn = DBConnector().conn
         self.table_name = phenopacket_dir.parents[0].name
 
     def _create_gene_table(self) -> None:

From 74b29a3fedaba68b55021d447bb304ce772c2164 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 9 Jul 2024 19:32:58 +0100
Subject: [PATCH 15/81] refactor DBConnector

---
 src/pheval/analyse/get_connection.py | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/src/pheval/analyse/get_connection.py b/src/pheval/analyse/get_connection.py
index 4e8c936d3..34915e269 100644
--- a/src/pheval/analyse/get_connection.py
+++ b/src/pheval/analyse/get_connection.py
@@ -9,9 +9,23 @@ def dict_factory(cursor, row):
     return d
 
 
-def get_connection() -> DuckDBPyConnection:
-    """
-    Get a connection to the benchmarking results database.
-    """
-    conn = duckdb.connect("analysis.db")
-    return conn
+class DBConnector:
+
+    def __init__(self):
+        self.conn = self.get_connection()
+
+    @staticmethod
+    def get_connection() -> DuckDBPyConnection:
+        conn = duckdb.connect("analysis.db")
+        return conn
+
+    def add_column(self, table_name: str, column: str, default: int=0) -> None:
+        try:
+            self.conn.execute(f'ALTER TABLE {table_name} ADD COLUMN "{column}" INTEGER DEFAULT {default}')
+            self.conn.execute(f'UPDATE {table_name} SET "{column}" = {default}')
+            self.conn.commit()
+        except  duckdb.CatalogException:
+            pass
+
+    def close(self):
+        self.conn.close()

From 6ab525d2d1d52eaef8bc7052397ae5d31b05cb31 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 9 Jul 2024 19:33:19 +0100
Subject: [PATCH 16/81] refactor prioritisation type strings

---
 src/pheval/analyse/benchmark_generator.py | 28 +++++++++++------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/pheval/analyse/benchmark_generator.py b/src/pheval/analyse/benchmark_generator.py
index fe14dbd99..8dcc84f3a 100644
--- a/src/pheval/analyse/benchmark_generator.py
+++ b/src/pheval/analyse/benchmark_generator.py
@@ -8,11 +8,11 @@
 from pheval.analyse.run_data_parser import TrackInputOutputDirectories
 from pheval.analyse.variant_prioritisation_analysis import benchmark_variant_prioritisation
 from pheval.constants import (
-    DISEASE_PLOT_FILE_PREFIX,
+    DISEASE_PRIORITISATION_TYPE_STR,
     DISEASE_PLOT_Y_LABEL,
-    GENE_PLOT_FILE_PREFIX,
+    GENE_PRIORITISATION_TYPE_STR,
     GENE_PLOT_Y_LABEL,
-    VARIANT_PLOT_FILE_PREFIX,
+    VARIANT_PRIORITISATION_TYPE_STR,
     VARIANT_PLOT_Y_LABEL,
 )
 
@@ -22,7 +22,7 @@ class BenchmarkRunOutputGenerator:
     """Base class for recording data required for generating benchmarking outputs.
 
     Attributes:
-        prioritisation_type_file_prefix (str): Prefix for the prioritisation type output file.
+        prioritisation_type_string (str):  Prioritisation type string.
         y_label (str): Label for the y-axis in benchmarking outputs.
         generate_benchmark_run_results (Callable): Callable to generate benchmark run results.
             Takes parameters: input and results directory, score order, threshold, rank comparison,
@@ -30,7 +30,7 @@ class BenchmarkRunOutputGenerator:
         stats_comparison_file_suffix (str): Suffix for the rank comparison file.
     """
 
-    prioritisation_type_file_prefix: str
+    prioritisation_type_string: str
     y_label: str
     generate_benchmark_run_results: Callable[
         [TrackInputOutputDirectories, str, float, defaultdict], BenchmarkRunResults
@@ -48,8 +48,8 @@ class GeneBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
     specifically for gene prioritisation benchmarking.
 
     Attributes:
-        prioritisation_type_file_prefix (str): Prefix for the gene prioritisation type file.
-            Defaults to GENE_PLOT_FILE_PREFIX.
+        prioritisation_type_string (str): Prioritisation type string.
+            Defaults to GENE_PRIORITISATION_TYPE_STR.
         y_label (str): Label for the y-axis in gene prioritisation benchmarking outputs.
             Defaults to GENE_PLOT_Y_LABEL.
         generate_benchmark_run_results (Callable): Callable to generate gene prioritisation
@@ -60,7 +60,7 @@ class GeneBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
             Defaults to "-gene_summary.tsv".
     """
 
-    prioritisation_type_file_prefix: str = GENE_PLOT_FILE_PREFIX
+    prioritisation_type_string: str = GENE_PRIORITISATION_TYPE_STR
     y_label: str = GENE_PLOT_Y_LABEL
     generate_benchmark_run_results: Callable[
         [TrackInputOutputDirectories, str, float, defaultdict], BenchmarkRunResults
@@ -78,8 +78,8 @@ class VariantBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
     specifically for variant prioritisation benchmarking.
 
     Attributes:
-        prioritisation_type_file_prefix (str): Prefix for the variant prioritisation type file.
-            Defaults to VARIANT_PLOT_FILE_PREFIX.
+        prioritisation_type_string (str): Prioritisation type string.
+            Defaults to VARIANT_PRIORITISATION_TYPE_STR.
         y_label (str): Label for the y-axis in variant prioritisation benchmarking outputs.
             Defaults to VARIANT_PLOT_Y_LABEL.
         generate_benchmark_run_results (Callable): Callable to generate variant prioritisation
@@ -91,7 +91,7 @@ class VariantBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
 
     """
 
-    prioritisation_type_file_prefix: str = VARIANT_PLOT_FILE_PREFIX
+    prioritisation_type_string: str = VARIANT_PRIORITISATION_TYPE_STR
     y_label: str = VARIANT_PLOT_Y_LABEL
     generate_benchmark_run_results: Callable[
         [TrackInputOutputDirectories, str, float, defaultdict], BenchmarkRunResults
@@ -109,8 +109,8 @@ class DiseaseBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
     specifically for disease prioritisation benchmarking.
 
     Attributes:
-        prioritisation_type_file_prefix (str): Prefix for the disease prioritisation type file.
-            Defaults to DISEASE_PLOT_FILE_PREFIX.
+        prioritisation_type_string (str): Prioritisation type string.
+            Defaults to DISEASE_PRIORITISATION_TYPE_STR.
         y_label (str): Label for the y-axis in disease prioritisation benchmarking outputs.
             Defaults to DISEASE_PLOT_Y_LABEL.
         generate_benchmark_run_results (Callable): Callable to generate disease prioritisation
@@ -121,7 +121,7 @@ class DiseaseBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
             Defaults to "-disease_summary.tsv".
     """
 
-    prioritisation_type_file_prefix: str = DISEASE_PLOT_FILE_PREFIX
+    prioritisation_type_string: str = DISEASE_PRIORITISATION_TYPE_STR
     y_label: str = DISEASE_PLOT_Y_LABEL
     generate_benchmark_run_results: Callable[
         [TrackInputOutputDirectories, str, float, defaultdict], BenchmarkRunResults

From 4c153ed615840f80e9ab6da2cc80456aef650d3e Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 9 Jul 2024 19:34:00 +0100
Subject: [PATCH 17/81] adding missing args to docstrings

---
 .../analyse/gene_prioritisation_analysis.py       | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/pheval/analyse/gene_prioritisation_analysis.py b/src/pheval/analyse/gene_prioritisation_analysis.py
index 3286e7d7d..5628d2042 100644
--- a/src/pheval/analyse/gene_prioritisation_analysis.py
+++ b/src/pheval/analyse/gene_prioritisation_analysis.py
@@ -130,6 +130,8 @@ def assess_gene_prioritisation(
         and records ranks using a PrioritisationRankRecorder.
 
         Args:
+            standardised_gene_results (List[RankedPhEvalGeneResult]): List of standardised gene results.
+            phenopacket_path (Path): Path to the Phenopacket.
             binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
         """
         relevant_ranks = []
@@ -176,6 +178,7 @@ def assess_phenopacket_gene_prioritisation(
         phenopacket_path (Path): Path to the Phenopacket.
         results_dir_and_input (TrackInputOutputDirectories): Input and output directories.
         gene_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
+        gene_benchmarker (AssessGenePrioritisation): AssessGenePrioritisation class instance.
     """
     standardised_gene_result = results_dir_and_input.results_dir.joinpath(
         f"pheval_gene_results/{phenopacket_path.stem}-pheval_gene_result.tsv"
@@ -205,11 +208,13 @@ def benchmark_gene_prioritisation(
     """
     gene_binary_classification_stats = BinaryClassificationStats()
     db_connection = DBConnector()
-    gene_benchmarker = AssessGenePrioritisation(db_connection=db_connection,
-                                                table_name=f"{results_directory_and_input.phenopacket_dir.parents[0].name}_gene",
-                                                results_dir=results_directory_and_input.results_dir.joinpath("pheval_gene_results/"),
-                                                threshold=threshold,
-                                                score_order=score_order
+    gene_benchmarker = AssessGenePrioritisation(db_connection,
+                                                f"{results_directory_and_input.phenopacket_dir.parents[0].name}"
+                                                f"_gene",
+                                                results_directory_and_input.results_dir.joinpath(
+                                                    "pheval_gene_results/"),
+                                                threshold,
+                                                score_order
                                                 )
     for phenopacket_path in all_files(results_directory_and_input.phenopacket_dir):
         assess_phenopacket_gene_prioritisation(

From 780b0f6c54c0ba22cb06449b0e33f5915ba5fc6d Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 9 Jul 2024 19:35:35 +0100
Subject: [PATCH 18/81] add rank stats to table rather than writing to file

---
 src/pheval/analyse/analysis.py | 69 ++++++++++++++++------------------
 1 file changed, 33 insertions(+), 36 deletions(-)

diff --git a/src/pheval/analyse/analysis.py b/src/pheval/analyse/analysis.py
index 2258127db..263a7b843 100644
--- a/src/pheval/analyse/analysis.py
+++ b/src/pheval/analyse/analysis.py
@@ -18,12 +18,12 @@
 
 
 def _run_benchmark(
-    results_dir_and_input: TrackInputOutputDirectories,
-    score_order: str,
-    output_prefix: str,
-    threshold: float,
-    plot_type: str,
-    benchmark_generator: BenchmarkRunOutputGenerator,
+        results_dir_and_input: TrackInputOutputDirectories,
+        score_order: str,
+        output_prefix: str,
+        threshold: float,
+        plot_type: str,
+        benchmark_generator: BenchmarkRunOutputGenerator,
 ) -> None:
     """Run a benchmark on a result directory.
 
@@ -37,30 +37,29 @@ def _run_benchmark(
     """
     CorpusParser(results_dir_and_input.phenopacket_dir).parse_corpus(benchmark_generator)
     stats_writer = RankStatsWriter(
-        Path(output_prefix + benchmark_generator.stats_comparison_file_suffix)
+        str(output_prefix + benchmark_generator.stats_comparison_file_suffix)
     )
     rank_comparison = defaultdict(dict)
     benchmark_result = benchmark_generator.generate_benchmark_run_results(
         results_dir_and_input, score_order, threshold, rank_comparison
     )
-    stats_writer.write_row(
+    stats_writer.add_statistics_entry(
         results_dir_and_input.results_dir,
         benchmark_result.rank_stats,
         benchmark_result.binary_classification_stats,
     )
     generate_benchmark_output(benchmark_result, plot_type, benchmark_generator)
-    stats_writer.close()
 
 
 def benchmark_directory(
-    results_dir_and_input: TrackInputOutputDirectories,
-    score_order: str,
-    output_prefix: str,
-    threshold: float,
-    gene_analysis: bool,
-    variant_analysis: bool,
-    disease_analysis: bool,
-    plot_type: str,
+        results_dir_and_input: TrackInputOutputDirectories,
+        score_order: str,
+        output_prefix: str,
+        threshold: float,
+        gene_analysis: bool,
+        variant_analysis: bool,
+        disease_analysis: bool,
+        plot_type: str,
 ) -> None:
     """
     Benchmark prioritisation performance for a single run.
@@ -105,12 +104,12 @@ def benchmark_directory(
 
 
 def _run_benchmark_comparison(
-    results_directories: List[TrackInputOutputDirectories],
-    score_order: str,
-    output_prefix: str,
-    threshold: float,
-    plot_type: str,
-    benchmark_generator: BenchmarkRunOutputGenerator,
+        results_directories: List[TrackInputOutputDirectories],
+        score_order: str,
+        output_prefix: str,
+        threshold: float,
+        plot_type: str,
+        benchmark_generator: BenchmarkRunOutputGenerator,
 ) -> None:
     """
     Run a benchmark on several result directories.
@@ -125,36 +124,34 @@ def _run_benchmark_comparison(
         benchmark_generator (BenchmarkRunOutputGenerator): Generator for benchmark run output.
     """
     stats_writer = RankStatsWriter(
-        Path(output_prefix + benchmark_generator.stats_comparison_file_suffix)
+        str(output_prefix + benchmark_generator.stats_comparison_file_suffix)
     )
     unique_test_corpora_directories = set([result.phenopacket_dir for result in results_directories])
     [CorpusParser(test_corpora_directory).parse_corpus(benchmark_generator) for test_corpora_directory in
      unique_test_corpora_directories]
     benchmarking_results = []
     for results_dir_and_input in results_directories:
-        rank_comparison = defaultdict(dict)
         benchmark_result = benchmark_generator.generate_benchmark_run_results(
-            results_dir_and_input, score_order, threshold, rank_comparison
+            results_dir_and_input, score_order, threshold
         )
-        stats_writer.write_row(
+        stats_writer.add_statistics_entry(
             results_dir_and_input.results_dir,
             benchmark_result.rank_stats,
             benchmark_result.binary_classification_stats,
         )
         benchmarking_results.append(benchmark_result)
     generate_benchmark_comparison_output(benchmarking_results, plot_type, benchmark_generator)
-    stats_writer.close()
 
 
 def benchmark_run_comparisons(
-    results_directories: List[TrackInputOutputDirectories],
-    score_order: str,
-    output_prefix: str,
-    threshold: float,
-    gene_analysis: bool,
-    variant_analysis: bool,
-    disease_analysis: bool,
-    plot_type: str,
+        results_directories: List[TrackInputOutputDirectories],
+        score_order: str,
+        output_prefix: str,
+        threshold: float,
+        gene_analysis: bool,
+        variant_analysis: bool,
+        disease_analysis: bool,
+        plot_type: str,
 ) -> None:
     """
     Benchmark prioritisation performance for several runs.

From fa8ddb4ab4e9ca64cf81afd7a7e766f20001df9b Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Mon, 29 Jul 2024 13:57:14 +0100
Subject: [PATCH 19/81] refactor method name for adding column

---
 src/pheval/analyse/gene_prioritisation_analysis.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pheval/analyse/gene_prioritisation_analysis.py b/src/pheval/analyse/gene_prioritisation_analysis.py
index 5628d2042..ba4bba5f3 100644
--- a/src/pheval/analyse/gene_prioritisation_analysis.py
+++ b/src/pheval/analyse/gene_prioritisation_analysis.py
@@ -40,7 +40,7 @@ def __init__(
         self.conn = db_connection.conn
         self.column = str(self.results_dir.parents[0])
         self.table_name = table_name
-        db_connection.add_column(table_name=table_name, column=self.column, default=0)
+        db_connection.add_column_integer_default(table_name=table_name, column=self.column, default=0)
 
     def _assess_gene_with_threshold_ascending_order(
             self,

From 60c9dd12c2d08300ae0b29a52ac323c70266a7df Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Mon, 29 Jul 2024 13:57:41 +0100
Subject: [PATCH 20/81] refactor variable name

---
 src/pheval/analyse/generate_plots.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/pheval/analyse/generate_plots.py b/src/pheval/analyse/generate_plots.py
index 9262f9dcf..66e265181 100644
--- a/src/pheval/analyse/generate_plots.py
+++ b/src/pheval/analyse/generate_plots.py
@@ -168,13 +168,13 @@ def generate_stacked_bar_plot(
         ).legend(loc="center left", bbox_to_anchor=(1.0, 0.5))
         if title is None:
             plt.title(
-                f"{benchmark_generator.prioritisation_type_file_prefix.capitalize()} Rank Stats"
+                f"{benchmark_generator.prioritisation_type_string.capitalize()} Rank Stats"
             )
         else:
             plt.title(title, loc="center", fontsize=15)
         plt.ylim(0, 100)
         plt.savefig(
-            f"{benchmark_generator.prioritisation_type_file_prefix}_rank_stats.svg",
+            f"{benchmark_generator.prioritisation_type_string}_rank_stats.svg",
             format="svg",
             bbox_inches="tight",
         )
@@ -183,16 +183,16 @@ def generate_stacked_bar_plot(
         mrr_df.set_index("Run").plot(
             kind="bar",
             color=self.palette_hex_codes,
-            ylabel=f"{benchmark_generator.prioritisation_type_file_prefix.capitalize()} mean reciprocal rank",
+            ylabel=f"{benchmark_generator.prioritisation_type_string.capitalize()} mean reciprocal rank",
             legend=False,
             edgecolor="white",
         )
         plt.title(
-            f"{benchmark_generator.prioritisation_type_file_prefix.capitalize()} results - mean reciprocal rank"
+            f"{benchmark_generator.prioritisation_type_string.capitalize()} results - mean reciprocal rank"
         )
         plt.ylim(0, 1)
         plt.savefig(
-            f"{benchmark_generator.prioritisation_type_file_prefix}_mrr.svg",
+            f"{benchmark_generator.prioritisation_type_string}_mrr.svg",
             format="svg",
             bbox_inches="tight",
         )
@@ -280,13 +280,13 @@ def generate_cumulative_bar(
         plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.15), ncol=3, title="Run")
         if title is None:
             plt.title(
-                f"{benchmark_generator.prioritisation_type_file_prefix.capitalize()} Cumulative Rank Stats"
+                f"{benchmark_generator.prioritisation_type_string.capitalize()} Cumulative Rank Stats"
             )
         else:
             plt.title(title, loc="center", fontsize=15)
         plt.ylim(0, 1)
         plt.savefig(
-            f"{benchmark_generator.prioritisation_type_file_prefix}_rank_stats.svg",
+            f"{benchmark_generator.prioritisation_type_string}_rank_stats.svg",
             format="svg",
             bbox_inches="tight",
         )
@@ -391,7 +391,7 @@ def generate_roc_curve(
         plt.title("Receiver Operating Characteristic (ROC) Curve")
         plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.15))
         plt.savefig(
-            f"{benchmark_generator.prioritisation_type_file_prefix}_roc_curve.svg",
+            f"{benchmark_generator.prioritisation_type_string}_roc_curve.svg",
             format="svg",
             bbox_inches="tight",
         )
@@ -429,7 +429,7 @@ def generate_precision_recall(
         plt.title("Precision-Recall Curve")
         plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.15))
         plt.savefig(
-            f"{benchmark_generator.prioritisation_type_file_prefix}_precision_recall_curve.svg",
+            f"{benchmark_generator.prioritisation_type_string}_precision_recall_curve.svg",
             format="svg",
             bbox_inches="tight",
         )
@@ -465,13 +465,13 @@ def generate_non_cumulative_bar(
         plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.15), ncol=3, title="Run")
         if title is None:
             plt.title(
-                f"{benchmark_generator.prioritisation_type_file_prefix.capitalize()} Non-Cumulative Rank Stats"
+                f"{benchmark_generator.prioritisation_type_string.capitalize()} Non-Cumulative Rank Stats"
             )
         else:
             plt.title(title, loc="center", fontsize=15)
         plt.ylim(0, 1)
         plt.savefig(
-            f"{benchmark_generator.prioritisation_type_file_prefix}_rank_stats.svg",
+            f"{benchmark_generator.prioritisation_type_string}_rank_stats.svg",
             format="svg",
             bbox_inches="tight",
         )

From 0b85056a6b9396c24f23af8745761393b168e414 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Mon, 29 Jul 2024 14:50:01 +0100
Subject: [PATCH 21/81] removing `RankComparisonGenerator` class and replacing
 with methods to create comparison tables with duckdb

---
 .../analyse/generate_summary_outputs.py       | 173 +++++-------------
 1 file changed, 50 insertions(+), 123 deletions(-)

diff --git a/src/pheval/analyse/generate_summary_outputs.py b/src/pheval/analyse/generate_summary_outputs.py
index 45cd5a417..95679b0c6 100644
--- a/src/pheval/analyse/generate_summary_outputs.py
+++ b/src/pheval/analyse/generate_summary_outputs.py
@@ -1,86 +1,17 @@
 import itertools
-from collections import defaultdict
-from copy import deepcopy
+from pathlib import Path
 from typing import List
-
-import numpy as np
-import pandas as pd
-
 from pheval.analyse.benchmark_generator import BenchmarkRunOutputGenerator
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.generate_plots import generate_plots
-from pheval.constants import RANK_COMPARISON_FILE_SUFFIX
-
-
-class RankComparisonGenerator:
-    """Class for writing the run comparison of rank assignment for prioritisation."""
-
-    def __init__(self, run_comparison: defaultdict):
-        """
-        Initialise the RankComparisonGenerator class.
-
-        Args:
-            run_comparison (defaultdict): A nested dictionary containing the run comparison data.
-        """
-        self.run_comparison = run_comparison
-
-    def _generate_dataframe(self) -> pd.DataFrame:
-        """
-        Generate a Pandas DataFrame based on the run comparison data.
-
-        Returns:
-            pd.DataFrame: DataFrame containing the run comparison data.
-        """
-        return pd.DataFrame.from_dict(self.run_comparison, orient="index")
-
-    def _calculate_rank_difference(self) -> pd.DataFrame:
-        """
-        Calculate the rank decrease for runs, taking the first directory as a baseline.
-
-        Returns:
-            pd.DataFrame: DataFrame containing the calculated rank differences.
-        """
-        comparison_df = self._generate_dataframe()
-        comparison_df["rank_change"] = comparison_df.iloc[:, 2] - comparison_df.iloc[:, 3]
-        comparison_df["rank_change"] = np.where(
-            (comparison_df.iloc[:, 2] == 0) & (comparison_df.iloc[:, 3] != 0),
-            "GAINED",
-            np.where(
-                (comparison_df.iloc[:, 3] == 0) & (comparison_df.iloc[:, 2] != 0),
-                "LOST",
-                comparison_df["rank_change"],
-            ),
-        )
-        comparison_df["rank_change"] = comparison_df["rank_change"].apply(
-            lambda x: int(x) if str(x).lstrip("-").isdigit() else x
-        )
-        return comparison_df
-
-    def generate_output(self, prefix: str, suffix: str) -> None:
-        """
-        Generate output file from the run comparison data.
-
-        Args:
-            prefix (str): Prefix for the output file name.
-            suffix (str): Suffix for the output file name.
-        """
-        self._generate_dataframe().to_csv(prefix + suffix, sep="\t")
-
-    def generate_comparison_output(self, prefix: str, suffix: str) -> None:
-        """
-        Generate output file with calculated rank differences.
-
-        Args:
-            prefix (str): Prefix for the output file name.
-            suffix (str): Suffix for the output file name.
-        """
-        self._calculate_rank_difference().to_csv(prefix + suffix, sep="\t")
+from pheval.constants import RANK_COMPARISON_SUFFIX
+from pheval.analyse.get_connection import DBConnector
 
 
 def generate_benchmark_output(
-    benchmarking_results: BenchmarkRunResults,
-    plot_type: str,
-    benchmark_generator: BenchmarkRunOutputGenerator,
+        benchmarking_results: BenchmarkRunResults,
+        plot_type: str,
+        benchmark_generator: BenchmarkRunOutputGenerator,
 ) -> None:
     """
     Generate prioritisation outputs for a single benchmarking run.
@@ -90,12 +21,13 @@ def generate_benchmark_output(
         plot_type (str): Type of plot to generate.
         benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
     """
-    rank_comparison_data = benchmarking_results.ranks
     results_dir_name = benchmarking_results.results_dir.name
-    RankComparisonGenerator(rank_comparison_data).generate_output(
-        f"{results_dir_name}",
-        f"-{benchmark_generator.prioritisation_type_file_prefix}{RANK_COMPARISON_FILE_SUFFIX}",
-    )
+    conn = DBConnector().conn
+    conn.execute(
+        f"""CREATE TABLE {results_dir_name}_{benchmark_generator.prioritisation_type_string}
+        {RANK_COMPARISON_SUFFIX} AS SELECT * EXCLUDE (identifier) FROM 
+        {benchmarking_results.phenopacket_dir.parents[0].name}_{benchmark_generator.prioritisation_type_string}""")
+    conn.close()
     generate_plots(
         [benchmarking_results],
         benchmark_generator,
@@ -103,41 +35,40 @@ def generate_benchmark_output(
     )
 
 
-def merge_results(result1: dict, result2: dict) -> defaultdict:
+def get_new_table_name(result_dir_1: Path, result_dir_2: Path, output_prefix: str) -> str:
     """
-    Merge two nested dictionaries containing results on commonalities.
-
-    This function merges two dictionaries, `result1` and `result2`, containing nested structures.
-    It traverses the dictionaries recursively and merges their contents based on common keys.
-    If a key is present in both dictionaries and points to another dictionary, the function
-    will further merge their nested contents. If a key exists in `result2` but not in `result1`,
-    it will be added to `result1`.
-
+    Get the new table name for rank comparison tables.
     Args:
-        result1 (dict): The first dictionary to be merged.
-        result2 (dict): The second dictionary to be merged.
-
+        result_dir_1: The path to the first result directory.
+        result_dir_2: The path to the second result directory.
+        output_prefix: The output prefix of the table
     Returns:
-        defaultdict: The merged dictionary containing the combined contents of `result1` and `result2`.
+        The new table name.
     """
-    for key, val in result1.items():
-        if type(val) == dict:
-            if key in result2 and type(result2[key] == dict):
-                merge_results(result1[key], result2[key])
-        else:
-            if key in result2:
-                result1[key] = result2[key]
+    return (f"{Path(result_dir_1).parents[0].name}_{Path(result_dir_1).name}_vs_"
+            f"{Path(result_dir_2).parents[0].name}_{Path(result_dir_2).name}_"
+            f"{output_prefix}{RANK_COMPARISON_SUFFIX}")
 
-    for key, val in result2.items():
-        if key not in result1:
-            result1[key] = val
-    return result1
+
+def create_comparison_table(comparison_table_name: str, connector: DBConnector, drop_columns: List[str],
+                            result_dir_1: str, result_dir_2: str, table_name: str) -> None:
+    connector.drop_table(comparison_table_name)
+    connector.conn.execute(
+        f"""CREATE TABLE "{comparison_table_name}" AS SELECT * EXCLUDE 
+        ('{", ".join(drop_columns)}', identifier) FROM {table_name}""")
+    connector.conn.execute(f"""ALTER TABLE "{comparison_table_name}" ADD COLUMN rank_change VARCHAR;""")
+    connector.conn.execute(
+        f"""UPDATE "{comparison_table_name}" SET rank_change = CASE WHEN "{result_dir_1}" = 0 
+        AND "{result_dir_2}" != 0 THEN 'GAINED' WHEN "{result_dir_1}" != 0 
+        AND "{result_dir_2}" = 0 THEN 'LOST' ELSE CAST ("{result_dir_1}" - "{result_dir_2}" AS VARCHAR) END;""")
+    connector.conn.commit()
 
 
 def generate_benchmark_comparison_output(
-    benchmarking_results: List[BenchmarkRunResults],
-    plot_type: str,
-    benchmark_generator: BenchmarkRunOutputGenerator,
+        benchmarking_results: List[BenchmarkRunResults],
+        plot_type: str,
+        benchmark_generator: BenchmarkRunOutputGenerator,
+        table_name: str
 ) -> None:
     """
     Generate prioritisation outputs for benchmarking multiple runs.
@@ -151,23 +82,19 @@ def generate_benchmark_comparison_output(
             representing the benchmarking results of multiple runs.
         plot_type (str): The type of plot to be generated.
         benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
+        table_name (str): The name of the table where ranks are stored.
     """
-    output_prefix = benchmark_generator.prioritisation_type_file_prefix
-    for pair in itertools.combinations(benchmarking_results, 2):
-        result1 = pair[0]
-        result2 = pair[1]
-        merged_results = merge_results(
-            deepcopy(result1.ranks),
-            deepcopy(result2.ranks),
-        )
-        RankComparisonGenerator(merged_results).generate_comparison_output(
-            f"{result1.results_dir.parents[0].name}_"
-            f"{result1.results_dir.name}"
-            f"_vs_{result2.results_dir.parents[0].name}_"
-            f"{result2.results_dir.name}",
-            f"-{output_prefix}{RANK_COMPARISON_FILE_SUFFIX}",
-        )
-
+    output_prefix = benchmark_generator.prioritisation_type_string
+    connector = DBConnector()
+    run_columns = [column for column in
+                   connector.conn.execute(f"PRAGMA table_info('{table_name}');").fetchdf()['name'].to_list()
+                   if "/" in column]
+    for pair in itertools.combinations([str(result.results_dir) for result in benchmarking_results], 2):
+        result_dir_1 = pair[0]
+        result_dir_2 = pair[1]
+        drop_columns = [run for run in run_columns if run not in pair]
+        comparison_table_name = get_new_table_name(result_dir_1, result_dir_2, output_prefix)
+        create_comparison_table(comparison_table_name, connector, drop_columns, result_dir_1, result_dir_2, table_name)
     generate_plots(
         benchmarking_results,
         benchmark_generator,

From 23a184c8e26375f2f00f9342ea3248155c4435e0 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Mon, 29 Jul 2024 14:50:22 +0100
Subject: [PATCH 22/81] add method to drop table

---
 src/pheval/analyse/get_connection.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/pheval/analyse/get_connection.py b/src/pheval/analyse/get_connection.py
index 34915e269..445b73a87 100644
--- a/src/pheval/analyse/get_connection.py
+++ b/src/pheval/analyse/get_connection.py
@@ -19,7 +19,7 @@ def get_connection() -> DuckDBPyConnection:
         conn = duckdb.connect("analysis.db")
         return conn
 
-    def add_column(self, table_name: str, column: str, default: int=0) -> None:
+    def add_column_integer_default(self, table_name: str, column: str, default: int=0) -> None:
         try:
             self.conn.execute(f'ALTER TABLE {table_name} ADD COLUMN "{column}" INTEGER DEFAULT {default}')
             self.conn.execute(f'UPDATE {table_name} SET "{column}" = {default}')
@@ -27,5 +27,8 @@ def add_column(self, table_name: str, column: str, default: int=0) -> None:
         except  duckdb.CatalogException:
             pass
 
+    def drop_table(self, table_name: str) -> None:
+        self.conn.execute(f"""DROP TABLE IF EXISTS '{table_name}';""")
+
     def close(self):
         self.conn.close()

From 462f60c04c92eea10828bbd498ead30010922ecf Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Mon, 29 Jul 2024 14:51:10 +0100
Subject: [PATCH 23/81] add rank comparison suffix for table naming

---
 src/pheval/constants.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pheval/constants.py b/src/pheval/constants.py
index 0c054462b..7435dd119 100644
--- a/src/pheval/constants.py
+++ b/src/pheval/constants.py
@@ -5,4 +5,4 @@
 VARIANT_PLOT_Y_LABEL = "Disease-causing variants (%)"
 DISEASE_PRIORITISATION_TYPE_STR = "disease"
 DISEASE_PLOT_Y_LABEL = "Known diseases (%)"
-RANK_COMPARISON_FILE_SUFFIX = "_rank_comparison.tsv"
+RANK_COMPARISON_SUFFIX = "_rank_comparison"

From 9b9a3ec85409bba0ef7581825591244b58d779d1 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Mon, 29 Jul 2024 15:03:07 +0100
Subject: [PATCH 24/81] remove parameter for rank comparison dictionary

---
 src/pheval/analyse/analysis.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/pheval/analyse/analysis.py b/src/pheval/analyse/analysis.py
index 263a7b843..002f22c37 100644
--- a/src/pheval/analyse/analysis.py
+++ b/src/pheval/analyse/analysis.py
@@ -1,5 +1,3 @@
-from collections import defaultdict
-from pathlib import Path
 from typing import List
 
 from pheval.analyse.benchmark_generator import (
@@ -39,9 +37,8 @@ def _run_benchmark(
     stats_writer = RankStatsWriter(
         str(output_prefix + benchmark_generator.stats_comparison_file_suffix)
     )
-    rank_comparison = defaultdict(dict)
     benchmark_result = benchmark_generator.generate_benchmark_run_results(
-        results_dir_and_input, score_order, threshold, rank_comparison
+        results_dir_and_input, score_order, threshold
     )
     stats_writer.add_statistics_entry(
         results_dir_and_input.results_dir,
@@ -140,7 +137,12 @@ def _run_benchmark_comparison(
             benchmark_result.binary_classification_stats,
         )
         benchmarking_results.append(benchmark_result)
-    generate_benchmark_comparison_output(benchmarking_results, plot_type, benchmark_generator)
+    [generate_benchmark_comparison_output(benchmarking_results, plot_type, benchmark_generator,
+                                          f"{unique_test_corpora_directory.parents[0].name}_"
+                                          f"{benchmark_generator.prioritisation_type_string}")
+     for unique_test_corpora_directory in
+     unique_test_corpora_directories]
+
 
 
 def benchmark_run_comparisons(

From 154f5fb9b60a1c17997c73afa103f2cafa22a289 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Mon, 29 Jul 2024 15:03:17 +0100
Subject: [PATCH 25/81] remove parameter for rank comparison dictionary

---
 src/pheval/analyse/benchmark_generator.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/pheval/analyse/benchmark_generator.py b/src/pheval/analyse/benchmark_generator.py
index 8dcc84f3a..ee360b604 100644
--- a/src/pheval/analyse/benchmark_generator.py
+++ b/src/pheval/analyse/benchmark_generator.py
@@ -1,4 +1,3 @@
-from collections import defaultdict
 from dataclasses import dataclass
 from typing import Callable
 
@@ -33,7 +32,7 @@ class BenchmarkRunOutputGenerator:
     prioritisation_type_string: str
     y_label: str
     generate_benchmark_run_results: Callable[
-        [TrackInputOutputDirectories, str, float, defaultdict], BenchmarkRunResults
+        [TrackInputOutputDirectories, str, float], BenchmarkRunResults
     ]
     stats_comparison_file_suffix: str
 
@@ -63,7 +62,7 @@ class GeneBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
     prioritisation_type_string: str = GENE_PRIORITISATION_TYPE_STR
     y_label: str = GENE_PLOT_Y_LABEL
     generate_benchmark_run_results: Callable[
-        [TrackInputOutputDirectories, str, float, defaultdict], BenchmarkRunResults
+        [TrackInputOutputDirectories, str, float], BenchmarkRunResults
     ] = benchmark_gene_prioritisation
     stats_comparison_file_suffix: str = "-gene_summary.tsv"
 
@@ -94,7 +93,7 @@ class VariantBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
     prioritisation_type_string: str = VARIANT_PRIORITISATION_TYPE_STR
     y_label: str = VARIANT_PLOT_Y_LABEL
     generate_benchmark_run_results: Callable[
-        [TrackInputOutputDirectories, str, float, defaultdict], BenchmarkRunResults
+        [TrackInputOutputDirectories, str, float], BenchmarkRunResults
     ] = benchmark_variant_prioritisation
     stats_comparison_file_suffix: str = "-variant_summary.tsv"
 
@@ -124,6 +123,6 @@ class DiseaseBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
     prioritisation_type_string: str = DISEASE_PRIORITISATION_TYPE_STR
     y_label: str = DISEASE_PLOT_Y_LABEL
     generate_benchmark_run_results: Callable[
-        [TrackInputOutputDirectories, str, float, defaultdict], BenchmarkRunResults
+        [TrackInputOutputDirectories, str, float], BenchmarkRunResults
     ] = benchmark_disease_prioritisation
     stats_comparison_file_suffix: str = "-disease_summary.tsv"

From 97d478ae49fb4e3df5b8f6f535432e6c34513ef4 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Mon, 29 Jul 2024 15:05:58 +0100
Subject: [PATCH 26/81] fix table name

---
 src/pheval/analyse/get_connection.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pheval/analyse/get_connection.py b/src/pheval/analyse/get_connection.py
index 445b73a87..5d65fbe33 100644
--- a/src/pheval/analyse/get_connection.py
+++ b/src/pheval/analyse/get_connection.py
@@ -28,7 +28,7 @@ def add_column_integer_default(self, table_name: str, column: str, default: int=
             pass
 
     def drop_table(self, table_name: str) -> None:
-        self.conn.execute(f"""DROP TABLE IF EXISTS '{table_name}';""")
+        self.conn.execute(f"""DROP TABLE IF EXISTS "{table_name}";""")
 
     def close(self):
         self.conn.close()

From 720da4e46c6a2d54dc51ae1bb69b5b90559fba1c Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Mon, 29 Jul 2024 15:06:25 +0100
Subject: [PATCH 27/81] remove ranks parameter

---
 src/pheval/analyse/parse_benchmark_summary.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/pheval/analyse/parse_benchmark_summary.py b/src/pheval/analyse/parse_benchmark_summary.py
index 386f3a8b6..e628292fa 100644
--- a/src/pheval/analyse/parse_benchmark_summary.py
+++ b/src/pheval/analyse/parse_benchmark_summary.py
@@ -60,7 +60,6 @@ def parse_benchmark_result_summary(benchmarking_df: pd.DataFrame) -> List[Benchm
                 total=row["total"],
                 mrr=row["mean_reciprocal_rank"],
             ),
-            ranks={},
             benchmark_name=row["results_directory_path"],
             binary_classification_stats=BinaryClassificationStats(),
         )

From fe4fc873e40a8f609a16d9a59601d13abdfd1ac2 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Mon, 29 Jul 2024 16:29:46 +0100
Subject: [PATCH 28/81] refactor to use duckdb for benchmarking

---
 .../disease_prioritisation_analysis.py        | 219 +++++++-----------
 .../variant_prioritisation_analysis.py        | 211 +++++++----------
 2 files changed, 164 insertions(+), 266 deletions(-)

diff --git a/src/pheval/analyse/disease_prioritisation_analysis.py b/src/pheval/analyse/disease_prioritisation_analysis.py
index 8041c4063..7bc4f302f 100644
--- a/src/pheval/analyse/disease_prioritisation_analysis.py
+++ b/src/pheval/analyse/disease_prioritisation_analysis.py
@@ -1,76 +1,50 @@
-from collections import defaultdict
 from pathlib import Path
 from typing import List
 
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
-from pheval.analyse.parse_corpus import _obtain_causative_diseases
+from pheval.analyse.get_connection import DBConnector
 from pheval.analyse.parse_pheval_result import parse_pheval_result, read_standardised_result
-from pheval.analyse.prioritisation_rank_recorder import PrioritisationRankRecorder
-from pheval.analyse.prioritisation_result_types import DiseasePrioritisationResult
 from pheval.analyse.rank_stats import RankStats
 from pheval.analyse.run_data_parser import TrackInputOutputDirectories
 from pheval.post_processing.post_processing import RankedPhEvalDiseaseResult
 from pheval.utils.file_utils import all_files
-from pheval.utils.phenopacket_utils import ProbandDisease
 
 
 class AssessDiseasePrioritisation:
     """Class for assessing disease prioritisation based on thresholds and scoring orders."""
 
     def __init__(
-        self,
-        phenopacket_path: Path,
-        results_dir: Path,
-        standardised_disease_results: List[RankedPhEvalDiseaseResult],
-        threshold: float,
-        score_order: str,
-        proband_diseases: List[ProbandDisease],
+            self,
+            db_connection: DBConnector,
+            table_name: str,
+            results_dir: Path,
+            threshold: float,
+            score_order: str,
     ):
         """
         Initialise AssessDiseasePrioritisation class
 
         Args:
-            phenopacket_path (Path): Path to the phenopacket file
+            db_connection (DBConnector): Database connection
+            table_name (str): Table name
             results_dir (Path): Path to the results directory
-            standardised_disease_results (List[RankedPhEvalDiseaseResult]): List of ranked PhEval disease results
             threshold (float): Threshold for scores
             score_order (str): Score order for results, either ascending or descending
-            proband_diseases (List[ProbandDisease]): List of proband diseases
 
         """
-        self.phenopacket_path = phenopacket_path
         self.results_dir = results_dir
-        self.standardised_disease_results = standardised_disease_results
         self.threshold = threshold
         self.score_order = score_order
-        self.proband_diseases = proband_diseases
-
-    def _record_disease_prioritisation_match(
-        self,
-        disease: ProbandDisease,
-        result_entry: RankedPhEvalDiseaseResult,
-        rank_stats: RankStats,
-    ) -> DiseasePrioritisationResult:
-        """
-        Record the disease prioritisation rank if found within the results
-        Args:
-            disease (ProbandDisease): Diagnosed proband disease
-            result_entry (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry
-            rank_stats (RankStats): RankStats class instance
-        Returns:
-            DiseasePrioritisationResult: Recorded correct disease prioritisation rank result
-        """
-        rank = result_entry.rank
-        rank_stats.add_rank(rank)
-        return DiseasePrioritisationResult(self.phenopacket_path, disease, rank)
+        self.conn = db_connection.conn
+        self.column = str(self.results_dir.parents[0])
+        self.table_name = table_name
+        db_connection.add_column_integer_default(table_name=table_name, column=self.column, default=0)
 
     def _assess_disease_with_threshold_ascending_order(
-        self,
-        result_entry: RankedPhEvalDiseaseResult,
-        disease: ProbandDisease,
-        rank_stats: RankStats,
-    ) -> DiseasePrioritisationResult:
+            self,
+            result_entry: RankedPhEvalDiseaseResult,
+    ) -> int:
         """
         Record the disease prioritisation rank if it meets the ascending order threshold.
 
@@ -79,21 +53,19 @@ def _assess_disease_with_threshold_ascending_order(
 
         Args:
             result_entry (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry
-            disease (ProbandDisease): Diagnosed proband disease
-            rank_stats (RankStats): RankStats class instance
 
         Returns:
-            DiseasePrioritisationResult: Recorded correct disease prioritisation rank result
+            int: Recorded disease prioritisation rank
         """
         if float(self.threshold) > float(result_entry.score):
-            return self._record_disease_prioritisation_match(disease, result_entry, rank_stats)
+            return result_entry.rank
+        else:
+            return 0
 
     def _assess_disease_with_threshold(
-        self,
-        result_entry: RankedPhEvalDiseaseResult,
-        disease: ProbandDisease,
-        rank_stats: RankStats,
-    ) -> DiseasePrioritisationResult:
+            self,
+            result_entry: RankedPhEvalDiseaseResult,
+    ) -> int:
         """
         Record the disease prioritisation rank if it meets the score threshold.
 
@@ -102,21 +74,19 @@ def _assess_disease_with_threshold(
 
         Args:
             result_entry (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry
-            disease (ProbandDisease): Diagnosed proband disease
-            rank_stats (RankStats): RankStats class instance
 
         Returns:
-            DiseasePrioritisationResult: Recorded correct disease prioritisation rank result
+            int: Recorded disease prioritisation rank
         """
         if float(self.threshold) < float(result_entry.score):
-            return self._record_disease_prioritisation_match(disease, result_entry, rank_stats)
+            return result_entry.rank
+        else:
+            return 0
 
     def _record_matched_disease(
-        self,
-        disease: ProbandDisease,
-        rank_stats: RankStats,
-        standardised_disease_result: RankedPhEvalDiseaseResult,
-    ) -> DiseasePrioritisationResult:
+            self,
+            standardised_disease_result: RankedPhEvalDiseaseResult,
+    ) -> int:
         """
         Return the disease rank result - handling the specification of a threshold.
 
@@ -125,33 +95,27 @@ def _record_matched_disease(
         Otherwise, it assesses the disease with the threshold based on the score order.
 
         Args:
-            disease (ProbandDisease): Diagnosed proband disease
-            rank_stats (RankStats): RankStats class instance
             standardised_disease_result (RankedPhEvalDiseaseResult): Ranked PhEval disease result entry
 
         Returns:
-            DiseasePrioritisationResult: Recorded correct disease prioritisation rank result
+            int: Recorded disease prioritisation rank
         """
         if float(self.threshold) == 0.0:
-            return self._record_disease_prioritisation_match(
-                disease, standardised_disease_result, rank_stats
-            )
+            return standardised_disease_result.rank
         else:
             return (
-                self._assess_disease_with_threshold(
-                    standardised_disease_result, disease, rank_stats
-                )
+                self._assess_disease_with_threshold(standardised_disease_result)
                 if self.score_order != "ascending"
                 else self._assess_disease_with_threshold_ascending_order(
-                    standardised_disease_result, disease, rank_stats
+                    standardised_disease_result,
                 )
             )
 
     def assess_disease_prioritisation(
-        self,
-        rank_stats: RankStats,
-        rank_records: defaultdict,
-        binary_classification_stats: BinaryClassificationStats,
+            self,
+            standardised_disease_results: List[RankedPhEvalDiseaseResult],
+            phenopacket_path: Path,
+            binary_classification_stats: BinaryClassificationStats,
     ) -> None:
         """
         Assess disease prioritisation.
@@ -160,52 +124,35 @@ def assess_disease_prioritisation(
         and records ranks using a PrioritisationRankRecorder.
 
         Args:
-            rank_stats (RankStats): RankStats class instance
-            rank_records (defaultdict): A defaultdict to store the correct ranked results.
+            standardised_disease_results (List[RankedPhEvalDiseaseResult]): List of standardised disease results.
+            phenopacket_path (Path): Path to the phenopacket.
             binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
         """
         relevant_ranks = []
-        for disease in self.proband_diseases:
-            rank_stats.total += 1
-            disease_match = DiseasePrioritisationResult(self.phenopacket_path, disease)
-            for standardised_disease_result in self.standardised_disease_results:
-                if (
-                    disease.disease_identifier == standardised_disease_result.disease_identifier
-                    or disease.disease_name == standardised_disease_result.disease_name
-                ):
-                    disease_match = self._record_matched_disease(
-                        disease, rank_stats, standardised_disease_result
-                    )
-                    (
-                        relevant_ranks.append(disease_match.rank)
-                        if disease_match
-                        else relevant_ranks.append(0)
-                    )
-                    break
-            PrioritisationRankRecorder(
-                rank_stats.total,
-                self.results_dir,
-                (
-                    DiseasePrioritisationResult(self.phenopacket_path, disease)
-                    if disease_match is None
-                    else disease_match
-                ),
-                rank_records,
-            ).record_rank()
-        rank_stats.relevant_result_ranks.append(relevant_ranks)
+        df = self.conn.execute(
+            f"""SELECT * FROM {self.table_name} WHERE phenopacket = '{phenopacket_path.name}'""").fetchdf()
+        for i, row in df.iterrows():
+            generated_matches = list(result for result in standardised_disease_results if
+                                     row["disease_name"] == result.disease_name or row[
+                                         "disease_identifier"] == result.disease_identifier)
+            if len(generated_matches) > 0:
+                disease_match = self._record_matched_disease(generated_matches[0])
+                relevant_ranks.append(disease_match)
+                primary_key = f"{phenopacket_path.name}-{row['disease_identifier']}"
+                self.conn.execute(
+                    f'UPDATE {self.table_name} SET "{self.column}" = ? WHERE identifier = ?',
+                    (disease_match, primary_key),
+                )
         binary_classification_stats.add_classification(
-            self.standardised_disease_results, relevant_ranks
+            standardised_disease_results, relevant_ranks
         )
 
 
 def assess_phenopacket_disease_prioritisation(
-    phenopacket_path: Path,
-    score_order: str,
-    results_dir_and_input: TrackInputOutputDirectories,
-    threshold: float,
-    disease_rank_stats: RankStats,
-    disease_rank_comparison: defaultdict,
-    disease_binary_classification_stats: BinaryClassificationStats,
+        phenopacket_path: Path,
+        results_dir_and_input: TrackInputOutputDirectories,
+        disease_binary_classification_stats: BinaryClassificationStats,
+        disease_benchmarker: AssessDiseasePrioritisation
 ) -> None:
     """
     Assess disease prioritisation for a Phenopacket by comparing PhEval standardised disease results
@@ -213,35 +160,24 @@ def assess_phenopacket_disease_prioritisation(
 
     Args:
         phenopacket_path (Path): Path to the Phenopacket.
-        score_order (str): The order in which scores are arranged, either ascending or descending.
         results_dir_and_input (TrackInputOutputDirectories): Input and output directories.
-        threshold (float): Threshold for assessment.
-        disease_rank_stats (RankStats): RankStats class instance.
-        disease_rank_comparison (defaultdict): Default dictionary for disease rank comparisons.
         disease_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
+        disease_benchmarker (AssessDiseasePrioritisation): AssessDiseasePrioritisation class instance.
     """
     standardised_disease_result = results_dir_and_input.results_dir.joinpath(
         f"pheval_disease_results/{phenopacket_path.stem}-pheval_disease_result.tsv"
     )
     pheval_disease_result = read_standardised_result(standardised_disease_result)
-    proband_diseases = _obtain_causative_diseases(phenopacket_path)
-    AssessDiseasePrioritisation(
-        phenopacket_path,
-        results_dir_and_input.results_dir.joinpath("pheval_disease_results/"),
+    disease_benchmarker.assess_disease_prioritisation(
         parse_pheval_result(RankedPhEvalDiseaseResult, pheval_disease_result),
-        threshold,
-        score_order,
-        proband_diseases,
-    ).assess_disease_prioritisation(
-        disease_rank_stats, disease_rank_comparison, disease_binary_classification_stats
-    )
+        phenopacket_path,
+        disease_binary_classification_stats)
 
 
 def benchmark_disease_prioritisation(
-    results_directory_and_input: TrackInputOutputDirectories,
-    score_order: str,
-    threshold: float,
-    disease_rank_comparison: defaultdict,
+        results_directory_and_input: TrackInputOutputDirectories,
+        score_order: str,
+        threshold: float,
 ):
     """
     Benchmark a directory based on disease prioritisation results.
@@ -250,27 +186,34 @@ def benchmark_disease_prioritisation(
         results_directory_and_input (TrackInputOutputDirectories): Input and output directories.
         score_order (str): The order in which scores are arranged.
         threshold (float): Threshold for assessment.
-        disease_rank_comparison (defaultdict): Default dictionary for disease rank comparisons.
 
     Returns:
         BenchmarkRunResults: An object containing benchmarking results for disease prioritisation,
         including ranks and rank statistics for the benchmarked directory.
     """
-    disease_rank_stats = RankStats()
     disease_binary_classification_stats = BinaryClassificationStats()
+    db_connection = DBConnector()
+    disease_benchmarker = AssessDiseasePrioritisation(db_connection,
+                                                      f"{results_directory_and_input.phenopacket_dir.parents[0].name}_disease",
+                                                      results_directory_and_input.results_dir.joinpath(
+                                                          "pheval_disease_results/"),
+                                                      threshold,
+                                                      score_order)
     for phenopacket_path in all_files(results_directory_and_input.phenopacket_dir):
         assess_phenopacket_disease_prioritisation(
             phenopacket_path,
-            score_order,
             results_directory_and_input,
-            threshold,
-            disease_rank_stats,
-            disease_rank_comparison,
             disease_binary_classification_stats,
+            disease_benchmarker
         )
+    db_connection.close()
+    disease_rank_stats = RankStats()
+    disease_rank_stats.add_ranks(
+        table_name=f'{results_directory_and_input.phenopacket_dir.parents[0].name}_disease',
+        column_name=str(results_directory_and_input.results_dir))
     return BenchmarkRunResults(
-        results_dir=results_directory_and_input.results_dir,
-        ranks=disease_rank_comparison,
         rank_stats=disease_rank_stats,
+        results_dir=results_directory_and_input.results_dir,
         binary_classification_stats=disease_binary_classification_stats,
+        phenopacket_dir=results_directory_and_input.phenopacket_dir,
     )
diff --git a/src/pheval/analyse/variant_prioritisation_analysis.py b/src/pheval/analyse/variant_prioritisation_analysis.py
index 2aecc4a67..87108fa70 100644
--- a/src/pheval/analyse/variant_prioritisation_analysis.py
+++ b/src/pheval/analyse/variant_prioritisation_analysis.py
@@ -4,10 +4,8 @@
 
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
-from pheval.analyse.parse_corpus import _obtain_causative_variants
+from pheval.analyse.get_connection import DBConnector
 from pheval.analyse.parse_pheval_result import parse_pheval_result, read_standardised_result
-from pheval.analyse.prioritisation_rank_recorder import PrioritisationRankRecorder
-from pheval.analyse.prioritisation_result_types import VariantPrioritisationResult
 from pheval.analyse.rank_stats import RankStats
 from pheval.analyse.run_data_parser import TrackInputOutputDirectories
 from pheval.post_processing.post_processing import RankedPhEvalVariantResult
@@ -19,62 +17,33 @@ class AssessVariantPrioritisation:
     """Class for assessing variant prioritisation based on thresholds and scoring orders."""
 
     def __init__(
-        self,
-        phenopacket_path: Path,
-        results_dir: Path,
-        standardised_variant_results: List[RankedPhEvalVariantResult],
-        threshold: float,
-        score_order: str,
-        proband_causative_variants: List[GenomicVariant],
+            self,
+            db_connection: DBConnector,
+            table_name: str,
+            results_dir: Path,
+            threshold: float,
+            score_order: str,
     ):
         """
         Initialise AssessVariantPrioritisation class
 
         Args:
-            phenopacket_path (Path): Path to the phenopacket file
             results_dir (Path): Path to the results directory
-            standardised_variant_results (List[RankedPhEvalVariantResult]): List of ranked PhEval variant results
             threshold (float): Threshold for scores
             score_order (str): Score order for results, either ascending or descending
-            proband_causative_variants (List[GenomicVariant]): List of proband variants
 
         """
-        self.phenopacket_path = phenopacket_path
         self.results_dir = results_dir
-        self.standardised_variant_results = standardised_variant_results
         self.threshold = threshold
         self.score_order = score_order
-        self.proband_causative_variants = proband_causative_variants
-
-    def _record_variant_prioritisation_match(
-        self,
-        result_entry: RankedPhEvalVariantResult,
-        rank_stats: RankStats,
-    ) -> VariantPrioritisationResult:
-        """
-        Record the variant prioritisation rank if found within the results
-        Args:
-            result_entry (RankedPhEvalVariantResult): Ranked PhEval variant result entry
-            rank_stats (RankStats): RankStats class instance
-        Returns:
-            VariantPrioritisationResult: Recorded correct variant prioritisation rank result
-        """
-        rank = result_entry.rank
-        rank_stats.add_rank(rank)
-        return VariantPrioritisationResult(
-            self.phenopacket_path,
-            GenomicVariant(
-                chrom=result_entry.chromosome,
-                pos=result_entry.start,
-                ref=result_entry.ref,
-                alt=result_entry.alt,
-            ),
-            rank,
-        )
+        self.conn = db_connection.conn
+        self.column = str(self.results_dir.parents[0])
+        self.table_name = table_name
+        db_connection.add_column_integer_default(table_name=table_name, column=self.column, default=0)
 
     def _assess_variant_with_threshold_ascending_order(
-        self, result_entry: RankedPhEvalVariantResult, rank_stats: RankStats
-    ) -> VariantPrioritisationResult:
+            self, result_entry: RankedPhEvalVariantResult
+    ) -> int:
         """
         Record the variant prioritisation rank if it meets the ascending order threshold.
 
@@ -83,17 +52,18 @@ def _assess_variant_with_threshold_ascending_order(
 
         Args:
             result_entry (RankedPhEvalVariantResult): Ranked PhEval variant result entry
-            rank_stats (RankStats): RankStats class instance
 
         Returns:
-            VariantPrioritisationResult: Recorded correct variant prioritisation rank result
+            int: Recorded variant prioritisation rank
         """
         if float(self.threshold) > float(result_entry.score):
-            return self._record_variant_prioritisation_match(result_entry, rank_stats)
+            return result_entry.rank
+        else:
+            return 0
 
     def _assess_variant_with_threshold(
-        self, result_entry: RankedPhEvalVariantResult, rank_stats: RankStats
-    ) -> VariantPrioritisationResult:
+            self, result_entry: RankedPhEvalVariantResult
+    ) -> int:
         """
         Record the variant prioritisation rank if it meets the score threshold.
 
@@ -102,17 +72,18 @@ def _assess_variant_with_threshold(
 
         Args:
             result_entry (RankedPhEvalVariantResult): Ranked PhEval variant result entry
-            rank_stats (RankStats): RankStats class instance
 
         Returns:
-            VariantPrioritisationResult: Recorded correct variant prioritisation rank result
+            int: Recorded variant prioritisation rank
         """
         if float(self.threshold) < float(result_entry.score):
-            return self._record_variant_prioritisation_match(result_entry, rank_stats)
+            return result_entry.rank
+        else:
+            return 0
 
     def _record_matched_variant(
-        self, rank_stats: RankStats, standardised_variant_result: RankedPhEvalVariantResult
-    ) -> VariantPrioritisationResult:
+            self, standardised_variant_result: RankedPhEvalVariantResult
+    ) -> int:
         """
         Return the variant rank result - handling the specification of a threshold.
 
@@ -121,30 +92,27 @@ def _record_matched_variant(
         Otherwise, it assesses the variant with the threshold based on the score order.
 
         Args:
-            rank_stats (RankStats): RankStats class instance
             standardised_variant_result (RankedPhEvalVariantResult): Ranked PhEval variant result entry
 
         Returns:
-            VariantPrioritisationResult: Recorded correct variant prioritisation rank result
+            int: Recorded variant prioritisation rank
         """
         if float(self.threshold) == 0.0:
-            return self._record_variant_prioritisation_match(
-                standardised_variant_result, rank_stats
-            )
+            return standardised_variant_result.rank
         else:
             return (
-                self._assess_variant_with_threshold(standardised_variant_result, rank_stats)
+                self._assess_variant_with_threshold(standardised_variant_result)
                 if self.score_order != "ascending"
                 else self._assess_variant_with_threshold_ascending_order(
-                    standardised_variant_result, rank_stats
+                    standardised_variant_result,
                 )
             )
 
     def assess_variant_prioritisation(
-        self,
-        rank_stats: RankStats,
-        rank_records: defaultdict,
-        binary_classification_stats: BinaryClassificationStats,
+            self,
+            standardised_variant_results: List[RankedPhEvalVariantResult],
+            phenopacket_path: Path,
+            binary_classification_stats: BinaryClassificationStats,
     ) -> None:
         """
         Assess variant prioritisation.
@@ -153,53 +121,43 @@ def assess_variant_prioritisation(
         and records ranks using a PrioritisationRankRecorder.
 
         Args:
-            rank_stats (RankStats): RankStats class instance
-            rank_records (defaultdict): A defaultdict to store the correct ranked results.
+            standardised_variant_results (List[RankedPhEvalVariantResult]): List of standardised variant results.
+            phenopacket_path (Path): Path to the phenopacket.
             binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
         """
         relevant_ranks = []
-        for variant in self.proband_causative_variants:
-            rank_stats.total += 1
-            variant_match = VariantPrioritisationResult(self.phenopacket_path, variant)
-            for result in self.standardised_variant_results:
-                result_variant = GenomicVariant(
-                    chrom=str(result.chromosome),
-                    pos=int(result.start),
-                    ref=result.ref,
-                    alt=result.alt,
+        df = self.conn.execute(
+            f"""SELECT * FROM {self.table_name} WHERE phenopacket = '{phenopacket_path.name}'""").fetchdf()
+        for i, row in df.iterrows():
+            causative_variant = GenomicVariant(chrom=row["chrom"],
+                                               pos=int(row["pos"]),
+                                               ref=row["ref"],
+                                               alt=row["alt"], )
+            generated_matches = list(result for result in standardised_variant_results if
+                                     causative_variant == GenomicVariant(chrom=result.chromosome,
+                                                                         pos=result.start,
+                                                                         alt=result.alt,
+                                                                         ref=result.ref, ))
+            if len(generated_matches) > 0:
+                variant_match = self._record_matched_variant(generated_matches[0])
+                relevant_ranks.append(variant_match)
+                primary_key = (f"{phenopacket_path.name}-{causative_variant.chrom}-{causative_variant.pos}-"
+                               f"{causative_variant.ref}-{causative_variant.alt}")
+                self.conn.execute(
+                    f'UPDATE {self.table_name} SET "{self.column}" = ? WHERE identifier = ?',
+                    (variant_match, primary_key),
                 )
-                if variant == result_variant:
-                    variant_match = self._record_matched_variant(rank_stats, result)
-                    (
-                        relevant_ranks.append(variant_match.rank)
-                        if variant_match
-                        else relevant_ranks.append(0)
-                    )
-                    break
-            PrioritisationRankRecorder(
-                rank_stats.total,
-                self.results_dir,
-                (
-                    VariantPrioritisationResult(self.phenopacket_path, variant)
-                    if variant_match is None
-                    else variant_match
-                ),
-                rank_records,
-            ).record_rank()
-        rank_stats.relevant_result_ranks.append(relevant_ranks)
+
         binary_classification_stats.add_classification(
-            self.standardised_variant_results, relevant_ranks
+            standardised_variant_results, relevant_ranks
         )
 
 
 def assess_phenopacket_variant_prioritisation(
-    phenopacket_path: Path,
-    score_order: str,
-    results_dir_and_input: TrackInputOutputDirectories,
-    threshold: float,
-    variant_rank_stats: RankStats,
-    variant_rank_comparison: defaultdict,
-    variant_binary_classification_stats: BinaryClassificationStats,
+        phenopacket_path: Path,
+        results_dir_and_input: TrackInputOutputDirectories,
+        variant_binary_classification_stats: BinaryClassificationStats,
+        variant_benchmarker: AssessVariantPrioritisation
 ) -> None:
     """
     Assess variant prioritisation for a Phenopacket by comparing PhEval standardised variant results
@@ -207,35 +165,24 @@ def assess_phenopacket_variant_prioritisation(
 
     Args:
         phenopacket_path (Path): Path to the Phenopacket.
-        score_order (str): The order in which scores are arranged, either ascending or descending.
         results_dir_and_input (TrackInputOutputDirectories): Input and output directories.
-        threshold (float): Threshold for assessment.
-        variant_rank_stats (RankStats): RankStats class instance.
-        variant_rank_comparison (defaultdict): Default dictionary for variant rank comparisons.
         variant_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
+        variant_benchmarker (AssessVariantPrioritisation): AssessVariantPrioritisation class instance.
     """
-    proband_causative_variants = _obtain_causative_variants(phenopacket_path)
     standardised_variant_result = results_dir_and_input.results_dir.joinpath(
         f"pheval_variant_results/{phenopacket_path.stem}-pheval_variant_result.tsv"
     )
     pheval_variant_result = read_standardised_result(standardised_variant_result)
-    AssessVariantPrioritisation(
-        phenopacket_path,
-        results_dir_and_input.results_dir.joinpath("pheval_variant_results/"),
+    variant_benchmarker.assess_variant_prioritisation(
         parse_pheval_result(RankedPhEvalVariantResult, pheval_variant_result),
-        threshold,
-        score_order,
-        proband_causative_variants,
-    ).assess_variant_prioritisation(
-        variant_rank_stats, variant_rank_comparison, variant_binary_classification_stats
-    )
+        phenopacket_path,
+        variant_binary_classification_stats)
 
 
 def benchmark_variant_prioritisation(
-    results_directory_and_input: TrackInputOutputDirectories,
-    score_order: str,
-    threshold: float,
-    variant_rank_comparison: defaultdict,
+        results_directory_and_input: TrackInputOutputDirectories,
+        score_order: str,
+        threshold: float,
 ):
     """
     Benchmark a directory based on variant prioritisation results.
@@ -244,27 +191,35 @@ def benchmark_variant_prioritisation(
         results_directory_and_input (TrackInputOutputDirectories): Input and output directories.
         score_order (str): The order in which scores are arranged.
         threshold (float): Threshold for assessment.
-        variant_rank_comparison (defaultdict): Default dictionary for variant rank comparisons.
 
     Returns:
         BenchmarkRunResults: An object containing benchmarking results for variant prioritisation,
         including ranks and rank statistics for the benchmarked directory.
     """
-    variant_rank_stats = RankStats()
     variant_binary_classification_stats = BinaryClassificationStats()
+    db_connection = DBConnector()
+    variant_benchmarker = AssessVariantPrioritisation(db_connection,
+                                                      f"{results_directory_and_input.phenopacket_dir.parents[0].name}"
+                                                      f"_variant",
+                                                      results_directory_and_input.results_dir.joinpath(
+                                                          "pheval_variant_results/"),
+                                                      threshold,
+                                                      score_order,
+                                                      )
     for phenopacket_path in all_files(results_directory_and_input.phenopacket_dir):
         assess_phenopacket_variant_prioritisation(
             phenopacket_path,
-            score_order,
             results_directory_and_input,
-            threshold,
-            variant_rank_stats,
-            variant_rank_comparison,
             variant_binary_classification_stats,
+            variant_benchmarker
         )
+    variant_rank_stats = RankStats()
+    variant_rank_stats.add_ranks(
+        table_name=f'{results_directory_and_input.phenopacket_dir.parents[0].name}_variant',
+        column_name=str(results_directory_and_input.results_dir))
     return BenchmarkRunResults(
         results_dir=results_directory_and_input.results_dir,
-        ranks=variant_rank_comparison,
         rank_stats=variant_rank_stats,
         binary_classification_stats=variant_binary_classification_stats,
+        phenopacket_dir=results_directory_and_input.phenopacket_dir,
     )

From 92a838566f27b651ecdfe3b92525d189124f5ce3 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Mon, 29 Jul 2024 16:29:59 +0100
Subject: [PATCH 29/81] format docstrings

---
 src/pheval/analyse/gene_prioritisation_analysis.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/pheval/analyse/gene_prioritisation_analysis.py b/src/pheval/analyse/gene_prioritisation_analysis.py
index ba4bba5f3..a9fc18344 100644
--- a/src/pheval/analyse/gene_prioritisation_analysis.py
+++ b/src/pheval/analyse/gene_prioritisation_analysis.py
@@ -30,6 +30,8 @@ def __init__(
         Initialise AssessGenePrioritisation class.
 
         Args:
+            db_connection (DBConnector): Database connection
+            table_name (str): Table name
             results_dir (Path): Path to the results directory
             threshold (float): Threshold for scores
             score_order (str): Score order for results, either ascending or descending

From 6b3a132754225604396daebd68e63d6e68e2b3f7 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Mon, 29 Jul 2024 16:30:06 +0100
Subject: [PATCH 30/81] refactor column names

---
 src/pheval/analyse/parse_corpus.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pheval/analyse/parse_corpus.py b/src/pheval/analyse/parse_corpus.py
index f63766434..c56661c2e 100644
--- a/src/pheval/analyse/parse_corpus.py
+++ b/src/pheval/analyse/parse_corpus.py
@@ -92,7 +92,7 @@ def _create_variant_table(self) -> None:
                         phenopacket VARCHAR,
                         chrom VARCHAR,
                         pos INTEGER,
-                        reference VARCHAR,
+                        "ref" VARCHAR,
                         alt VARCHAR
                     )
                     """
@@ -157,7 +157,7 @@ def _insert_variants(self, phenopacket_path: Path, variants: List[GenomicVariant
             )
             self.conn.execute(
                 f"""
-                INSERT OR IGNORE INTO {self.table_name}_variant (identifier, phenopacket, chrom, pos, reference, alt)
+                INSERT OR IGNORE INTO {self.table_name}_variant (identifier, phenopacket, chrom, pos, "ref", alt)
                 VALUES (?, ?, ?, ?, ?, ?)
                 """,
                 (

From 0211fc80bd1bc0e02ede8580bf9eed88f18ba818 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 30 Jul 2024 14:40:28 +0100
Subject: [PATCH 31/81] tox lint

---
 src/pheval/analyse/analysis.py                |  82 +++----
 src/pheval/analyse/benchmark_generator.py     |   6 +-
 .../disease_prioritisation_analysis.py        |  92 ++++----
 .../analyse/gene_prioritisation_analysis.py   | 109 ++++-----
 src/pheval/analyse/generate_plots.py          |   4 +-
 .../analyse/generate_summary_outputs.py       |  77 ++++---
 src/pheval/analyse/get_connection.py          |  10 +-
 src/pheval/analyse/parse_corpus.py            |  25 ++-
 src/pheval/analyse/rank_stats.py              | 209 +++++++++---------
 .../variant_prioritisation_analysis.py        | 113 +++++-----
 10 files changed, 398 insertions(+), 329 deletions(-)

diff --git a/src/pheval/analyse/analysis.py b/src/pheval/analyse/analysis.py
index 002f22c37..6ef05d27a 100644
--- a/src/pheval/analyse/analysis.py
+++ b/src/pheval/analyse/analysis.py
@@ -16,12 +16,12 @@
 
 
 def _run_benchmark(
-        results_dir_and_input: TrackInputOutputDirectories,
-        score_order: str,
-        output_prefix: str,
-        threshold: float,
-        plot_type: str,
-        benchmark_generator: BenchmarkRunOutputGenerator,
+    results_dir_and_input: TrackInputOutputDirectories,
+    score_order: str,
+    output_prefix: str,
+    threshold: float,
+    plot_type: str,
+    benchmark_generator: BenchmarkRunOutputGenerator,
 ) -> None:
     """Run a benchmark on a result directory.
 
@@ -49,14 +49,14 @@ def _run_benchmark(
 
 
 def benchmark_directory(
-        results_dir_and_input: TrackInputOutputDirectories,
-        score_order: str,
-        output_prefix: str,
-        threshold: float,
-        gene_analysis: bool,
-        variant_analysis: bool,
-        disease_analysis: bool,
-        plot_type: str,
+    results_dir_and_input: TrackInputOutputDirectories,
+    score_order: str,
+    output_prefix: str,
+    threshold: float,
+    gene_analysis: bool,
+    variant_analysis: bool,
+    disease_analysis: bool,
+    plot_type: str,
 ) -> None:
     """
     Benchmark prioritisation performance for a single run.
@@ -101,12 +101,12 @@ def benchmark_directory(
 
 
 def _run_benchmark_comparison(
-        results_directories: List[TrackInputOutputDirectories],
-        score_order: str,
-        output_prefix: str,
-        threshold: float,
-        plot_type: str,
-        benchmark_generator: BenchmarkRunOutputGenerator,
+    results_directories: List[TrackInputOutputDirectories],
+    score_order: str,
+    output_prefix: str,
+    threshold: float,
+    plot_type: str,
+    benchmark_generator: BenchmarkRunOutputGenerator,
 ) -> None:
     """
     Run a benchmark on several result directories.
@@ -123,9 +123,13 @@ def _run_benchmark_comparison(
     stats_writer = RankStatsWriter(
         str(output_prefix + benchmark_generator.stats_comparison_file_suffix)
     )
-    unique_test_corpora_directories = set([result.phenopacket_dir for result in results_directories])
-    [CorpusParser(test_corpora_directory).parse_corpus(benchmark_generator) for test_corpora_directory in
-     unique_test_corpora_directories]
+    unique_test_corpora_directories = set(
+        [result.phenopacket_dir for result in results_directories]
+    )
+    [
+        CorpusParser(test_corpora_directory).parse_corpus(benchmark_generator)
+        for test_corpora_directory in unique_test_corpora_directories
+    ]
     benchmarking_results = []
     for results_dir_and_input in results_directories:
         benchmark_result = benchmark_generator.generate_benchmark_run_results(
@@ -137,23 +141,27 @@ def _run_benchmark_comparison(
             benchmark_result.binary_classification_stats,
         )
         benchmarking_results.append(benchmark_result)
-    [generate_benchmark_comparison_output(benchmarking_results, plot_type, benchmark_generator,
-                                          f"{unique_test_corpora_directory.parents[0].name}_"
-                                          f"{benchmark_generator.prioritisation_type_string}")
-     for unique_test_corpora_directory in
-     unique_test_corpora_directories]
-
+    [
+        generate_benchmark_comparison_output(
+            benchmarking_results,
+            plot_type,
+            benchmark_generator,
+            f"{unique_test_corpora_directory.parents[0].name}_"
+            f"{benchmark_generator.prioritisation_type_string}",
+        )
+        for unique_test_corpora_directory in unique_test_corpora_directories
+    ]
 
 
 def benchmark_run_comparisons(
-        results_directories: List[TrackInputOutputDirectories],
-        score_order: str,
-        output_prefix: str,
-        threshold: float,
-        gene_analysis: bool,
-        variant_analysis: bool,
-        disease_analysis: bool,
-        plot_type: str,
+    results_directories: List[TrackInputOutputDirectories],
+    score_order: str,
+    output_prefix: str,
+    threshold: float,
+    gene_analysis: bool,
+    variant_analysis: bool,
+    disease_analysis: bool,
+    plot_type: str,
 ) -> None:
     """
     Benchmark prioritisation performance for several runs.
diff --git a/src/pheval/analyse/benchmark_generator.py b/src/pheval/analyse/benchmark_generator.py
index ee360b604..6996427ab 100644
--- a/src/pheval/analyse/benchmark_generator.py
+++ b/src/pheval/analyse/benchmark_generator.py
@@ -7,12 +7,12 @@
 from pheval.analyse.run_data_parser import TrackInputOutputDirectories
 from pheval.analyse.variant_prioritisation_analysis import benchmark_variant_prioritisation
 from pheval.constants import (
-    DISEASE_PRIORITISATION_TYPE_STR,
     DISEASE_PLOT_Y_LABEL,
-    GENE_PRIORITISATION_TYPE_STR,
+    DISEASE_PRIORITISATION_TYPE_STR,
     GENE_PLOT_Y_LABEL,
-    VARIANT_PRIORITISATION_TYPE_STR,
+    GENE_PRIORITISATION_TYPE_STR,
     VARIANT_PLOT_Y_LABEL,
+    VARIANT_PRIORITISATION_TYPE_STR,
 )
 
 
diff --git a/src/pheval/analyse/disease_prioritisation_analysis.py b/src/pheval/analyse/disease_prioritisation_analysis.py
index 7bc4f302f..a2098a9c9 100644
--- a/src/pheval/analyse/disease_prioritisation_analysis.py
+++ b/src/pheval/analyse/disease_prioritisation_analysis.py
@@ -15,12 +15,12 @@ class AssessDiseasePrioritisation:
     """Class for assessing disease prioritisation based on thresholds and scoring orders."""
 
     def __init__(
-            self,
-            db_connection: DBConnector,
-            table_name: str,
-            results_dir: Path,
-            threshold: float,
-            score_order: str,
+        self,
+        db_connection: DBConnector,
+        table_name: str,
+        results_dir: Path,
+        threshold: float,
+        score_order: str,
     ):
         """
         Initialise AssessDiseasePrioritisation class
@@ -39,11 +39,13 @@ def __init__(
         self.conn = db_connection.conn
         self.column = str(self.results_dir.parents[0])
         self.table_name = table_name
-        db_connection.add_column_integer_default(table_name=table_name, column=self.column, default=0)
+        db_connection.add_column_integer_default(
+            table_name=table_name, column=self.column, default=0
+        )
 
     def _assess_disease_with_threshold_ascending_order(
-            self,
-            result_entry: RankedPhEvalDiseaseResult,
+        self,
+        result_entry: RankedPhEvalDiseaseResult,
     ) -> int:
         """
         Record the disease prioritisation rank if it meets the ascending order threshold.
@@ -63,8 +65,8 @@ def _assess_disease_with_threshold_ascending_order(
             return 0
 
     def _assess_disease_with_threshold(
-            self,
-            result_entry: RankedPhEvalDiseaseResult,
+        self,
+        result_entry: RankedPhEvalDiseaseResult,
     ) -> int:
         """
         Record the disease prioritisation rank if it meets the score threshold.
@@ -84,8 +86,8 @@ def _assess_disease_with_threshold(
             return 0
 
     def _record_matched_disease(
-            self,
-            standardised_disease_result: RankedPhEvalDiseaseResult,
+        self,
+        standardised_disease_result: RankedPhEvalDiseaseResult,
     ) -> int:
         """
         Return the disease rank result - handling the specification of a threshold.
@@ -112,10 +114,10 @@ def _record_matched_disease(
             )
 
     def assess_disease_prioritisation(
-            self,
-            standardised_disease_results: List[RankedPhEvalDiseaseResult],
-            phenopacket_path: Path,
-            binary_classification_stats: BinaryClassificationStats,
+        self,
+        standardised_disease_results: List[RankedPhEvalDiseaseResult],
+        phenopacket_path: Path,
+        binary_classification_stats: BinaryClassificationStats,
     ) -> None:
         """
         Assess disease prioritisation.
@@ -130,11 +132,16 @@ def assess_disease_prioritisation(
         """
         relevant_ranks = []
         df = self.conn.execute(
-            f"""SELECT * FROM {self.table_name} WHERE phenopacket = '{phenopacket_path.name}'""").fetchdf()
-        for i, row in df.iterrows():
-            generated_matches = list(result for result in standardised_disease_results if
-                                     row["disease_name"] == result.disease_name or row[
-                                         "disease_identifier"] == result.disease_identifier)
+            f"SELECT * FROM {self.table_name} WHERE phenopacket = ? ",
+            (phenopacket_path.name,),
+        ).fetchdf()
+        for _i, row in df.iterrows():
+            generated_matches = list(
+                result
+                for result in standardised_disease_results
+                if row["disease_name"] == result.disease_name
+                or row["disease_identifier"] == result.disease_identifier
+            )
             if len(generated_matches) > 0:
                 disease_match = self._record_matched_disease(generated_matches[0])
                 relevant_ranks.append(disease_match)
@@ -143,16 +150,14 @@ def assess_disease_prioritisation(
                     f'UPDATE {self.table_name} SET "{self.column}" = ? WHERE identifier = ?',
                     (disease_match, primary_key),
                 )
-        binary_classification_stats.add_classification(
-            standardised_disease_results, relevant_ranks
-        )
+        binary_classification_stats.add_classification(standardised_disease_results, relevant_ranks)
 
 
 def assess_phenopacket_disease_prioritisation(
-        phenopacket_path: Path,
-        results_dir_and_input: TrackInputOutputDirectories,
-        disease_binary_classification_stats: BinaryClassificationStats,
-        disease_benchmarker: AssessDiseasePrioritisation
+    phenopacket_path: Path,
+    results_dir_and_input: TrackInputOutputDirectories,
+    disease_binary_classification_stats: BinaryClassificationStats,
+    disease_benchmarker: AssessDiseasePrioritisation,
 ) -> None:
     """
     Assess disease prioritisation for a Phenopacket by comparing PhEval standardised disease results
@@ -171,13 +176,14 @@ def assess_phenopacket_disease_prioritisation(
     disease_benchmarker.assess_disease_prioritisation(
         parse_pheval_result(RankedPhEvalDiseaseResult, pheval_disease_result),
         phenopacket_path,
-        disease_binary_classification_stats)
+        disease_binary_classification_stats,
+    )
 
 
 def benchmark_disease_prioritisation(
-        results_directory_and_input: TrackInputOutputDirectories,
-        score_order: str,
-        threshold: float,
+    results_directory_and_input: TrackInputOutputDirectories,
+    score_order: str,
+    threshold: float,
 ):
     """
     Benchmark a directory based on disease prioritisation results.
@@ -193,24 +199,26 @@ def benchmark_disease_prioritisation(
     """
     disease_binary_classification_stats = BinaryClassificationStats()
     db_connection = DBConnector()
-    disease_benchmarker = AssessDiseasePrioritisation(db_connection,
-                                                      f"{results_directory_and_input.phenopacket_dir.parents[0].name}_disease",
-                                                      results_directory_and_input.results_dir.joinpath(
-                                                          "pheval_disease_results/"),
-                                                      threshold,
-                                                      score_order)
+    disease_benchmarker = AssessDiseasePrioritisation(
+        db_connection,
+        f"{results_directory_and_input.phenopacket_dir.parents[0].name}_disease",
+        results_directory_and_input.results_dir.joinpath("pheval_disease_results/"),
+        threshold,
+        score_order,
+    )
     for phenopacket_path in all_files(results_directory_and_input.phenopacket_dir):
         assess_phenopacket_disease_prioritisation(
             phenopacket_path,
             results_directory_and_input,
             disease_binary_classification_stats,
-            disease_benchmarker
+            disease_benchmarker,
         )
     db_connection.close()
     disease_rank_stats = RankStats()
     disease_rank_stats.add_ranks(
-        table_name=f'{results_directory_and_input.phenopacket_dir.parents[0].name}_disease',
-        column_name=str(results_directory_and_input.results_dir))
+        table_name=f"{results_directory_and_input.phenopacket_dir.parents[0].name}_disease",
+        column_name=str(results_directory_and_input.results_dir),
+    )
     return BenchmarkRunResults(
         rank_stats=disease_rank_stats,
         results_dir=results_directory_and_input.results_dir,
diff --git a/src/pheval/analyse/gene_prioritisation_analysis.py b/src/pheval/analyse/gene_prioritisation_analysis.py
index a9fc18344..2a00b47c7 100644
--- a/src/pheval/analyse/gene_prioritisation_analysis.py
+++ b/src/pheval/analyse/gene_prioritisation_analysis.py
@@ -3,14 +3,12 @@
 from pathlib import Path
 from typing import List, Union
 
-
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
+from pheval.analyse.get_connection import DBConnector
 from pheval.analyse.parse_pheval_result import parse_pheval_result, read_standardised_result
-from pheval.analyse.prioritisation_result_types import GenePrioritisationResult
 from pheval.analyse.rank_stats import RankStats
 from pheval.analyse.run_data_parser import TrackInputOutputDirectories
-from pheval.analyse.get_connection import DBConnector
 from pheval.post_processing.post_processing import RankedPhEvalGeneResult
 from pheval.utils.file_utils import all_files
 
@@ -19,12 +17,12 @@ class AssessGenePrioritisation:
     """Class for assessing gene prioritisation based on thresholds and scoring orders."""
 
     def __init__(
-            self,
-            db_connection: DBConnector,
-            table_name: str,
-            results_dir: Path,
-            threshold: float,
-            score_order: str,
+        self,
+        db_connection: DBConnector,
+        table_name: str,
+        results_dir: Path,
+        threshold: float,
+        score_order: str,
     ):
         """
         Initialise AssessGenePrioritisation class.
@@ -39,14 +37,17 @@ def __init__(
         self.results_dir = results_dir
         self.threshold = threshold
         self.score_order = score_order
+        self.db_connection = db_connection
         self.conn = db_connection.conn
         self.column = str(self.results_dir.parents[0])
         self.table_name = table_name
-        db_connection.add_column_integer_default(table_name=table_name, column=self.column, default=0)
+        db_connection.add_column_integer_default(
+            table_name=table_name, column=self.column, default=0
+        )
 
     def _assess_gene_with_threshold_ascending_order(
-            self,
-            result_entry: RankedPhEvalGeneResult,
+        self,
+        result_entry: RankedPhEvalGeneResult,
     ) -> int:
         """
         Record the gene prioritisation rank if it meets the ascending order threshold.
@@ -61,8 +62,8 @@ def _assess_gene_with_threshold_ascending_order(
             return 0
 
     def _assess_gene_with_threshold(
-            self,
-            result_entry: RankedPhEvalGeneResult,
+        self,
+        result_entry: RankedPhEvalGeneResult,
     ) -> int:
         """
         Record the gene prioritisation rank if it meets the score threshold.
@@ -78,8 +79,8 @@ def _assess_gene_with_threshold(
             return 0
 
     def _record_matched_gene(
-            self,
-            standardised_gene_result: RankedPhEvalGeneResult,
+        self,
+        standardised_gene_result: RankedPhEvalGeneResult,
     ) -> int:
         """
         Return the gene rank result - handling the specification of a threshold.
@@ -121,10 +122,10 @@ def _check_string_representation(entity: str) -> Union[List[str], str]:
             return entity
 
     def assess_gene_prioritisation(
-            self,
-            standardised_gene_results: List[RankedPhEvalGeneResult],
-            phenopacket_path: Path,
-            binary_classification_stats: BinaryClassificationStats,
+        self,
+        standardised_gene_results: List[RankedPhEvalGeneResult],
+        phenopacket_path: Path,
+        binary_classification_stats: BinaryClassificationStats,
     ) -> None:
         """
         Assess gene prioritisation.
@@ -138,27 +139,33 @@ def assess_gene_prioritisation(
         """
         relevant_ranks = []
         df = self.conn.execute(
-            f"""SELECT * FROM {self.table_name} WHERE phenopacket = '{phenopacket_path.name}'""").fetchdf()
-        for i, row in df.iterrows():
+            f"SELECT * FROM {self.table_name} WHERE phenopacket = ?", (phenopacket_path.name,)
+        ).fetchdf()
+        for _i, row in df.iterrows():
             generated_matches = list(
-                result for result in standardised_gene_results
+                result
+                for result in standardised_gene_results
                 if (
-                        isinstance(self._check_string_representation(result.gene_identifier), list)
-                        and row["gene_identifier"] in self._check_string_representation(result.gene_identifier)
-                        or isinstance(self._check_string_representation(result.gene_identifier), str)
-                        and row["gene_identifier"] == self._check_string_representation(result.gene_identifier)
-                        or isinstance(self._check_string_representation(result.gene_symbol), list)
-                        and row["gene_symbol"] in self._check_string_representation(result.gene_symbol)
-                        or isinstance(self._check_string_representation(result.gene_symbol), str)
-                        and row["gene_symbol"] == self._check_string_representation(result.gene_symbol)
+                    isinstance(self._check_string_representation(result.gene_identifier), list)
+                    and row["gene_identifier"]
+                    in self._check_string_representation(result.gene_identifier)
+                    or isinstance(self._check_string_representation(result.gene_identifier), str)
+                    and row["gene_identifier"]
+                    == self._check_string_representation(result.gene_identifier)
+                    or isinstance(self._check_string_representation(result.gene_symbol), list)
+                    and row["gene_symbol"] in self._check_string_representation(result.gene_symbol)
+                    or isinstance(self._check_string_representation(result.gene_symbol), str)
+                    and row["gene_symbol"] == self._check_string_representation(result.gene_symbol)
                 )
             )
             if len(generated_matches) > 0:
                 gene_match = self._record_matched_gene(generated_matches[0])
                 relevant_ranks.append(gene_match)
                 primary_key = f"{phenopacket_path.name}-{row['gene_symbol']}"
+                safe_table_name = self.db_connection.sanitize_identifier(self.table_name)
+                safe_column = self.db_connection.sanitize_identifier(self.column)
                 self.conn.execute(
-                    f'UPDATE {self.table_name} SET "{self.column}" = ? WHERE identifier = ?',
+                    f"UPDATE {safe_table_name} SET {safe_column} = ? WHERE identifier = ?",
                     (gene_match, primary_key),
                 )
         binary_classification_stats.add_classification(
@@ -167,10 +174,10 @@ def assess_gene_prioritisation(
 
 
 def assess_phenopacket_gene_prioritisation(
-        phenopacket_path: Path,
-        results_dir_and_input: TrackInputOutputDirectories,
-        gene_binary_classification_stats: BinaryClassificationStats,
-        gene_benchmarker: AssessGenePrioritisation,
+    phenopacket_path: Path,
+    results_dir_and_input: TrackInputOutputDirectories,
+    gene_binary_classification_stats: BinaryClassificationStats,
+    gene_benchmarker: AssessGenePrioritisation,
 ) -> None:
     """
     Assess gene prioritisation for a Phenopacket by comparing PhEval standardised gene results
@@ -189,14 +196,14 @@ def assess_phenopacket_gene_prioritisation(
     gene_benchmarker.assess_gene_prioritisation(
         parse_pheval_result(RankedPhEvalGeneResult, pheval_gene_result),
         phenopacket_path,
-        gene_binary_classification_stats
+        gene_binary_classification_stats,
     )
 
 
 def benchmark_gene_prioritisation(
-        results_directory_and_input: TrackInputOutputDirectories,
-        score_order: str,
-        threshold: float,
+    results_directory_and_input: TrackInputOutputDirectories,
+    score_order: str,
+    threshold: float,
 ) -> BenchmarkRunResults:
     """
     Benchmark a directory based on gene prioritisation results.
@@ -210,26 +217,26 @@ def benchmark_gene_prioritisation(
     """
     gene_binary_classification_stats = BinaryClassificationStats()
     db_connection = DBConnector()
-    gene_benchmarker = AssessGenePrioritisation(db_connection,
-                                                f"{results_directory_and_input.phenopacket_dir.parents[0].name}"
-                                                f"_gene",
-                                                results_directory_and_input.results_dir.joinpath(
-                                                    "pheval_gene_results/"),
-                                                threshold,
-                                                score_order
-                                                )
+    gene_benchmarker = AssessGenePrioritisation(
+        db_connection,
+        f"{results_directory_and_input.phenopacket_dir.parents[0].name}" f"_gene",
+        results_directory_and_input.results_dir.joinpath("pheval_gene_results/"),
+        threshold,
+        score_order,
+    )
     for phenopacket_path in all_files(results_directory_and_input.phenopacket_dir):
         assess_phenopacket_gene_prioritisation(
             phenopacket_path,
             results_directory_and_input,
             gene_binary_classification_stats,
-            gene_benchmarker
+            gene_benchmarker,
         )
     db_connection.close()
     gene_rank_stats = RankStats()
     gene_rank_stats.add_ranks(
-        table_name=f'{results_directory_and_input.phenopacket_dir.parents[0].name}_gene',
-        column_name=str(results_directory_and_input.results_dir))
+        table_name=f"{results_directory_and_input.phenopacket_dir.parents[0].name}_gene",
+        column_name=str(results_directory_and_input.results_dir),
+    )
     return BenchmarkRunResults(
         rank_stats=gene_rank_stats,
         results_dir=results_directory_and_input.results_dir,
diff --git a/src/pheval/analyse/generate_plots.py b/src/pheval/analyse/generate_plots.py
index 66e265181..c3e388063 100644
--- a/src/pheval/analyse/generate_plots.py
+++ b/src/pheval/analyse/generate_plots.py
@@ -167,9 +167,7 @@ def generate_stacked_bar_plot(
             edgecolor="white",
         ).legend(loc="center left", bbox_to_anchor=(1.0, 0.5))
         if title is None:
-            plt.title(
-                f"{benchmark_generator.prioritisation_type_string.capitalize()} Rank Stats"
-            )
+            plt.title(f"{benchmark_generator.prioritisation_type_string.capitalize()} Rank Stats")
         else:
             plt.title(title, loc="center", fontsize=15)
         plt.ylim(0, 100)
diff --git a/src/pheval/analyse/generate_summary_outputs.py b/src/pheval/analyse/generate_summary_outputs.py
index 95679b0c6..6ce27757e 100644
--- a/src/pheval/analyse/generate_summary_outputs.py
+++ b/src/pheval/analyse/generate_summary_outputs.py
@@ -1,17 +1,18 @@
 import itertools
 from pathlib import Path
 from typing import List
+
 from pheval.analyse.benchmark_generator import BenchmarkRunOutputGenerator
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.generate_plots import generate_plots
-from pheval.constants import RANK_COMPARISON_SUFFIX
 from pheval.analyse.get_connection import DBConnector
+from pheval.constants import RANK_COMPARISON_SUFFIX
 
 
 def generate_benchmark_output(
-        benchmarking_results: BenchmarkRunResults,
-        plot_type: str,
-        benchmark_generator: BenchmarkRunOutputGenerator,
+    benchmarking_results: BenchmarkRunResults,
+    plot_type: str,
+    benchmark_generator: BenchmarkRunOutputGenerator,
 ) -> None:
     """
     Generate prioritisation outputs for a single benchmarking run.
@@ -24,9 +25,11 @@ def generate_benchmark_output(
     results_dir_name = benchmarking_results.results_dir.name
     conn = DBConnector().conn
     conn.execute(
-        f"""CREATE TABLE {results_dir_name}_{benchmark_generator.prioritisation_type_string}
-        {RANK_COMPARISON_SUFFIX} AS SELECT * EXCLUDE (identifier) FROM 
-        {benchmarking_results.phenopacket_dir.parents[0].name}_{benchmark_generator.prioritisation_type_string}""")
+        f"CREATE TABLE {results_dir_name}_{benchmark_generator.prioritisation_type_string}{RANK_COMPARISON_SUFFIX} "
+        f"AS SELECT * EXCLUDE (identifier) FROM "
+        f"{benchmarking_results.phenopacket_dir.parents[0].name}_{benchmark_generator.prioritisation_type_string}"
+    )
+
     conn.close()
     generate_plots(
         [benchmarking_results],
@@ -45,30 +48,42 @@ def get_new_table_name(result_dir_1: Path, result_dir_2: Path, output_prefix: st
     Returns:
         The new table name.
     """
-    return (f"{Path(result_dir_1).parents[0].name}_{Path(result_dir_1).name}_vs_"
-            f"{Path(result_dir_2).parents[0].name}_{Path(result_dir_2).name}_"
-            f"{output_prefix}{RANK_COMPARISON_SUFFIX}")
+    return (
+        f"{Path(result_dir_1).parents[0].name}_{Path(result_dir_1).name}_vs_"
+        f"{Path(result_dir_2).parents[0].name}_{Path(result_dir_2).name}_"
+        f"{output_prefix}{RANK_COMPARISON_SUFFIX}"
+    )
 
 
-def create_comparison_table(comparison_table_name: str, connector: DBConnector, drop_columns: List[str],
-                            result_dir_1: str, result_dir_2: str, table_name: str) -> None:
+def create_comparison_table(
+    comparison_table_name: str,
+    connector: DBConnector,
+    drop_columns: List[str],
+    result_dir_1: str,
+    result_dir_2: str,
+    table_name: str,
+) -> None:
     connector.drop_table(comparison_table_name)
     connector.conn.execute(
-        f"""CREATE TABLE "{comparison_table_name}" AS SELECT * EXCLUDE 
-        ('{", ".join(drop_columns)}', identifier) FROM {table_name}""")
-    connector.conn.execute(f"""ALTER TABLE "{comparison_table_name}" ADD COLUMN rank_change VARCHAR;""")
+        f'CREATE TABLE "{comparison_table_name}" AS SELECT * '
+        f'EXCLUDE (\'{", ".join(drop_columns)}\', identifier) FROM {table_name}'
+    )
     connector.conn.execute(
-        f"""UPDATE "{comparison_table_name}" SET rank_change = CASE WHEN "{result_dir_1}" = 0 
-        AND "{result_dir_2}" != 0 THEN 'GAINED' WHEN "{result_dir_1}" != 0 
-        AND "{result_dir_2}" = 0 THEN 'LOST' ELSE CAST ("{result_dir_1}" - "{result_dir_2}" AS VARCHAR) END;""")
+        f"""ALTER TABLE "{comparison_table_name}" ADD COLUMN rank_change VARCHAR;"""
+    )
+    connector.conn.execute(
+        f'UPDATE "{comparison_table_name}" SET rank_change = CASE WHEN "{result_dir_1}" = 0 AND "{result_dir_2}" != 0 '
+        f"THEN 'GAINED' WHEN \"{result_dir_1}\" != 0 AND \"{result_dir_2}\" = 0 THEN 'LOST' ELSE "
+        f'CAST ("{result_dir_1}" - "{result_dir_2}" AS VARCHAR) END;'
+    )
     connector.conn.commit()
 
 
 def generate_benchmark_comparison_output(
-        benchmarking_results: List[BenchmarkRunResults],
-        plot_type: str,
-        benchmark_generator: BenchmarkRunOutputGenerator,
-        table_name: str
+    benchmarking_results: List[BenchmarkRunResults],
+    plot_type: str,
+    benchmark_generator: BenchmarkRunOutputGenerator,
+    table_name: str,
 ) -> None:
     """
     Generate prioritisation outputs for benchmarking multiple runs.
@@ -86,15 +101,23 @@ def generate_benchmark_comparison_output(
     """
     output_prefix = benchmark_generator.prioritisation_type_string
     connector = DBConnector()
-    run_columns = [column for column in
-                   connector.conn.execute(f"PRAGMA table_info('{table_name}');").fetchdf()['name'].to_list()
-                   if "/" in column]
-    for pair in itertools.combinations([str(result.results_dir) for result in benchmarking_results], 2):
+    run_columns = [
+        column
+        for column in connector.conn.execute(f"PRAGMA table_info('{table_name}');")
+        .fetchdf()["name"]
+        .to_list()
+        if "/" in column
+    ]
+    for pair in itertools.combinations(
+        [str(result.results_dir) for result in benchmarking_results], 2
+    ):
         result_dir_1 = pair[0]
         result_dir_2 = pair[1]
         drop_columns = [run for run in run_columns if run not in pair]
         comparison_table_name = get_new_table_name(result_dir_1, result_dir_2, output_prefix)
-        create_comparison_table(comparison_table_name, connector, drop_columns, result_dir_1, result_dir_2, table_name)
+        create_comparison_table(
+            comparison_table_name, connector, drop_columns, result_dir_1, result_dir_2, table_name
+        )
     generate_plots(
         benchmarking_results,
         benchmark_generator,
diff --git a/src/pheval/analyse/get_connection.py b/src/pheval/analyse/get_connection.py
index 5d65fbe33..d12004288 100644
--- a/src/pheval/analyse/get_connection.py
+++ b/src/pheval/analyse/get_connection.py
@@ -19,12 +19,14 @@ def get_connection() -> DuckDBPyConnection:
         conn = duckdb.connect("analysis.db")
         return conn
 
-    def add_column_integer_default(self, table_name: str, column: str, default: int=0) -> None:
+    def add_column_integer_default(self, table_name: str, column: str, default: int = 0) -> None:
         try:
-            self.conn.execute(f'ALTER TABLE {table_name} ADD COLUMN "{column}" INTEGER DEFAULT {default}')
-            self.conn.execute(f'UPDATE {table_name} SET "{column}" = {default}')
+            self.conn.execute(
+                f'ALTER TABLE {table_name} ADD COLUMN "{column}" INTEGER DEFAULT {default}'
+            )
+            self.conn.execute(f'UPDATE {table_name} SET "{column}" = ?', (default,))
             self.conn.commit()
-        except  duckdb.CatalogException:
+        except duckdb.CatalogException:
             pass
 
     def drop_table(self, table_name: str) -> None:
diff --git a/src/pheval/analyse/parse_corpus.py b/src/pheval/analyse/parse_corpus.py
index c56661c2e..e70846323 100644
--- a/src/pheval/analyse/parse_corpus.py
+++ b/src/pheval/analyse/parse_corpus.py
@@ -1,12 +1,21 @@
 from pathlib import Path
 from typing import List
 
-from pheval.analyse.benchmark_generator import GeneBenchmarkRunOutputGenerator, VariantBenchmarkRunOutputGenerator, \
-    DiseaseBenchmarkRunOutputGenerator, BenchmarkRunOutputGenerator
+from pheval.analyse.benchmark_generator import (
+    BenchmarkRunOutputGenerator,
+    DiseaseBenchmarkRunOutputGenerator,
+    GeneBenchmarkRunOutputGenerator,
+    VariantBenchmarkRunOutputGenerator,
+)
 from pheval.analyse.get_connection import DBConnector
 from pheval.utils.file_utils import all_files
-from pheval.utils.phenopacket_utils import GenomicVariant, ProbandCausativeGene, phenopacket_reader, PhenopacketUtil, \
-    ProbandDisease
+from pheval.utils.phenopacket_utils import (
+    GenomicVariant,
+    PhenopacketUtil,
+    ProbandCausativeGene,
+    ProbandDisease,
+    phenopacket_reader,
+)
 
 
 def _obtain_causative_diseases(phenopacket_path: Path) -> List[ProbandDisease]:
@@ -54,7 +63,7 @@ def _obtain_causative_genes(phenopacket_path: Path) -> List[ProbandCausativeGene
 
 
 class CorpusParser:
-    """ Class for parsing phenopacket corpus and retrieving known variants/genes/diseases."""
+    """Class for parsing phenopacket corpus and retrieving known variants/genes/diseases."""
 
     def __init__(self, phenopacket_dir: Path) -> None:
         """
@@ -180,10 +189,8 @@ def _insert_diseases(self, phenopacket_path: Path, diseases: List[ProbandDisease
         for disease in diseases:
             identifier = f"{phenopacket_path.name}-{disease.disease_identifier}"
             self.conn.execute(
-                f"""
-                INSERT INTO {self.table_name}_disease (identifier, phenopacket, disease_identifier, disease_name)
-                VALUES (?, ?, ?, ?)
-                """,
+                f"INSERT OR IGNORE INTO {self.table_name}_disease "
+                f"(identifier, phenopacket, disease_identifier, disease_name) VALUES (?, ?, ?, ?)",
                 (
                     identifier,
                     phenopacket_path.name,
diff --git a/src/pheval/analyse/rank_stats.py b/src/pheval/analyse/rank_stats.py
index 0ccc23506..0b4069dd1 100644
--- a/src/pheval/analyse/rank_stats.py
+++ b/src/pheval/analyse/rank_stats.py
@@ -50,18 +50,26 @@ def add_ranks(self, table_name: str, column_name: str):
         conn.close()
 
     @staticmethod
-    def _execute_count_query(conn: DuckDBPyConnection, table_name: str, column_name: str, condition: str) -> int:
+    def _execute_count_query(
+        conn: DuckDBPyConnection, table_name: str, column_name: str, condition: str
+    ) -> int:
         query = f'SELECT COUNT(*) FROM {table_name} WHERE "{column_name}" {condition}'
         return conn.execute(query).fetchone()[0]
 
     @staticmethod
-    def _fetch_reciprocal_ranks(conn: DuckDBPyConnection, table_name: str, column_name: str) -> List[float]:
+    def _fetch_reciprocal_ranks(
+        conn: DuckDBPyConnection, table_name: str, column_name: str
+    ) -> List[float]:
         query = f'SELECT "{column_name}" FROM {table_name}'
         return [1 / rank[0] if rank[0] > 0 else 0 for rank in conn.execute(query).fetchall()]
 
     @staticmethod
-    def _fetch_relevant_ranks(conn: DuckDBPyConnection, table_name: str, column_name: str) -> List[List[int]]:
-        query = f'SELECT LIST("{column_name}") as values_list FROM {table_name} GROUP BY phenopacket'
+    def _fetch_relevant_ranks(
+        conn: DuckDBPyConnection, table_name: str, column_name: str
+    ) -> List[List[int]]:
+        query = (
+            f'SELECT LIST("{column_name}") as values_list FROM {table_name} GROUP BY phenopacket'
+        )
         return [rank[0] for rank in conn.execute(query).fetchall()]
 
     def percentage_rank(self, value: int) -> float:
@@ -187,7 +195,7 @@ def precision_at_k(self, k: int) -> float:
 
     @staticmethod
     def _average_precision_at_k(
-            number_of_relevant_entities_at_k: int, precision_at_k: float
+        number_of_relevant_entities_at_k: int, precision_at_k: float
     ) -> float:
         """
         Calculate the Average Precision at k.
@@ -294,60 +302,58 @@ def __init__(self, table_name: str):
         self.table_name = table_name
         conn = DBConnector().conn
         conn.execute(
-            f"""
-                    CREATE TABLE IF NOT EXISTS "{self.table_name}" (
-                        results_directory_path VARCHAR,
-                        top INT,
-                        top3 INT,
-                        top5 INT,
-                        top10 INT,
-                        "found" INT,
-                        total INT,
-                        mean_reciprocal_rank FLOAT,
-                        percentage_top FLOAT,
-                        percentage_top3 FLOAT,
-                        percentage_top5 FLOAT,
-                        percentage_top10 FLOAT,
-                        percentage_found FLOAT,
-                        "precision@1" FLOAT,
-                        "precision@3" FLOAT,
-                        "precision@5" FLOAT,
-                        "precision@10" FLOAT,
-                        "MAP@1" FLOAT,
-                        "MAP@3" FLOAT,
-                        "MAP@5" FLOAT,
-                        "MAP@10" FLOAT,
-                        "f_beta_score@1" FLOAT,
-                        "f_beta_score@3"FLOAT,
-                        "f_beta_score@5" FLOAT,
-                        "f_beta_score@10" FLOAT,
-                        "NDCG@3" FLOAT,
-                        "NDCG@5" FLOAT,
-                        "NDCG@10" FLOAT,
-                        true_positives INT,
-                        false_positives INT,
-                        true_negatives INT,
-                        false_negatives INT,
-                        sensitivity FLOAT,
-                        specificity FLOAT,
-                        "precision" FLOAT,
-                        negative_predictive_value FLOAT,
-                        false_positive_rate FLOAT,
-                        false_discovery_rate FLOAT,
-                        false_negative_rate FLOAT,
-                        accuracy FLOAT,
-                        f1_score FLOAT,
-                        matthews_correlation_coefficient FLOAT,
-                        
-                    )
-                    """
+            f'CREATE TABLE IF NOT EXISTS "{self.table_name}" ('
+            f"results_directory_path VARCHAR,"
+            f"top INT,"
+            f"top3 INT,"
+            f"top5 INT,"
+            f"top10 INT,"
+            f'"found" INT,'
+            f"total INT,"
+            f"mean_reciprocal_rank FLOAT,"
+            f"percentage_top FLOAT,"
+            f"percentage_top3 FLOAT,"
+            f"percentage_top5 FLOAT,"
+            f"percentage_top10 FLOAT,"
+            f"percentage_found FLOAT,"
+            f'"precision@1" FLOAT,'
+            f'"precision@3" FLOAT,'
+            f'"precision@5" FLOAT,'
+            f'"precision@10" FLOAT,'
+            f'"MAP@1" FLOAT,'
+            f'"MAP@3" FLOAT,'
+            f'"MAP@5" FLOAT,'
+            f'"MAP@10" FLOAT,'
+            f'"f_beta_score@1" FLOAT,'
+            f'"f_beta_score@3"FLOAT,'
+            f'"f_beta_score@5" FLOAT,'
+            f'"f_beta_score@10" FLOAT,'
+            f'"NDCG@3" FLOAT,'
+            f'"NDCG@5" FLOAT,'
+            f'"NDCG@10" FLOAT,'
+            f"true_positives INT,"
+            f"false_positives INT,"
+            f"true_negatives INT,"
+            f"false_negatives INT,"
+            f"sensitivity FLOAT,"
+            f"specificity FLOAT,"
+            f'"precision" FLOAT,'
+            f"negative_predictive_value FLOAT,"
+            f"false_positive_rate FLOAT,"
+            f"false_discovery_rate FLOAT,"
+            f"false_negative_rate FLOAT,"
+            f"accuracy FLOAT,"
+            f"f1_score FLOAT,"
+            f"matthews_correlation_coefficient FLOAT,                        )"
         )
         conn.close()
 
-    def add_statistics_entry(self,
-                             directory_path: Path,
-                             rank_stats: RankStats,
-                             binary_classification: BinaryClassificationStats):
+    def add_statistics_entry(
+        self,
+        directory_path: Path,
+        rank_stats: RankStats,
+        binary_classification: BinaryClassificationStats,
+    ):
         """
         Add statistics row to table for a run.
         Args:
@@ -356,49 +362,50 @@ def add_statistics_entry(self,
             binary_classification (BinaryClassificationStats): BinaryClassificationStats object for the run.
         """
         conn = DBConnector().conn
-        conn.execute(f"""
-                INSERT INTO "{self.table_name}" VALUES 
-                (
-                '{directory_path}',
-                {rank_stats.top},
-                {rank_stats.top3},
-                {rank_stats.top5},
-                {rank_stats.top10},
-                {rank_stats.found},
-                {rank_stats.total},
-                {rank_stats.mean_reciprocal_rank()},
-                {rank_stats.percentage_top()},
-                {rank_stats.percentage_top3()},
-                {rank_stats.percentage_top5()},
-                {rank_stats.percentage_top10()},
-                {rank_stats.percentage_found()},
-                {rank_stats.precision_at_k(1)},
-                {rank_stats.precision_at_k(3)},
-                {rank_stats.precision_at_k(5)},
-                {rank_stats.precision_at_k(10)},
-                {rank_stats.mean_average_precision_at_k(1)},
-                {rank_stats.mean_average_precision_at_k(3)},
-                {rank_stats.mean_average_precision_at_k(5)},
-                {rank_stats.mean_average_precision_at_k(10)},
-                {rank_stats.f_beta_score_at_k(rank_stats.percentage_top(), 1)},
-                {rank_stats.f_beta_score_at_k(rank_stats.percentage_top(), 3)},
-                {rank_stats.f_beta_score_at_k(rank_stats.percentage_top(), 5)},
-                {rank_stats.f_beta_score_at_k(rank_stats.percentage_top(), 10)},
-                {rank_stats.mean_normalised_discounted_cumulative_gain(3)},
-                {rank_stats.mean_normalised_discounted_cumulative_gain(5)},
-                {rank_stats.mean_normalised_discounted_cumulative_gain(10)},
-                {binary_classification.true_positives},
-                {binary_classification.false_positives},
-                {binary_classification.true_negatives},
-                {binary_classification.false_negatives},
-                {binary_classification.sensitivity()},
-                {binary_classification.specificity()},
-                {binary_classification.precision()},
-                {binary_classification.negative_predictive_value()},
-                {binary_classification.false_positive_rate()},
-                {binary_classification.false_discovery_rate()},
-                {binary_classification.false_negative_rate()},
-                {binary_classification.accuracy()},
-                {binary_classification.f1_score()},
-                {binary_classification.matthews_correlation_coefficient()},)""")
+        conn.execute(
+            f' INSERT INTO "{self.table_name}" VALUES ( '
+            f"'{directory_path}',"
+            f"{rank_stats.top},"
+            f"{rank_stats.top3},"
+            f"{rank_stats.top5},"
+            f"{rank_stats.top10},"
+            f"{rank_stats.found},"
+            f"{rank_stats.total},"
+            f"{rank_stats.mean_reciprocal_rank()},"
+            f"{rank_stats.percentage_top()},"
+            f"{rank_stats.percentage_top3()},"
+            f"{rank_stats.percentage_top5()},"
+            f"{rank_stats.percentage_top10()},"
+            f"{rank_stats.percentage_found()},"
+            f"{rank_stats.precision_at_k(1)},"
+            f"{rank_stats.precision_at_k(3)},"
+            f"{rank_stats.precision_at_k(5)},"
+            f"{rank_stats.precision_at_k(10)},"
+            f"{rank_stats.mean_average_precision_at_k(1)},"
+            f"{rank_stats.mean_average_precision_at_k(3)},"
+            f"{rank_stats.mean_average_precision_at_k(5)},"
+            f"{rank_stats.mean_average_precision_at_k(10)},"
+            f"{rank_stats.f_beta_score_at_k(rank_stats.percentage_top(), 1)},"
+            f"{rank_stats.f_beta_score_at_k(rank_stats.percentage_top(), 3)},"
+            f"{rank_stats.f_beta_score_at_k(rank_stats.percentage_top(), 5)},"
+            f"{rank_stats.f_beta_score_at_k(rank_stats.percentage_top(), 10)},"
+            f"{rank_stats.mean_normalised_discounted_cumulative_gain(3)},"
+            f"{rank_stats.mean_normalised_discounted_cumulative_gain(5)},"
+            f"{rank_stats.mean_normalised_discounted_cumulative_gain(10)},"
+            f"{binary_classification.true_positives},"
+            f"{binary_classification.false_positives},"
+            f"{binary_classification.true_negatives},"
+            f"{binary_classification.false_negatives},"
+            f"{binary_classification.sensitivity()},"
+            f"{binary_classification.specificity()},"
+            f"{binary_classification.precision()},"
+            f"{binary_classification.negative_predictive_value()},"
+            f"{binary_classification.false_positive_rate()},"
+            f"{binary_classification.false_discovery_rate()},"
+            f"{binary_classification.false_negative_rate()},"
+            f"{binary_classification.accuracy()},"
+            f"{binary_classification.f1_score()},"
+            f"{binary_classification.matthews_correlation_coefficient()})"
+        )
+
         conn.close()
diff --git a/src/pheval/analyse/variant_prioritisation_analysis.py b/src/pheval/analyse/variant_prioritisation_analysis.py
index 87108fa70..6ea131aa0 100644
--- a/src/pheval/analyse/variant_prioritisation_analysis.py
+++ b/src/pheval/analyse/variant_prioritisation_analysis.py
@@ -1,4 +1,3 @@
-from collections import defaultdict
 from pathlib import Path
 from typing import List
 
@@ -17,12 +16,12 @@ class AssessVariantPrioritisation:
     """Class for assessing variant prioritisation based on thresholds and scoring orders."""
 
     def __init__(
-            self,
-            db_connection: DBConnector,
-            table_name: str,
-            results_dir: Path,
-            threshold: float,
-            score_order: str,
+        self,
+        db_connection: DBConnector,
+        table_name: str,
+        results_dir: Path,
+        threshold: float,
+        score_order: str,
     ):
         """
         Initialise AssessVariantPrioritisation class
@@ -39,10 +38,12 @@ def __init__(
         self.conn = db_connection.conn
         self.column = str(self.results_dir.parents[0])
         self.table_name = table_name
-        db_connection.add_column_integer_default(table_name=table_name, column=self.column, default=0)
+        db_connection.add_column_integer_default(
+            table_name=table_name, column=self.column, default=0
+        )
 
     def _assess_variant_with_threshold_ascending_order(
-            self, result_entry: RankedPhEvalVariantResult
+        self, result_entry: RankedPhEvalVariantResult
     ) -> int:
         """
         Record the variant prioritisation rank if it meets the ascending order threshold.
@@ -61,9 +62,7 @@ def _assess_variant_with_threshold_ascending_order(
         else:
             return 0
 
-    def _assess_variant_with_threshold(
-            self, result_entry: RankedPhEvalVariantResult
-    ) -> int:
+    def _assess_variant_with_threshold(self, result_entry: RankedPhEvalVariantResult) -> int:
         """
         Record the variant prioritisation rank if it meets the score threshold.
 
@@ -82,7 +81,7 @@ def _assess_variant_with_threshold(
             return 0
 
     def _record_matched_variant(
-            self, standardised_variant_result: RankedPhEvalVariantResult
+        self, standardised_variant_result: RankedPhEvalVariantResult
     ) -> int:
         """
         Return the variant rank result - handling the specification of a threshold.
@@ -109,10 +108,10 @@ def _record_matched_variant(
             )
 
     def assess_variant_prioritisation(
-            self,
-            standardised_variant_results: List[RankedPhEvalVariantResult],
-            phenopacket_path: Path,
-            binary_classification_stats: BinaryClassificationStats,
+        self,
+        standardised_variant_results: List[RankedPhEvalVariantResult],
+        phenopacket_path: Path,
+        binary_classification_stats: BinaryClassificationStats,
     ) -> None:
         """
         Assess variant prioritisation.
@@ -127,37 +126,46 @@ def assess_variant_prioritisation(
         """
         relevant_ranks = []
         df = self.conn.execute(
-            f"""SELECT * FROM {self.table_name} WHERE phenopacket = '{phenopacket_path.name}'""").fetchdf()
-        for i, row in df.iterrows():
-            causative_variant = GenomicVariant(chrom=row["chrom"],
-                                               pos=int(row["pos"]),
-                                               ref=row["ref"],
-                                               alt=row["alt"], )
-            generated_matches = list(result for result in standardised_variant_results if
-                                     causative_variant == GenomicVariant(chrom=result.chromosome,
-                                                                         pos=result.start,
-                                                                         alt=result.alt,
-                                                                         ref=result.ref, ))
+            f"""SELECT * FROM {self.table_name} WHERE phenopacket = '{phenopacket_path.name}'"""
+        ).fetchdf()
+        for _i, row in df.iterrows():
+            causative_variant = GenomicVariant(
+                chrom=row["chrom"],
+                pos=int(row["pos"]),
+                ref=row["ref"],
+                alt=row["alt"],
+            )
+            generated_matches = list(
+                result
+                for result in standardised_variant_results
+                if causative_variant
+                == GenomicVariant(
+                    chrom=result.chromosome,
+                    pos=result.start,
+                    alt=result.alt,
+                    ref=result.ref,
+                )
+            )
             if len(generated_matches) > 0:
                 variant_match = self._record_matched_variant(generated_matches[0])
                 relevant_ranks.append(variant_match)
-                primary_key = (f"{phenopacket_path.name}-{causative_variant.chrom}-{causative_variant.pos}-"
-                               f"{causative_variant.ref}-{causative_variant.alt}")
+                primary_key = (
+                    f"{phenopacket_path.name}-{causative_variant.chrom}-{causative_variant.pos}-"
+                    f"{causative_variant.ref}-{causative_variant.alt}"
+                )
                 self.conn.execute(
                     f'UPDATE {self.table_name} SET "{self.column}" = ? WHERE identifier = ?',
                     (variant_match, primary_key),
                 )
 
-        binary_classification_stats.add_classification(
-            standardised_variant_results, relevant_ranks
-        )
+        binary_classification_stats.add_classification(standardised_variant_results, relevant_ranks)
 
 
 def assess_phenopacket_variant_prioritisation(
-        phenopacket_path: Path,
-        results_dir_and_input: TrackInputOutputDirectories,
-        variant_binary_classification_stats: BinaryClassificationStats,
-        variant_benchmarker: AssessVariantPrioritisation
+    phenopacket_path: Path,
+    results_dir_and_input: TrackInputOutputDirectories,
+    variant_binary_classification_stats: BinaryClassificationStats,
+    variant_benchmarker: AssessVariantPrioritisation,
 ) -> None:
     """
     Assess variant prioritisation for a Phenopacket by comparing PhEval standardised variant results
@@ -176,13 +184,14 @@ def assess_phenopacket_variant_prioritisation(
     variant_benchmarker.assess_variant_prioritisation(
         parse_pheval_result(RankedPhEvalVariantResult, pheval_variant_result),
         phenopacket_path,
-        variant_binary_classification_stats)
+        variant_binary_classification_stats,
+    )
 
 
 def benchmark_variant_prioritisation(
-        results_directory_and_input: TrackInputOutputDirectories,
-        score_order: str,
-        threshold: float,
+    results_directory_and_input: TrackInputOutputDirectories,
+    score_order: str,
+    threshold: float,
 ):
     """
     Benchmark a directory based on variant prioritisation results.
@@ -198,25 +207,25 @@ def benchmark_variant_prioritisation(
     """
     variant_binary_classification_stats = BinaryClassificationStats()
     db_connection = DBConnector()
-    variant_benchmarker = AssessVariantPrioritisation(db_connection,
-                                                      f"{results_directory_and_input.phenopacket_dir.parents[0].name}"
-                                                      f"_variant",
-                                                      results_directory_and_input.results_dir.joinpath(
-                                                          "pheval_variant_results/"),
-                                                      threshold,
-                                                      score_order,
-                                                      )
+    variant_benchmarker = AssessVariantPrioritisation(
+        db_connection,
+        f"{results_directory_and_input.phenopacket_dir.parents[0].name}" f"_variant",
+        results_directory_and_input.results_dir.joinpath("pheval_variant_results/"),
+        threshold,
+        score_order,
+    )
     for phenopacket_path in all_files(results_directory_and_input.phenopacket_dir):
         assess_phenopacket_variant_prioritisation(
             phenopacket_path,
             results_directory_and_input,
             variant_binary_classification_stats,
-            variant_benchmarker
+            variant_benchmarker,
         )
     variant_rank_stats = RankStats()
     variant_rank_stats.add_ranks(
-        table_name=f'{results_directory_and_input.phenopacket_dir.parents[0].name}_variant',
-        column_name=str(results_directory_and_input.results_dir))
+        table_name=f"{results_directory_and_input.phenopacket_dir.parents[0].name}_variant",
+        column_name=str(results_directory_and_input.results_dir),
+    )
     return BenchmarkRunResults(
         results_dir=results_directory_and_input.results_dir,
         rank_stats=variant_rank_stats,

From c62eb9e9acdc0b7a0852ca3a54328d5eab34e505 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 30 Jul 2024 14:40:35 +0100
Subject: [PATCH 32/81] allow S608

---
 tox.ini | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tox.ini b/tox.ini
index 188922454..8e32c595e 100644
--- a/tox.ini
+++ b/tox.ini
@@ -63,7 +63,7 @@ ignore =
     #S307 # Use of possibly insecure function - consider using safer ast.literal_eval.
     #S603 # subprocess call - check for execution of untrusted input.
     #S607 # Starting a process with a partial executable path ["open" in both cases]
-    #S608 # Possible SQL injection vector through string-based query construction.
+    S608 # Possible SQL injection vector through string-based query construction.
     #B024 # StreamingWriter is an abstract base class, but it has no abstract methods. 
          # Remember to use @abstractmethod, @abstractclassmethod and/or @abstractproperty decorators.
     #B027 # empty method in an abstract base class, but has no abstract decorator. Consider adding @abstractmethod

From 1ee8b1499f67efa8d92eaabbbe26c37fbf89162d Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 30 Jul 2024 16:23:23 +0100
Subject: [PATCH 33/81] remove unused methods

---
 src/pheval/analyse/gene_prioritisation_analysis.py | 4 +---
 src/pheval/analyse/get_connection.py               | 8 --------
 2 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/src/pheval/analyse/gene_prioritisation_analysis.py b/src/pheval/analyse/gene_prioritisation_analysis.py
index 2a00b47c7..de119b0bc 100644
--- a/src/pheval/analyse/gene_prioritisation_analysis.py
+++ b/src/pheval/analyse/gene_prioritisation_analysis.py
@@ -162,10 +162,8 @@ def assess_gene_prioritisation(
                 gene_match = self._record_matched_gene(generated_matches[0])
                 relevant_ranks.append(gene_match)
                 primary_key = f"{phenopacket_path.name}-{row['gene_symbol']}"
-                safe_table_name = self.db_connection.sanitize_identifier(self.table_name)
-                safe_column = self.db_connection.sanitize_identifier(self.column)
                 self.conn.execute(
-                    f"UPDATE {safe_table_name} SET {safe_column} = ? WHERE identifier = ?",
+                    f"UPDATE {self.table_name} SET \"{self.column}\" = ? WHERE identifier = ?",
                     (gene_match, primary_key),
                 )
         binary_classification_stats.add_classification(
diff --git a/src/pheval/analyse/get_connection.py b/src/pheval/analyse/get_connection.py
index d12004288..40dfc76f5 100644
--- a/src/pheval/analyse/get_connection.py
+++ b/src/pheval/analyse/get_connection.py
@@ -1,14 +1,6 @@
 import duckdb
 from duckdb import DuckDBPyConnection
 
-
-def dict_factory(cursor, row):
-    d = {}
-    for idx, col in enumerate(cursor.description):
-        d[col[0]] = row[idx]
-    return d
-
-
 class DBConnector:
 
     def __init__(self):

From aaed1841764a51b808425f2ae9602392fea0a485 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Wed, 31 Jul 2024 12:15:37 +0100
Subject: [PATCH 34/81] remove unused methods

---
 .../analyse/prioritisation_rank_recorder.py   | 83 -------------------
 1 file changed, 83 deletions(-)
 delete mode 100644 src/pheval/analyse/prioritisation_rank_recorder.py

diff --git a/src/pheval/analyse/prioritisation_rank_recorder.py b/src/pheval/analyse/prioritisation_rank_recorder.py
deleted file mode 100644
index 6f93b0ccb..000000000
--- a/src/pheval/analyse/prioritisation_rank_recorder.py
+++ /dev/null
@@ -1,83 +0,0 @@
-from collections import defaultdict
-from dataclasses import dataclass
-from pathlib import Path
-from typing import Union
-
-from pheval.analyse.prioritisation_result_types import (
-    DiseasePrioritisationResult,
-    GenePrioritisationResult,
-    VariantPrioritisationResult,
-)
-
-
-@dataclass
-class PrioritisationRankRecorder:
-    """
-    Record ranks for different types of prioritisation results.
-
-    Attributes:
-        index (int): The index representing the run.
-        directory (Path): The result directory path.
-        prioritisation_result (Union[GenePrioritisationResult, VariantPrioritisationResult,
-            DiseasePrioritisationResult]): The prioritisation result object.
-        run_comparison (defaultdict): The comparison dictionary to record ranks.
-    """
-
-    index: int
-    directory: Path
-    prioritisation_result: Union[
-        GenePrioritisationResult, VariantPrioritisationResult, DiseasePrioritisationResult
-    ]
-    run_comparison: defaultdict
-
-    def _record_gene_rank(self) -> None:
-        """
-        Record gene prioritisation rank.
-
-        This method updates the 'Gene' key in the run comparison dictionary with the gene
-        information extracted from the correct prioritisation result.
-        """
-        self.run_comparison[self.index]["Gene"] = self.prioritisation_result.gene
-
-    def _record_variant_rank(self) -> None:
-        """
-        Record variant prioritisation rank.
-
-        This method updates the 'Variant' key in the run comparison dictionary with the variant
-        information extracted from the correct prioritisation result.
-        """
-        variant = self.prioritisation_result.variant
-        self.run_comparison[self.index]["Variant"] = "-".join(
-            [variant.chrom, str(variant.pos), variant.ref, variant.alt]
-        )
-
-    def _record_disease_rank(self) -> None:
-        """
-        Record disease prioritisation rank.
-
-        This method updates the 'Disease' key in the run comparison dictionary with the disease
-        information extracted from the correct prioritisation result.
-        """
-        self.run_comparison[self.index][
-            "Disease"
-        ] = self.prioritisation_result.disease.disease_identifier
-
-    def record_rank(self) -> None:
-        """
-        Record the prioritisation ranks for different runs.
-
-        It assigns the prioritisation rank and associated details such as phenopacket name
-        and prioritisation result type ('Gene', 'Variant', or 'Disease') to the run comparison
-        dictionary for each respective run, allowing comparison and analysis of the ranks of correct results
-        across different runs.
-        """
-        self.run_comparison[self.index][
-            "Phenopacket"
-        ] = self.prioritisation_result.phenopacket_path.name
-        if type(self.prioritisation_result) is GenePrioritisationResult:
-            self._record_gene_rank()
-        elif type(self.prioritisation_result) is VariantPrioritisationResult:
-            self._record_variant_rank()
-        elif type(self.prioritisation_result) is DiseasePrioritisationResult:
-            self._record_disease_rank()
-        self.run_comparison[self.index][self.directory] = self.prioritisation_result.rank

From dbd05f08cbbd3994fa521bf81cc35c01ffb6ded1 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Wed, 31 Jul 2024 12:15:40 +0100
Subject: [PATCH 35/81] remove unused methods

---
 tests/test_analysis.py | 341 -----------------------------------------
 1 file changed, 341 deletions(-)

diff --git a/tests/test_analysis.py b/tests/test_analysis.py
index abbc650fb..fb464e491 100644
--- a/tests/test_analysis.py
+++ b/tests/test_analysis.py
@@ -6,7 +6,6 @@
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
 from pheval.analyse.disease_prioritisation_analysis import AssessDiseasePrioritisation
 from pheval.analyse.gene_prioritisation_analysis import AssessGenePrioritisation
-from pheval.analyse.prioritisation_rank_recorder import PrioritisationRankRecorder
 from pheval.analyse.prioritisation_result_types import (
     DiseasePrioritisationResult,
     GenePrioritisationResult,
@@ -21,346 +20,6 @@
 )
 from pheval.utils.phenopacket_utils import GenomicVariant, ProbandCausativeGene, ProbandDisease
 
-
-class TestPrioritisationRankRecorder(unittest.TestCase):
-    def setUp(self) -> None:
-        self.add_new_phenopacket_variant_record = PrioritisationRankRecorder(
-            1,
-            Path("directory1"),
-            VariantPrioritisationResult(
-                Path("/path/to/phenopacket-2.json"), GenomicVariant("1", 4896347, "C", "T"), 9
-            ),
-            defaultdict(
-                dict,
-                {
-                    0: {
-                        "Phenopacket": "phenopacket-1.json",
-                        "Variant": "12-120434-A-G",
-                        Path("directory1"): 3,
-                    }
-                },
-            ),
-        )
-        self.add_new_directory_variant_record = PrioritisationRankRecorder(
-            0,
-            Path("directory2"),
-            VariantPrioritisationResult(
-                Path("/path/to/phenopacket-1.json"), GenomicVariant("12", 120434, "A", "G"), 9
-            ),
-            defaultdict(
-                dict,
-                {
-                    0: {
-                        "Phenopacket": "phenopacket-1.json",
-                        "Variant": "12-120434-A-G",
-                        Path("directory1"): 3,
-                    }
-                },
-            ),
-        )
-        self.add_new_phenopacket_gene_record = PrioritisationRankRecorder(
-            1,
-            Path("directory1"),
-            GenePrioritisationResult(Path("/path/to/phenopacket-2.json"), "GENE", 7),
-            defaultdict(
-                dict,
-                {
-                    0: {
-                        "Phenopacket": "phenopacket-1.json",
-                        "Gene": "LARGE1",
-                        PosixPath("directory1"): 4,
-                    }
-                },
-            ),
-        )
-        self.add_new_directory_gene_record = PrioritisationRankRecorder(
-            0,
-            Path("directory2"),
-            GenePrioritisationResult(Path("/path/to/phenopacket-1.json"), "LARGE1", 1),
-            defaultdict(
-                dict,
-                {
-                    0: {
-                        "Phenopacket": "phenopacket-1.json",
-                        "Gene": "LARGE1",
-                        PosixPath("directory1"): 4,
-                    }
-                },
-            ),
-        )
-        self.add_new_directory_disease_record = PrioritisationRankRecorder(
-            0,
-            Path("directory2"),
-            DiseasePrioritisationResult(
-                Path("/path/to/phenopacket-1.json"),
-                ProbandDisease(disease_name="DISEASE1", disease_identifier="OMIM:12345"),
-                1,
-            ),
-            defaultdict(
-                dict,
-                {
-                    0: {
-                        "Phenopacket": "phenopacket-1.json",
-                        "Disease": "OMIM:12345",
-                        PosixPath("directory1"): 4,
-                    }
-                },
-            ),
-        )
-        self.add_new_phenopacket_disease_record = PrioritisationRankRecorder(
-            1,
-            Path("directory1"),
-            DiseasePrioritisationResult(
-                Path("/path/to/phenopacket-2.json"),
-                ProbandDisease(disease_name="DISEASE1", disease_identifier="OMIM:12345"),
-                7,
-            ),
-            defaultdict(
-                dict,
-                {
-                    0: {
-                        "Phenopacket": "phenopacket-1.json",
-                        "Disease": "OMIM:12345",
-                        PosixPath("directory1"): 4,
-                    }
-                },
-            ),
-        )
-
-    def test__record_gene_rank_new_directory(self):
-        self.assertEqual(
-            self.add_new_directory_gene_record.run_comparison,
-            defaultdict(
-                dict,
-                {
-                    0: {
-                        "Phenopacket": "phenopacket-1.json",
-                        "Gene": "LARGE1",
-                        PosixPath("directory1"): 4,
-                    }
-                },
-            ),
-        )
-        self.add_new_directory_gene_record.record_rank()
-        self.assertEqual(
-            self.add_new_directory_gene_record.run_comparison,
-            defaultdict(
-                dict,
-                {
-                    0: {
-                        "Phenopacket": "phenopacket-1.json",
-                        "Gene": "LARGE1",
-                        PosixPath("directory1"): 4,
-                        PosixPath("directory2"): 1,
-                    }
-                },
-            ),
-        )
-
-    def test__record_gene_rank_new_phenopacket(self):
-        self.assertEqual(
-            self.add_new_phenopacket_gene_record.run_comparison,
-            defaultdict(
-                dict,
-                {0: {"Phenopacket": "phenopacket-1.json", "Gene": "LARGE1", Path("directory1"): 4}},
-            ),
-        )
-        self.add_new_phenopacket_gene_record.record_rank()
-        self.assertEqual(
-            self.add_new_phenopacket_gene_record.run_comparison,
-            defaultdict(
-                dict,
-                {
-                    0: {
-                        "Phenopacket": "phenopacket-1.json",
-                        "Gene": "LARGE1",
-                        Path("directory1"): 4,
-                    },
-                    1: {"Phenopacket": "phenopacket-2.json", "Gene": "GENE", Path("directory1"): 7},
-                },
-            ),
-        )
-
-    def test__variant_rank_new_directory(self):
-        self.assertEqual(
-            self.add_new_directory_variant_record.run_comparison,
-            defaultdict(
-                dict,
-                {
-                    0: {
-                        "Phenopacket": "phenopacket-1.json",
-                        "Variant": "12-120434-A-G",
-                        Path("directory1"): 3,
-                    }
-                },
-            ),
-        )
-        self.add_new_directory_variant_record.record_rank()
-        self.assertEqual(
-            self.add_new_directory_variant_record.run_comparison,
-            defaultdict(
-                dict,
-                {
-                    0: {
-                        "Phenopacket": "phenopacket-1.json",
-                        "Variant": "12-120434-A-G",
-                        PosixPath("directory1"): 3,
-                        PosixPath("directory2"): 9,
-                    }
-                },
-            ),
-        )
-
-    def test__variant_rank_new_phenopacket(self):
-        self.assertEqual(
-            self.add_new_phenopacket_variant_record.run_comparison,
-            defaultdict(
-                dict,
-                {
-                    0: {
-                        "Phenopacket": "phenopacket-1.json",
-                        "Variant": "12-120434-A-G",
-                        Path("directory1"): 3,
-                    }
-                },
-            ),
-        )
-        self.add_new_phenopacket_variant_record.record_rank()
-        self.assertEqual(
-            self.add_new_phenopacket_variant_record.run_comparison,
-            defaultdict(
-                dict,
-                {
-                    0: {
-                        "Phenopacket": "phenopacket-1.json",
-                        "Variant": "12-120434-A-G",
-                        PosixPath("directory1"): 3,
-                    },
-                    1: {
-                        "Phenopacket": "phenopacket-2.json",
-                        "Variant": "1-4896347-C-T",
-                        PosixPath("directory1"): 9,
-                    },
-                },
-            ),
-        )
-
-    def test__disease_rank_new_directory(self):
-        self.assertEqual(
-            self.add_new_directory_disease_record.run_comparison,
-            defaultdict(
-                dict,
-                {
-                    0: {
-                        "Phenopacket": "phenopacket-1.json",
-                        "Disease": "OMIM:12345",
-                        Path("directory1"): 4,
-                    }
-                },
-            ),
-        )
-        self.add_new_directory_disease_record.record_rank()
-        self.assertEqual(
-            self.add_new_directory_disease_record.run_comparison,
-            defaultdict(
-                dict,
-                {
-                    0: {
-                        "Phenopacket": "phenopacket-1.json",
-                        "Disease": "OMIM:12345",
-                        PosixPath("directory1"): 4,
-                        PosixPath("directory2"): 1,
-                    }
-                },
-            ),
-        )
-
-    def test__disease_rank_new_phenopacket(self):
-        self.assertEqual(
-            self.add_new_phenopacket_disease_record.run_comparison,
-            defaultdict(
-                dict,
-                {
-                    0: {
-                        "Phenopacket": "phenopacket-1.json",
-                        "Disease": "OMIM:12345",
-                        Path("directory1"): 4,
-                    }
-                },
-            ),
-        )
-        self.add_new_phenopacket_disease_record.record_rank()
-        self.assertEqual(
-            self.add_new_phenopacket_disease_record.run_comparison,
-            defaultdict(
-                dict,
-                {
-                    0: {
-                        "Phenopacket": "phenopacket-1.json",
-                        "Disease": "OMIM:12345",
-                        PosixPath("directory1"): 4,
-                    },
-                    1: {
-                        "Phenopacket": "phenopacket-2.json",
-                        "Disease": "OMIM:12345",
-                        PosixPath("directory1"): 7,
-                    },
-                },
-            ),
-        )
-
-    def test_record_rank_gene(self):
-        self.add_new_directory_gene_record.record_rank()
-        self.assertEqual(
-            self.add_new_directory_gene_record.run_comparison,
-            defaultdict(
-                dict,
-                {
-                    0: {
-                        "Phenopacket": "phenopacket-1.json",
-                        "Gene": "LARGE1",
-                        PosixPath("directory1"): 4,
-                        PosixPath("directory2"): 1,
-                    }
-                },
-            ),
-        )
-
-    def test_record_rank_variant(self):
-        self.add_new_directory_variant_record.record_rank()
-        self.assertEqual(
-            self.add_new_directory_variant_record.run_comparison,
-            defaultdict(
-                dict,
-                {
-                    0: {
-                        "Phenopacket": "phenopacket-1.json",
-                        "Variant": "12-120434-A-G",
-                        PosixPath("directory1"): 3,
-                        PosixPath("directory2"): 9,
-                    }
-                },
-            ),
-        )
-
-    def test_record_rank_disease(self):
-        self.add_new_directory_disease_record.record_rank()
-        self.assertEqual(
-            self.add_new_directory_disease_record.run_comparison,
-            defaultdict(
-                dict,
-                {
-                    0: {
-                        "Phenopacket": "phenopacket-1.json",
-                        "Disease": "OMIM:12345",
-                        PosixPath("directory1"): 4,
-                        PosixPath("directory2"): 1,
-                    }
-                },
-            ),
-        )
-
-
 class TestAssessGenePrioritisation(unittest.TestCase):
     def setUp(self) -> None:
         self.assess_gene_prioritisation = AssessGenePrioritisation(

From 47e1c1f40e71fe7b4277b6c6982b70eacd03f964 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 1 Aug 2024 14:22:22 +0100
Subject: [PATCH 36/81] fix SQL statement

---
 src/pheval/analyse/gene_prioritisation_analysis.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pheval/analyse/gene_prioritisation_analysis.py b/src/pheval/analyse/gene_prioritisation_analysis.py
index de119b0bc..9fd3c1c1c 100644
--- a/src/pheval/analyse/gene_prioritisation_analysis.py
+++ b/src/pheval/analyse/gene_prioritisation_analysis.py
@@ -163,7 +163,7 @@ def assess_gene_prioritisation(
                 relevant_ranks.append(gene_match)
                 primary_key = f"{phenopacket_path.name}-{row['gene_symbol']}"
                 self.conn.execute(
-                    f"UPDATE {self.table_name} SET \"{self.column}\" = ? WHERE identifier = ?",
+                    f'UPDATE {self.table_name} SET "{self.column}" = ? WHERE identifier = ?',
                     (gene_match, primary_key),
                 )
         binary_classification_stats.add_classification(

From 54909f62c8625f29efcb21f9763558eb67d27e91 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 1 Aug 2024 14:22:40 +0100
Subject: [PATCH 37/81] tox lint

---
 src/pheval/analyse/get_connection.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/pheval/analyse/get_connection.py b/src/pheval/analyse/get_connection.py
index 40dfc76f5..88fa29dd9 100644
--- a/src/pheval/analyse/get_connection.py
+++ b/src/pheval/analyse/get_connection.py
@@ -1,6 +1,7 @@
 import duckdb
 from duckdb import DuckDBPyConnection
 
+
 class DBConnector:
 
     def __init__(self):
@@ -16,7 +17,7 @@ def add_column_integer_default(self, table_name: str, column: str, default: int
             self.conn.execute(
                 f'ALTER TABLE {table_name} ADD COLUMN "{column}" INTEGER DEFAULT {default}'
             )
-            self.conn.execute('UPDATE {table_name} SET "{column}" = ?', (default,))
+            self.conn.execute(f'UPDATE {table_name} SET "{column}" = ?', (default,))
             self.conn.commit()
         except duckdb.CatalogException:
             pass

From dd4fc79a79ab11b3c2f090d1b74895daa37e03b9 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 1 Aug 2024 14:22:59 +0100
Subject: [PATCH 38/81] update tests to utilise DuckDB

---
 tests/test_analysis.py | 1155 ++++++++--------------------------------
 1 file changed, 211 insertions(+), 944 deletions(-)

diff --git a/tests/test_analysis.py b/tests/test_analysis.py
index fb464e491..84e5fdcaf 100644
--- a/tests/test_analysis.py
+++ b/tests/test_analysis.py
@@ -1,139 +1,103 @@
 import unittest
-from collections import defaultdict
 from copy import copy
-from pathlib import Path, PosixPath
+from pathlib import Path
+from unittest.mock import patch
+
+import duckdb
 
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
 from pheval.analyse.disease_prioritisation_analysis import AssessDiseasePrioritisation
 from pheval.analyse.gene_prioritisation_analysis import AssessGenePrioritisation
-from pheval.analyse.prioritisation_result_types import (
-    DiseasePrioritisationResult,
-    GenePrioritisationResult,
-    VariantPrioritisationResult,
-)
-from pheval.analyse.rank_stats import RankStats
+from pheval.analyse.get_connection import DBConnector
 from pheval.analyse.variant_prioritisation_analysis import AssessVariantPrioritisation
 from pheval.post_processing.post_processing import (
     RankedPhEvalDiseaseResult,
     RankedPhEvalGeneResult,
     RankedPhEvalVariantResult,
 )
-from pheval.utils.phenopacket_utils import GenomicVariant, ProbandCausativeGene, ProbandDisease
+
 
 class TestAssessGenePrioritisation(unittest.TestCase):
-    def setUp(self) -> None:
+    @classmethod
+    def setUpClass(cls):
+        cls.db_connection = duckdb.connect(":memory:")
+        cls.db_connection.execute(
+            "CREATE TABLE test_table_gene (identifier VARCHAR(255) PRIMARY KEY, "
+            "phenopacket VARCHAR, gene_symbol VARCHAR, gene_identifier VARCHAR)"
+        )
+        cls.db_connection.execute(
+            "INSERT INTO test_table_gene (identifier, phenopacket, gene_symbol, gene_identifier) VALUES "
+            "('phenopacket_1.json-PLXNA1', 'phenopacket_1.json', 'PLXNA1', 'ENSG00000114554'),"
+            "('phenopacket_1.json-LARGE1', 'phenopacket_1.json', 'LARGE1', 'ENSG00000133424'),"
+        )
+        cls.standardised_gene_results = [
+            RankedPhEvalGeneResult(
+                gene_symbol="PLXNA1",
+                gene_identifier="ENSG00000114554",
+                score=0.8764,
+                rank=1,
+            ),
+            RankedPhEvalGeneResult(
+                gene_symbol="ZNF804B",
+                gene_identifier="ENSG00000182348",
+                score=0.5777,
+                rank=2,
+            ),
+            RankedPhEvalGeneResult(
+                gene_symbol="SMCO2",
+                gene_identifier="ENSG00000165935",
+                score=0.5777,
+                rank=2,
+            ),
+            RankedPhEvalGeneResult(
+                gene_symbol="SPNS1",
+                gene_identifier="ENSG00000169682",
+                score=0.3765,
+                rank=4,
+            ),
+        ]
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.db_connection.close()
+
+    def setUp(self):
+        patcher = patch(
+            "pheval.analyse.get_connection.DBConnector.get_connection",
+            return_value=self.db_connection,
+        )
+        self.mock_get_connection = patcher.start()
+        self.addCleanup(patcher.stop)
+        self.db_connector = DBConnector()
         self.assess_gene_prioritisation = AssessGenePrioritisation(
-            phenopacket_path=Path("/path/to/phenopacket.json"),
+            db_connection=self.db_connector,
+            table_name="test_table_gene",
             results_dir=Path("/path/to/results_dir"),
-            standardised_gene_results=[
-                RankedPhEvalGeneResult(
-                    gene_symbol="PLXNA1",
-                    gene_identifier="ENSG00000114554",
-                    score=0.8764,
-                    rank=1,
-                ),
-                RankedPhEvalGeneResult(
-                    gene_symbol="ZNF804B",
-                    gene_identifier="ENSG00000182348",
-                    score=0.5777,
-                    rank=2,
-                ),
-                RankedPhEvalGeneResult(
-                    gene_symbol="SMCO2",
-                    gene_identifier="ENSG00000165935",
-                    score=0.5777,
-                    rank=2,
-                ),
-                RankedPhEvalGeneResult(
-                    gene_symbol="SPNS1",
-                    gene_identifier="ENSG00000169682",
-                    score=0.3765,
-                    rank=4,
-                ),
-            ],
-            threshold=0.0,
+            threshold=0,
             score_order="descending",
-            proband_causative_genes=[
-                ProbandCausativeGene(gene_symbol="PLXNA1", gene_identifier="ENSG00000114554"),
-                ProbandCausativeGene(gene_symbol="LARGE1", gene_identifier="ENSG00000133424"),
-            ],
         )
         self.assess_gene_prioritisation_ascending_order = AssessGenePrioritisation(
-            phenopacket_path=Path("/path/to/phenopacket.json"),
+            db_connection=self.db_connector,
+            table_name="test_table_gene",
             results_dir=Path("/path/to/results_dir"),
-            standardised_gene_results=[
-                RankedPhEvalGeneResult(
-                    gene_symbol="SPNS1",
-                    gene_identifier="ENSG00000169682",
-                    score=0.3765,
-                    rank=1,
-                ),
-                RankedPhEvalGeneResult(
-                    gene_symbol="ZNF804B",
-                    gene_identifier="ENSG00000182348",
-                    score=0.5777,
-                    rank=2,
-                ),
-                RankedPhEvalGeneResult(
-                    gene_symbol="SMCO2",
-                    gene_identifier="ENSG00000165935",
-                    score=0.5777,
-                    rank=2,
-                ),
-                RankedPhEvalGeneResult(
-                    gene_symbol="['PLXNA1', 'GENE2']",
-                    gene_identifier="['ENSG00000114554', 'ENSG00000100000']",
-                    score=0.8764,
-                    rank=4,
-                ),
-            ],
-            threshold=0.0,
+            threshold=0,
             score_order="ascending",
-            proband_causative_genes=[
-                ProbandCausativeGene(gene_symbol="PLXNA1", gene_identifier="ENSG00000114554"),
-                ProbandCausativeGene(gene_symbol="LARGE1", gene_identifier="ENSG00000133424"),
-            ],
         )
-        self.gene_rank_stats = RankStats(0, 0, 0, 0, 0)
-        self.gene_rank_records = defaultdict(dict)
         self.binary_classification_stats = BinaryClassificationStats()
 
-    def test_record_gene_prioritisation_match(self):
-        self.assertEqual(
-            self.assess_gene_prioritisation._record_gene_prioritisation_match(
-                gene=ProbandCausativeGene(gene_symbol="PLXNA1", gene_identifier="ENSG00000114554"),
-                result_entry=RankedPhEvalGeneResult(
-                    gene_symbol="PLXNA1",
-                    gene_identifier="ENSG00000114554",
-                    score=0.8764,
-                    rank=1,
-                ),
-                rank_stats=self.gene_rank_stats,
-            ),
-            GenePrioritisationResult(
-                phenopacket_path=Path("/path/to/phenopacket.json"), gene="PLXNA1", rank=1
-            ),
-        )
-
     def test_assess_gene_with_ascending_order_threshold_fails_cutoff(self):
         assess_ascending_order_threshold = copy(self.assess_gene_prioritisation_ascending_order)
         assess_ascending_order_threshold.threshold = 0.1
         self.assertEqual(
             assess_ascending_order_threshold._assess_gene_with_threshold_ascending_order(
-                gene=ProbandCausativeGene(gene_symbol="PLXNA1", gene_identifier="ENSG00000114554"),
                 result_entry=RankedPhEvalGeneResult(
                     gene_symbol="PLXNA1",
                     gene_identifier="ENSG00000114554",
                     score=0.8764,
                     rank=1,
                 ),
-                rank_stats=self.gene_rank_stats,
             ),
-            None,
-        )
-        self.assertEqual(
-            self.gene_rank_stats,
-            RankStats(top=0, top3=0, top5=0, top10=0, found=0, total=0, reciprocal_ranks=[]),
+            0,
         )
 
     def test_assess_gene_with_ascending_order_threshold_meets_cutoff(self):
@@ -141,22 +105,14 @@ def test_assess_gene_with_ascending_order_threshold_meets_cutoff(self):
         assess_ascending_order_threshold.threshold = 0.9
         self.assertEqual(
             assess_ascending_order_threshold._assess_gene_with_threshold_ascending_order(
-                gene=ProbandCausativeGene(gene_symbol="PLXNA1", gene_identifier="ENSG00000114554"),
                 result_entry=RankedPhEvalGeneResult(
                     gene_symbol="PLXNA1",
                     gene_identifier="ENSG00000114554",
                     score=0.8764,
                     rank=1,
                 ),
-                rank_stats=self.gene_rank_stats,
-            ),
-            GenePrioritisationResult(
-                phenopacket_path=Path("/path/to/phenopacket.json"), gene="PLXNA1", rank=1
             ),
-        )
-        self.assertEqual(
-            self.gene_rank_stats,
-            RankStats(top=1, top3=1, top5=1, top10=1, found=1, total=0, reciprocal_ranks=[1.0]),
+            1,
         )
 
     def test_assess_gene_with_threshold_fails_cutoff(self):
@@ -164,20 +120,14 @@ def test_assess_gene_with_threshold_fails_cutoff(self):
         assess_with_threshold.threshold = 0.9
         self.assertEqual(
             assess_with_threshold._assess_gene_with_threshold(
-                gene=ProbandCausativeGene(gene_symbol="PLXNA1", gene_identifier="ENSG00000114554"),
                 result_entry=RankedPhEvalGeneResult(
                     gene_symbol="PLXNA1",
                     gene_identifier="ENSG00000114554",
                     score=0.8764,
                     rank=1,
                 ),
-                rank_stats=self.gene_rank_stats,
             ),
-            None,
-        )
-        self.assertEqual(
-            self.gene_rank_stats,
-            RankStats(top=0, top3=0, top5=0, top10=0, found=0, total=0, reciprocal_ranks=[]),
+            0,
         )
 
     def test_assess_gene_with_threshold_meets_cutoff(self):
@@ -185,239 +135,29 @@ def test_assess_gene_with_threshold_meets_cutoff(self):
         assess_with_threshold.threshold = 0.5
         self.assertEqual(
             assess_with_threshold._assess_gene_with_threshold(
-                gene=ProbandCausativeGene(gene_symbol="PLXNA1", gene_identifier="ENSG00000114554"),
                 result_entry=RankedPhEvalGeneResult(
                     gene_symbol="PLXNA1",
                     gene_identifier="ENSG00000114554",
                     score=0.8764,
                     rank=1,
                 ),
-                rank_stats=self.gene_rank_stats,
-            ),
-            GenePrioritisationResult(
-                phenopacket_path=Path("/path/to/phenopacket.json"), gene="PLXNA1", rank=1
             ),
-        )
-        self.assertEqual(
-            self.gene_rank_stats,
-            RankStats(top=1, top3=1, top5=1, top10=1, found=1, total=0, reciprocal_ranks=[1.0]),
+            1,
         )
 
     def test_assess_gene_prioritisation_no_threshold(self):
         self.assess_gene_prioritisation.assess_gene_prioritisation(
-            self.gene_rank_stats, self.gene_rank_records, self.binary_classification_stats
-        )
-        self.assertEqual(
-            self.gene_rank_stats,
-            RankStats(
-                top=1,
-                top3=1,
-                top5=1,
-                top10=1,
-                found=1,
-                total=2,
-                reciprocal_ranks=[1.0],
-                relevant_result_ranks=[[1]],
-            ),
-        )
-        self.assertEqual(
-            self.gene_rank_records,
-            {
-                1: {
-                    "Phenopacket": "phenopacket.json",
-                    "Gene": "PLXNA1",
-                    Path("/path/to/results_dir"): 1,
-                },
-                2: {
-                    "Phenopacket": "phenopacket.json",
-                    "Gene": "LARGE1",
-                    Path("/path/to/results_dir"): 0,
-                },
-            },
-        )
-        self.assertEqual(
+            self.standardised_gene_results,
+            Path("/path/to/phenopacket_1.json"),
             self.binary_classification_stats,
-            BinaryClassificationStats(
-                true_positives=1,
-                true_negatives=3,
-                false_positives=0,
-                false_negatives=0,
-                labels=[1, 0, 0, 0],
-                scores=[0.8764, 0.5777, 0.5777, 0.3765],
-            ),
-        )
-
-    def test_assess_gene_prioritisation_threshold_fails_ascending_order_cutoff(self):
-        assess_with_threshold = copy(self.assess_gene_prioritisation_ascending_order)
-        assess_with_threshold.threshold = 0.01
-        assess_with_threshold.assess_gene_prioritisation(
-            self.gene_rank_stats, self.gene_rank_records, self.binary_classification_stats
         )
+        self.db_connector.conn.execute("SELECT * FROM test_table_gene")
         self.assertEqual(
-            self.gene_rank_stats,
-            RankStats(
-                top=0,
-                top3=0,
-                top5=0,
-                top10=0,
-                found=0,
-                total=2,
-                reciprocal_ranks=[],
-                relevant_result_ranks=[[0]],
-            ),
-        )
-        self.assertEqual(
-            self.gene_rank_records,
-            {
-                1: {
-                    "Phenopacket": "phenopacket.json",
-                    "Gene": "PLXNA1",
-                    Path("/path/to/results_dir"): 0,
-                },
-                2: {
-                    "Phenopacket": "phenopacket.json",
-                    "Gene": "LARGE1",
-                    Path("/path/to/results_dir"): 0,
-                },
-            },
-        )
-        self.assertEqual(
-            self.binary_classification_stats,
-            BinaryClassificationStats(
-                true_positives=0,
-                true_negatives=3,
-                false_positives=1,
-                false_negatives=1,
-                labels=[0, 0, 0, 0],
-                scores=[0.3765, 0.5777, 0.5777, 0.8764],
-            ),
-        )
-
-    def test_assess_gene_prioritisation_threshold_meets_ascending_order_cutoff(self):
-        assess_with_threshold = copy(self.assess_gene_prioritisation_ascending_order)
-        assess_with_threshold.threshold = 0.9
-        assess_with_threshold.assess_gene_prioritisation(
-            self.gene_rank_stats, self.gene_rank_records, self.binary_classification_stats
-        )
-        self.assertEqual(
-            self.gene_rank_stats,
-            RankStats(
-                top=0,
-                top3=0,
-                top5=1,
-                top10=1,
-                found=1,
-                total=2,
-                reciprocal_ranks=[0.25],
-                relevant_result_ranks=[[4]],
-            ),
-        )
-        self.assertEqual(
-            self.gene_rank_records,
-            {
-                1: {
-                    "Phenopacket": "phenopacket.json",
-                    "Gene": "PLXNA1",
-                    Path("/path/to/results_dir"): 4,
-                },
-                2: {
-                    "Phenopacket": "phenopacket.json",
-                    "Gene": "LARGE1",
-                    Path("/path/to/results_dir"): 0,
-                },
-            },
-        )
-        self.assertEqual(
-            self.binary_classification_stats,
-            BinaryClassificationStats(
-                true_positives=0,
-                true_negatives=2,
-                false_positives=1,
-                false_negatives=1,
-                labels=[0, 0, 0, 1],
-                scores=[0.3765, 0.5777, 0.5777, 0.8764],
-            ),
-        )
-
-    def test_assess_gene_prioritisation_threshold_fails_cutoff(self):
-        assess_with_threshold = copy(self.assess_gene_prioritisation)
-        assess_with_threshold.threshold = 0.9
-        assess_with_threshold.assess_gene_prioritisation(
-            self.gene_rank_stats, self.gene_rank_records, self.binary_classification_stats
-        )
-        self.assertEqual(
-            self.gene_rank_stats,
-            RankStats(
-                top=0,
-                top3=0,
-                top5=0,
-                top10=0,
-                found=0,
-                total=2,
-                reciprocal_ranks=[],
-                relevant_result_ranks=[[0]],
-            ),
-        )
-        self.assertEqual(
-            self.gene_rank_records,
-            {
-                1: {
-                    "Phenopacket": "phenopacket.json",
-                    "Gene": "PLXNA1",
-                    Path("/path/to/results_dir"): 0,
-                },
-                2: {
-                    "Phenopacket": "phenopacket.json",
-                    "Gene": "LARGE1",
-                    Path("/path/to/results_dir"): 0,
-                },
-            },
-        )
-        self.assertEqual(
-            self.binary_classification_stats,
-            BinaryClassificationStats(
-                true_positives=0,
-                true_negatives=3,
-                false_positives=1,
-                false_negatives=1,
-                labels=[0, 0, 0, 0],
-                scores=[0.8764, 0.5777, 0.5777, 0.3765],
-            ),
-        )
-
-    def test_assess_gene_prioritisation_threshold_meets_cutoff(self):
-        assess_with_threshold = copy(self.assess_gene_prioritisation)
-        assess_with_threshold.threshold = 0.1
-        assess_with_threshold.assess_gene_prioritisation(
-            self.gene_rank_stats, self.gene_rank_records, self.binary_classification_stats
-        )
-        self.assertEqual(
-            self.gene_rank_stats,
-            RankStats(
-                top=1,
-                top3=1,
-                top5=1,
-                top10=1,
-                found=1,
-                total=2,
-                reciprocal_ranks=[1.0],
-                relevant_result_ranks=[[1]],
-            ),
-        )
-        self.assertEqual(
-            self.gene_rank_records,
-            {
-                1: {
-                    "Phenopacket": "phenopacket.json",
-                    "Gene": "PLXNA1",
-                    Path("/path/to/results_dir"): 1,
-                },
-                2: {
-                    "Phenopacket": "phenopacket.json",
-                    "Gene": "LARGE1",
-                    Path("/path/to/results_dir"): 0,
-                },
-            },
+            self.db_connector.conn.fetchall(),
+            [
+                ("phenopacket_1.json-PLXNA1", "phenopacket_1.json", "PLXNA1", "ENSG00000114554", 1),
+                ("phenopacket_1.json-LARGE1", "phenopacket_1.json", "LARGE1", "ENSG00000133424", 0),
+            ],
         )
         self.assertEqual(
             self.binary_classification_stats,
@@ -444,8 +184,19 @@ def test__check_string_representation_list(self):
 
 
 class TestAssessVariantPrioritisation(unittest.TestCase):
-    def setUp(self) -> None:
-        variant_results = [
+    @classmethod
+    def setUpClass(cls):
+        cls.db_connection = duckdb.connect(":memory:")
+        cls.db_connection.execute(
+            "CREATE TABLE test_table_variant (identifier VARCHAR(255) PRIMARY KEY,"
+            "phenopacket VARCHAR, chrom VARCHAR, pos INTEGER, ref VARCHAR, alt VARCHAR)"
+        )
+        cls.db_connection.execute(
+            "INSERT INTO test_table_variant (identifier, phenopacket, chrom, pos, ref, alt) VALUES "
+            "('phenopacket_1.json-3-126741108-G-C', 'phenopacket_1.json', '3', 126741108, 'G', 'C'),"
+            "('phenopacket_1.json-16-133564345-C-T', 'phenopacket_1.json', '16', 133564345, 'C', 'T'),"
+        )
+        cls.standardised_variant_results = [
             RankedPhEvalVariantResult(
                 chromosome="3",
                 start=126730873,
@@ -469,58 +220,40 @@ def setUp(self) -> None:
                 start=126741108,
                 end=126741108,
                 ref="G",
-                alt="A",
+                alt="C",
                 score=0.0484,
-                rank=1,
+                rank=2,
             ),
         ]
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.db_connection.close()
+
+    def setUp(self):
+        patcher = patch(
+            "pheval.analyse.get_connection.DBConnector.get_connection",
+            return_value=self.db_connection,
+        )
+        self.mock_get_connection = patcher.start()
+        self.addCleanup(patcher.stop)
+        self.db_connector = DBConnector()
         self.assess_variant_prioritisation = AssessVariantPrioritisation(
-            phenopacket_path=Path("/path/to/phenopacket.json"),
+            db_connection=self.db_connector,
+            table_name="test_table_variant",
             results_dir=Path("/path/to/results_dir"),
-            standardised_variant_results=variant_results,
-            threshold=0.0,
+            threshold=0,
             score_order="descending",
-            proband_causative_variants=[
-                GenomicVariant(chrom="3", pos=126741108, ref="G", alt="A"),
-                GenomicVariant(chrom="16", pos=133564345, ref="C", alt="T"),
-            ],
         )
         self.assess_variant_prioritisation_ascending_order = AssessVariantPrioritisation(
-            phenopacket_path=Path("/path/to/phenopacket.json"),
+            db_connection=self.db_connector,
+            table_name="test_table_variant",
             results_dir=Path("/path/to/results_dir"),
-            standardised_variant_results=variant_results,
-            threshold=0.0,
+            threshold=0,
             score_order="ascending",
-            proband_causative_variants=[
-                GenomicVariant(chrom="3", pos=126741108, ref="G", alt="A"),
-                GenomicVariant(chrom="16", pos=133564345, ref="C", alt="T"),
-            ],
         )
-        self.variant_rank_stats = RankStats()
-        self.variant_rank_records = defaultdict(dict)
         self.binary_classification_stats = BinaryClassificationStats()
 
-    def test_record_variant_prioritisation_match(self):
-        self.assertEqual(
-            self.assess_variant_prioritisation._record_variant_prioritisation_match(
-                result_entry=RankedPhEvalVariantResult(
-                    chromosome="3",
-                    start=126741108,
-                    end=126741108,
-                    ref="G",
-                    alt="A",
-                    score=0.0484,
-                    rank=1,
-                ),
-                rank_stats=self.variant_rank_stats,
-            ),
-            VariantPrioritisationResult(
-                phenopacket_path=Path("/path/to/phenopacket.json"),
-                variant=GenomicVariant(chrom="3", pos=126741108, ref="G", alt="A"),
-                rank=1,
-            ),
-        )
-
     def test_assess_variant_with_ascending_order_threshold_fails_cutoff(self):
         assess_with_threshold = copy(self.assess_variant_prioritisation_ascending_order)
         assess_with_threshold.threshold = 0.01
@@ -535,13 +268,8 @@ def test_assess_variant_with_ascending_order_threshold_fails_cutoff(self):
                     score=0.0484,
                     rank=1,
                 ),
-                rank_stats=self.variant_rank_stats,
             ),
-            None,
-        )
-        self.assertEqual(
-            self.variant_rank_stats,
-            RankStats(top=0, top3=0, top5=0, top10=0, found=0, total=0, reciprocal_ranks=[]),
+            0,
         )
 
     def test_assess_variant_with_ascending_order_threshold_meets_cutoff(self):
@@ -558,17 +286,8 @@ def test_assess_variant_with_ascending_order_threshold_meets_cutoff(self):
                     score=0.0484,
                     rank=1,
                 ),
-                rank_stats=self.variant_rank_stats,
-            ),
-            VariantPrioritisationResult(
-                phenopacket_path=Path("/path/to/phenopacket.json"),
-                variant=GenomicVariant(chrom="3", pos=126741108, ref="G", alt="A"),
-                rank=1,
             ),
-        )
-        self.assertEqual(
-            self.variant_rank_stats,
-            RankStats(top=1, top3=1, top5=1, top10=1, found=1, total=0, reciprocal_ranks=[1.0]),
+            1,
         )
 
     def test_assess_variant_with_threshold_fails_cutoff(self):
@@ -585,13 +304,8 @@ def test_assess_variant_with_threshold_fails_cutoff(self):
                     score=0.0484,
                     rank=1,
                 ),
-                rank_stats=self.variant_rank_stats,
             ),
-            None,
-        )
-        self.assertEqual(
-            self.variant_rank_stats,
-            RankStats(top=0, top3=0, top5=0, top10=0, found=0, total=0, reciprocal_ranks=[]),
+            0,
         )
 
     def test_assess_variant_with_threshold_meets_cutoff(self):
@@ -608,373 +322,134 @@ def test_assess_variant_with_threshold_meets_cutoff(self):
                     score=0.0484,
                     rank=1,
                 ),
-                rank_stats=self.variant_rank_stats,
-            ),
-            VariantPrioritisationResult(
-                phenopacket_path=Path("/path/to/phenopacket.json"),
-                variant=GenomicVariant(chrom="3", pos=126741108, ref="G", alt="A"),
-                rank=1,
             ),
-        )
-        self.assertEqual(
-            self.variant_rank_stats,
-            RankStats(top=1, top3=1, top5=1, top10=1, found=1, total=0, reciprocal_ranks=[1.0]),
+            1,
         )
 
-    def test_assess_variant_prioritisation_no_threshold(self):
+    def test_assess_variant_prioritisation(self):
         self.assess_variant_prioritisation.assess_variant_prioritisation(
-            self.variant_rank_stats, self.variant_rank_records, self.binary_classification_stats
-        )
-
-        self.assertEqual(
-            self.variant_rank_stats,
-            RankStats(
-                top=1,
-                top3=1,
-                top5=1,
-                top10=1,
-                found=1,
-                total=2,
-                reciprocal_ranks=[1.0],
-                relevant_result_ranks=[[1]],
-            ),
-        )
-        self.assertEqual(
-            self.variant_rank_records,
-            {
-                1: {
-                    "Phenopacket": "phenopacket.json",
-                    "Variant": "3-126741108-G-A",
-                    Path("/path/to/results_dir"): 1,
-                },
-                2: {
-                    "Phenopacket": "phenopacket.json",
-                    "Variant": "16-133564345-C-T",
-                    Path("/path/to/results_dir"): 0,
-                },
-            },
-        )
-        self.assertEqual(
+            self.standardised_variant_results,
+            Path("/path/to/phenopacket_1.json"),
             self.binary_classification_stats,
-            BinaryClassificationStats(
-                true_positives=1,
-                true_negatives=0,
-                false_positives=2,
-                false_negatives=0,
-                labels=[1, 0, 0],
-                scores=[0.0484, 0.0484, 0.0484],
-            ),
-        )
-
-    def test_assess_variant_prioritisation_fails_ascending_order_cutoff(self):
-        assess_with_threshold = copy(self.assess_variant_prioritisation_ascending_order)
-        assess_with_threshold.threshold = 0.01
-        assess_with_threshold.assess_variant_prioritisation(
-            self.variant_rank_stats, self.variant_rank_records, self.binary_classification_stats
         )
-        self.assertEqual(
-            self.variant_rank_stats,
-            RankStats(
-                top=0,
-                top3=0,
-                top5=0,
-                top10=0,
-                found=0,
-                total=2,
-                reciprocal_ranks=[],
-                relevant_result_ranks=[[0]],
-            ),
-        )
-        self.assertEqual(
-            self.variant_rank_records,
-            {
-                1: {
-                    "Phenopacket": "phenopacket.json",
-                    "Variant": "3-126741108-G-A",
-                    Path("/path/to/results_dir"): 0,
-                },
-                2: {
-                    "Phenopacket": "phenopacket.json",
-                    "Variant": "16-133564345-C-T",
-                    Path("/path/to/results_dir"): 0,
-                },
-            },
+        self.db_connector.conn.execute("SELECT * FROM test_table_variant")
+        self.assertEqual(
+            self.db_connector.conn.fetchall(),
+            [
+                (
+                    "phenopacket_1.json-3-126741108-G-C",
+                    "phenopacket_1.json",
+                    "3",
+                    126741108,
+                    "G",
+                    "C",
+                    2,
+                ),
+                (
+                    "phenopacket_1.json-16-133564345-C-T",
+                    "phenopacket_1.json",
+                    "16",
+                    133564345,
+                    "C",
+                    "T",
+                    0,
+                ),
+            ],
         )
         self.assertEqual(
             self.binary_classification_stats,
             BinaryClassificationStats(
                 true_positives=0,
                 true_negatives=0,
-                false_positives=3,
+                false_positives=2,
                 false_negatives=1,
-                labels=[0, 0, 0],
+                labels=[0, 0, 1],
                 scores=[0.0484, 0.0484, 0.0484],
             ),
         )
 
-    def test_assess_variant_prioritisation_meets_ascending_order_cutoff(self):
-        assess_with_threshold = copy(self.assess_variant_prioritisation_ascending_order)
-        assess_with_threshold.threshold = 0.9
-        assess_with_threshold.assess_variant_prioritisation(
-            self.variant_rank_stats, self.variant_rank_records, self.binary_classification_stats
-        )
-        self.assertEqual(
-            self.variant_rank_stats,
-            RankStats(
-                top=1,
-                top3=1,
-                top5=1,
-                top10=1,
-                found=1,
-                total=2,
-                reciprocal_ranks=[1.0],
-                relevant_result_ranks=[[1]],
-            ),
-        )
-        self.assertEqual(
-            self.variant_rank_records,
-            {
-                1: {
-                    "Phenopacket": "phenopacket.json",
-                    "Variant": "3-126741108-G-A",
-                    Path("/path/to/results_dir"): 1,
-                },
-                2: {
-                    "Phenopacket": "phenopacket.json",
-                    "Variant": "16-133564345-C-T",
-                    Path("/path/to/results_dir"): 0,
-                },
-            },
-        )
-        self.assertEqual(
-            self.binary_classification_stats,
-            BinaryClassificationStats(
-                true_positives=1,
-                true_negatives=0,
-                false_positives=2,
-                false_negatives=0,
-                labels=[1, 0, 0],
-                scores=[0.0484, 0.0484, 0.0484],
-            ),
-        )
 
-    def test_assess_variant_prioritisation_fails_cutoff(self):
-        assess_with_threshold = copy(self.assess_variant_prioritisation)
-        assess_with_threshold.threshold = 0.9
-        assess_with_threshold.assess_variant_prioritisation(
-            self.variant_rank_stats, self.variant_rank_records, self.binary_classification_stats
-        )
-        self.assertEqual(
-            self.variant_rank_stats,
-            RankStats(
-                top=0,
-                top3=0,
-                top5=0,
-                top10=0,
-                found=0,
-                total=2,
-                reciprocal_ranks=[],
-                relevant_result_ranks=[[0]],
+class TestAssessDiseasePrioritisation(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls.db_connection = duckdb.connect(":memory:")
+        cls.db_connection.execute(
+            "CREATE TABLE test_table_disease (identifier VARCHAR(255) PRIMARY KEY, "
+            "phenopacket VARCHAR, disease_identifier VARCHAR, disease_name VARCHAR)"
+        )
+        cls.db_connection.execute(
+            "INSERT INTO test_table_disease (identifier, phenopacket, disease_identifier, disease_name) VALUES "
+            "('phenopacket_1.json-OMIM:231670', 'phenopacket_1.json', 'OMIM:231670', 'Glutaric aciduria type 1'),"
+        )
+        cls.standardised_disease_results = [
+            RankedPhEvalDiseaseResult(
+                disease_name="Glutaric aciduria type 1",
+                disease_identifier="OMIM:231670",
+                score=1.0,
+                rank=1,
             ),
-        )
-        self.assertEqual(
-            self.variant_rank_records,
-            {
-                1: {
-                    "Phenopacket": "phenopacket.json",
-                    "Variant": "3-126741108-G-A",
-                    Path("/path/to/results_dir"): 0,
-                },
-                2: {
-                    "Phenopacket": "phenopacket.json",
-                    "Variant": "16-133564345-C-T",
-                    Path("/path/to/results_dir"): 0,
-                },
-            },
-        )
-        self.assertEqual(
-            self.binary_classification_stats,
-            BinaryClassificationStats(
-                true_positives=0,
-                true_negatives=0,
-                false_positives=3,
-                false_negatives=1,
-                labels=[0, 0, 0],
-                scores=[0.0484, 0.0484, 0.0484],
+            RankedPhEvalDiseaseResult(
+                disease_name="Glutaric aciduria type 2",
+                disease_identifier="OMIM:231680",
+                score=0.5,
+                rank=2,
             ),
-        )
-
-    def test_assess_variant_prioritisation_meets_cutoff(self):
-        assess_with_threshold = copy(self.assess_variant_prioritisation_ascending_order)
-        assess_with_threshold.threshold = 0.1
-        assess_with_threshold.assess_variant_prioritisation(
-            self.variant_rank_stats, self.variant_rank_records, self.binary_classification_stats
-        )
-        self.assertEqual(
-            self.variant_rank_stats,
-            RankStats(
-                top=1,
-                top3=1,
-                top5=1,
-                top10=1,
-                found=1,
-                total=2,
-                reciprocal_ranks=[1.0],
-                relevant_result_ranks=[[1]],
+            RankedPhEvalDiseaseResult(
+                disease_name="Glutaric aciduria type 3",
+                disease_identifier="OMIM:231690",
+                score=0.5,
+                rank=2,
             ),
-        )
-        self.assertEqual(
-            self.variant_rank_records,
-            {
-                1: {
-                    "Phenopacket": "phenopacket.json",
-                    "Variant": "3-126741108-G-A",
-                    Path("/path/to/results_dir"): 1,
-                },
-                2: {
-                    "Phenopacket": "phenopacket.json",
-                    "Variant": "16-133564345-C-T",
-                    Path("/path/to/results_dir"): 0,
-                },
-            },
-        )
-        self.assertEqual(
-            self.binary_classification_stats,
-            BinaryClassificationStats(
-                true_positives=1,
-                true_negatives=0,
-                false_positives=2,
-                false_negatives=0,
-                labels=[1, 0, 0],
-                scores=[0.0484, 0.0484, 0.0484],
+            RankedPhEvalDiseaseResult(
+                disease_name="Glutaric aciduria type 4",
+                disease_identifier="OMIM:231700",
+                score=0.3,
+                rank=4,
             ),
-        )
+        ]
 
+    @classmethod
+    def tearDownClass(cls):
+        cls.db_connection.close()
 
-class TestAssessDiseasePrioritisation(unittest.TestCase):
-    def setUp(self) -> None:
+    def setUp(self):
+        patcher = patch(
+            "pheval.analyse.get_connection.DBConnector.get_connection",
+            return_value=self.db_connection,
+        )
+        self.mock_get_connection = patcher.start()
+        self.addCleanup(patcher.stop)
+        self.db_connector = DBConnector()
         self.assess_disease_prioritisation = AssessDiseasePrioritisation(
-            phenopacket_path=Path("/path/to/phenopacket.json"),
+            db_connection=self.db_connector,
+            table_name="test_table_disease",
             results_dir=Path("/path/to/results_dir"),
-            standardised_disease_results=[
-                RankedPhEvalDiseaseResult(
-                    disease_name="Glutaric aciduria type 1",
-                    disease_identifier="OMIM:231670",
-                    score=1.0,
-                    rank=1,
-                ),
-                RankedPhEvalDiseaseResult(
-                    disease_name="Glutaric aciduria type 2",
-                    disease_identifier="OMIM:231680",
-                    score=0.5,
-                    rank=2,
-                ),
-                RankedPhEvalDiseaseResult(
-                    disease_name="Glutaric aciduria type 3",
-                    disease_identifier="OMIM:231690",
-                    score=0.5,
-                    rank=2,
-                ),
-                RankedPhEvalDiseaseResult(
-                    disease_name="Glutaric aciduria type 4",
-                    disease_identifier="OMIM:231700",
-                    score=0.3,
-                    rank=4,
-                ),
-            ],
-            threshold=0.0,
+            threshold=0,
             score_order="descending",
-            proband_diseases=[
-                ProbandDisease(
-                    disease_identifier="OMIM:231670", disease_name="Glutaric aciduria type 1"
-                )
-            ],
         )
         self.assess_disease_prioritisation_ascending_order = AssessDiseasePrioritisation(
-            phenopacket_path=Path("/path/to/phenopacket.json"),
+            db_connection=self.db_connector,
+            table_name="test_table_disease",
             results_dir=Path("/path/to/results_dir"),
-            standardised_disease_results=[
-                RankedPhEvalDiseaseResult(
-                    disease_name="Glutaric aciduria type 4",
-                    disease_identifier="OMIM:231690",
-                    score=0.3765,
-                    rank=1,
-                ),
-                RankedPhEvalDiseaseResult(
-                    disease_name="Glutaric aciduria type 2",
-                    disease_identifier="OMIM:231680",
-                    score=0.5777,
-                    rank=2,
-                ),
-                RankedPhEvalDiseaseResult(
-                    disease_name="Glutaric aciduria type 3",
-                    disease_identifier="OMIM:231690",
-                    score=0.5777,
-                    rank=2,
-                ),
-                RankedPhEvalDiseaseResult(
-                    disease_name="Glutaric aciduria type 1",
-                    disease_identifier="OMIM:231670",
-                    score=0.8764,
-                    rank=4,
-                ),
-            ],
-            threshold=0.0,
+            threshold=0,
             score_order="ascending",
-            proband_diseases=[
-                ProbandDisease(
-                    disease_identifier="OMIM:231670", disease_name="Glutaric aciduria type 1"
-                )
-            ],
         )
-        self.disease_rank_stats = RankStats(0, 0, 0, 0, 0)
-        self.disease_rank_records = defaultdict(dict)
         self.binary_classification_stats = BinaryClassificationStats()
 
-    def test_record_disease_prioritisation_match(self):
-        self.assertEqual(
-            self.assess_disease_prioritisation._record_disease_prioritisation_match(
-                disease=ProbandDisease(
-                    disease_identifier="OMIM:231670", disease_name="Glutaric aciduria type 1"
-                ),
-                result_entry=RankedPhEvalDiseaseResult(
-                    disease_name="Glutaric aciduria type 1",
-                    disease_identifier="OMIM:231670",
-                    score=0.8764,
-                    rank=1,
-                ),
-                rank_stats=self.disease_rank_stats,
-            ),
-            DiseasePrioritisationResult(
-                phenopacket_path=PosixPath("/path/to/phenopacket.json"),
-                disease=ProbandDisease(
-                    disease_name="Glutaric aciduria type 1", disease_identifier="OMIM:231670"
-                ),
-                rank=1,
-            ),
-        )
-
     def test_assess_disease_with_ascending_order_threshold_fails_cutoff(self):
         assess_ascending_order_threshold = copy(self.assess_disease_prioritisation_ascending_order)
         assess_ascending_order_threshold.threshold = 0.1
         self.assertEqual(
             assess_ascending_order_threshold._assess_disease_with_threshold_ascending_order(
-                disease=ProbandDisease(
-                    disease_identifier="OMIM:231670", disease_name="Glutaric aciduria type 1"
-                ),
                 result_entry=RankedPhEvalDiseaseResult(
                     disease_name="Glutaric aciduria type 1",
                     disease_identifier="OMIM:231670",
                     score=0.8764,
                     rank=1,
                 ),
-                rank_stats=self.disease_rank_stats,
             ),
-            None,
-        )
-        self.assertEqual(
-            self.disease_rank_stats,
-            RankStats(top=0, top3=0, top5=0, top10=0, found=0, total=0, reciprocal_ranks=[]),
+            0,
         )
 
     def test_assess_disease_with_ascending_order_threshold_meets_cutoff(self):
@@ -982,28 +457,14 @@ def test_assess_disease_with_ascending_order_threshold_meets_cutoff(self):
         assess_ascending_order_threshold.threshold = 0.9
         self.assertEqual(
             assess_ascending_order_threshold._assess_disease_with_threshold_ascending_order(
-                disease=ProbandDisease(
-                    disease_identifier="OMIM:231670", disease_name="Glutaric aciduria type 1"
-                ),
                 result_entry=RankedPhEvalDiseaseResult(
                     disease_name="Glutaric aciduria type 1",
                     disease_identifier="OMIM:231670",
                     score=0.8764,
                     rank=1,
                 ),
-                rank_stats=self.disease_rank_stats,
-            ),
-            DiseasePrioritisationResult(
-                phenopacket_path=PosixPath("/path/to/phenopacket.json"),
-                disease=ProbandDisease(
-                    disease_name="Glutaric aciduria type 1", disease_identifier="OMIM:231670"
-                ),
-                rank=1,
             ),
-        )
-        self.assertEqual(
-            self.disease_rank_stats,
-            RankStats(top=1, top3=1, top5=1, top10=1, found=1, total=0, reciprocal_ranks=[1.0]),
+            1,
         )
 
     def test_assess_disease_with_threshold_fails_cutoff(self):
@@ -1011,22 +472,14 @@ def test_assess_disease_with_threshold_fails_cutoff(self):
         assess_with_threshold.threshold = 0.9
         self.assertEqual(
             assess_with_threshold._assess_disease_with_threshold(
-                disease=ProbandDisease(
-                    disease_identifier="OMIM:231670", disease_name="Glutaric aciduria type 1"
-                ),
                 result_entry=RankedPhEvalDiseaseResult(
                     disease_identifier="OMIM:231670",
                     disease_name="Glutaric aciduria type 1",
                     score=0.8764,
                     rank=1,
                 ),
-                rank_stats=self.disease_rank_stats,
             ),
-            None,
-        )
-        self.assertEqual(
-            self.disease_rank_stats,
-            RankStats(top=0, top3=0, top5=0, top10=0, found=0, total=0, reciprocal_ranks=[]),
+            0,
         )
 
     def test_assess_disease_with_threshold_meets_cutoff(self):
@@ -1034,220 +487,34 @@ def test_assess_disease_with_threshold_meets_cutoff(self):
         assess_with_threshold.threshold = 0.5
         self.assertEqual(
             assess_with_threshold._assess_disease_with_threshold(
-                disease=ProbandDisease(
-                    disease_identifier="OMIM:231670", disease_name="Glutaric aciduria type 1"
-                ),
                 result_entry=RankedPhEvalDiseaseResult(
                     disease_identifier="OMIM:231670",
                     disease_name="Glutaric aciduria type 1",
                     score=0.8764,
                     rank=1,
                 ),
-                rank_stats=self.disease_rank_stats,
-            ),
-            DiseasePrioritisationResult(
-                phenopacket_path=Path("/path/to/phenopacket.json"),
-                disease=ProbandDisease(
-                    disease_name="Glutaric aciduria type 1", disease_identifier="OMIM:231670"
-                ),
-                rank=1,
             ),
-        )
-        self.assertEqual(
-            self.disease_rank_stats,
-            RankStats(top=1, top3=1, top5=1, top10=1, found=1, total=0, reciprocal_ranks=[1.0]),
+            1,
         )
 
-    def test_assess_disease_prioritisation_no_threshold(self):
+    def test_assess_disease_prioritisation(self):
         self.assess_disease_prioritisation.assess_disease_prioritisation(
-            self.disease_rank_stats, self.disease_rank_records, self.binary_classification_stats
-        )
-        self.assertEqual(
-            self.disease_rank_stats,
-            RankStats(
-                top=1,
-                top3=1,
-                top5=1,
-                top10=1,
-                found=1,
-                total=1,
-                reciprocal_ranks=[1.0],
-                relevant_result_ranks=[[1]],
-            ),
-        )
-        self.assertEqual(
-            self.disease_rank_records,
-            {
-                1: {
-                    Path("/path/to/results_dir"): 1,
-                    "Disease": "OMIM:231670",
-                    "Phenopacket": "phenopacket.json",
-                }
-            },
-        )
-        self.assertEqual(
-            self.binary_classification_stats,
-            BinaryClassificationStats(
-                true_positives=1,
-                true_negatives=3,
-                false_positives=0,
-                false_negatives=0,
-                labels=[1, 0, 0, 0],
-                scores=[1.0, 0.5, 0.5, 0.3],
-            ),
-        )
-
-    def test_assess_disease_prioritisation_threshold_fails_ascending_order_cutoff(self):
-        assess_with_threshold = copy(self.assess_disease_prioritisation_ascending_order)
-        assess_with_threshold.threshold = 0.01
-        assess_with_threshold.assess_disease_prioritisation(
-            self.disease_rank_stats, self.disease_rank_records, self.binary_classification_stats
-        )
-        self.assertEqual(
-            self.disease_rank_stats,
-            RankStats(
-                top=0,
-                top3=0,
-                top5=0,
-                top10=0,
-                found=0,
-                total=1,
-                reciprocal_ranks=[],
-                relevant_result_ranks=[[0]],
-            ),
-        )
-        self.assertEqual(
-            self.disease_rank_records,
-            {
-                1: {
-                    Path("/path/to/results_dir"): 0,
-                    "Disease": "OMIM:231670",
-                    "Phenopacket": "phenopacket.json",
-                }
-            },
-        )
-        self.assertEqual(
+            self.standardised_disease_results,
+            Path("/path/to/phenopacket_1.json"),
             self.binary_classification_stats,
-            BinaryClassificationStats(
-                true_positives=0,
-                true_negatives=3,
-                false_positives=1,
-                false_negatives=1,
-                labels=[0, 0, 0, 0],
-                scores=[0.3765, 0.5777, 0.5777, 0.8764],
-            ),
-        )
-
-    def test_assess_disease_prioritisation_threshold_meets_ascending_order_cutoff(self):
-        assess_with_threshold = copy(self.assess_disease_prioritisation_ascending_order)
-        assess_with_threshold.threshold = 0.9
-        assess_with_threshold.assess_disease_prioritisation(
-            self.disease_rank_stats, self.disease_rank_records, self.binary_classification_stats
-        )
-        self.assertEqual(
-            self.disease_rank_stats,
-            RankStats(
-                top=0,
-                top3=0,
-                top5=1,
-                top10=1,
-                found=1,
-                total=1,
-                reciprocal_ranks=[0.25],
-                relevant_result_ranks=[[4]],
-            ),
-        )
-        self.assertEqual(
-            self.disease_rank_records,
-            {
-                1: {
-                    Path("/path/to/results_dir"): 4,
-                    "Disease": "OMIM:231670",
-                    "Phenopacket": "phenopacket.json",
-                },
-            },
-        )
-        self.assertEqual(
-            self.binary_classification_stats,
-            BinaryClassificationStats(
-                true_positives=0,
-                true_negatives=2,
-                false_positives=1,
-                false_negatives=1,
-                labels=[0, 0, 0, 1],
-                scores=[0.3765, 0.5777, 0.5777, 0.8764],
-            ),
-        )
-
-    def test_assess_disease_prioritisation_threshold_fails_cutoff(self):
-        assess_with_threshold = copy(self.assess_disease_prioritisation)
-        assess_with_threshold.threshold = 1.0
-        assess_with_threshold.assess_disease_prioritisation(
-            self.disease_rank_stats, self.disease_rank_records, self.binary_classification_stats
         )
+        self.db_connector.conn.execute("SELECT * FROM test_table_disease")
         self.assertEqual(
-            self.disease_rank_stats,
-            RankStats(
-                top=0,
-                top3=0,
-                top5=0,
-                top10=0,
-                found=0,
-                total=1,
-                reciprocal_ranks=[],
-                relevant_result_ranks=[[0]],
-            ),
-        )
-        self.assertEqual(
-            self.disease_rank_records,
-            {
-                1: {
-                    Path("/path/to/results_dir"): 0,
-                    "Disease": "OMIM:231670",
-                    "Phenopacket": "phenopacket.json",
-                }
-            },
-        )
-        self.assertEqual(
-            self.binary_classification_stats,
-            BinaryClassificationStats(
-                true_positives=0,
-                true_negatives=3,
-                false_positives=1,
-                false_negatives=1,
-                labels=[0, 0, 0, 0],
-                scores=[1.0, 0.5, 0.5, 0.3],
-            ),
-        )
-
-    def test_assess_disease_prioritisation_threshold_meets_cutoff(self):
-        assess_with_threshold = copy(self.assess_disease_prioritisation)
-        assess_with_threshold.threshold = 0.1
-        assess_with_threshold.assess_disease_prioritisation(
-            self.disease_rank_stats, self.disease_rank_records, self.binary_classification_stats
-        )
-        self.assertEqual(
-            self.disease_rank_stats,
-            RankStats(
-                top=1,
-                top3=1,
-                top5=1,
-                top10=1,
-                found=1,
-                total=1,
-                reciprocal_ranks=[1.0],
-                relevant_result_ranks=[[1]],
-            ),
-        )
-        self.assertEqual(
-            self.disease_rank_records,
-            {
-                1: {
-                    Path("/path/to/results_dir"): 1,
-                    "Disease": "OMIM:231670",
-                    "Phenopacket": "phenopacket.json",
-                }
-            },
+            self.db_connector.conn.fetchall(),
+            [
+                (
+                    "phenopacket_1.json-OMIM:231670",
+                    "phenopacket_1.json",
+                    "OMIM:231670",
+                    "Glutaric aciduria type 1",
+                    1,
+                )
+            ],
         )
         self.assertEqual(
             self.binary_classification_stats,

From 873ac14e05ff00dc8ca80b7a0797243edef623e3 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 1 Aug 2024 14:23:17 +0100
Subject: [PATCH 39/81] remove argument

---
 tests/test_generate_plots.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_generate_plots.py b/tests/test_generate_plots.py
index d61b92d85..42469f0e3 100644
--- a/tests/test_generate_plots.py
+++ b/tests/test_generate_plots.py
@@ -15,7 +15,6 @@ def setUp(self) -> None:
         self.disease_plot_generator = PlotGenerator()
         self.benchmarking_result = BenchmarkRunResults(
             benchmark_name="tool_corpus",
-            ranks={},
             rank_stats=RankStats(
                 top=1,
                 top3=2,

From 33d67c0423022dd69e776c73b968007398768fe4 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 1 Aug 2024 14:24:41 +0100
Subject: [PATCH 40/81] update `RankStats.add_ranks()` mocking duckdb
 connection

---
 tests/test_rank_stats.py | 55 ++++++++++++++++++++++++++++------------
 1 file changed, 39 insertions(+), 16 deletions(-)

diff --git a/tests/test_rank_stats.py b/tests/test_rank_stats.py
index 32fa01874..268f3ee54 100644
--- a/tests/test_rank_stats.py
+++ b/tests/test_rank_stats.py
@@ -1,9 +1,33 @@
 import unittest
+from unittest.mock import patch
+
+import duckdb
 
 from pheval.analyse.rank_stats import RankStats
 
 
 class TestRankStats(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls.db_connection = duckdb.connect(":memory:")
+        cls.db_connection.execute(
+            "CREATE TABLE test_table_gene (identifier VARCHAR(255) PRIMARY KEY, "
+            "phenopacket VARCHAR, gene_symbol VARCHAR, gene_identifier VARCHAR, results_dir_1 INTEGER)"
+        )
+        cls.db_connection.execute(
+            "INSERT INTO test_table_gene (identifier, phenopacket, gene_symbol, gene_identifier, results_dir_1) VALUES "
+            "('phenopacket_1.json-GENE1', 'phenopacket_1.json', 'GENE1', 'GENEID1', 1),"
+            "('phenopacket_2.json-GENE2', 'phenopacket_2.json', 'GENE2', 'GENEID2', 3),"
+            "('phenopacket_3.json-GENE3', 'phenopacket_3.json', 'GENE3', 'GENEID3', 5),"
+            "('phenopacket_4.json-GENE4', 'phenopacket_4.json', 'GENE4', 'GENEID4', 7),"
+            "('phenopacket_5.json-GENE5', 'phenopacket_5.json', 'GENE5', 'GENEID5', 10),"
+            "('phenopacket_2.json-GENE6', 'phenopacket_6.json', 'GENE6', 'GENEID6', 20),"
+        )
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.db_connection.close()
+
     def setUp(self) -> None:
         self.rank_stats = RankStats()
         self.complete_rank_stats = RankStats(
@@ -16,23 +40,22 @@ def setUp(self) -> None:
             relevant_result_ranks=[[4], [3], [6, 7], [2], [9], [20]],
         )
 
-    def test_add_rank(self):
-        self.rank_stats.add_rank(1)
-        self.rank_stats.add_rank(3)
-        self.rank_stats.add_rank(5)
-        self.rank_stats.add_rank(7)
-        self.rank_stats.add_rank(10)
+    @patch(
+        "pheval.analyse.get_connection.DBConnector.get_connection",
+        return_value=duckdb.connect(":memory:"),
+    )
+    def test_add_ranks(self, mock_get_connection):
+        mock_get_connection.return_value = self.db_connection
+        self.rank_stats.add_ranks("test_table_gene", "results_dir_1")
+        self.assertEqual(self.rank_stats.top, 1)
+        self.assertEqual(self.rank_stats.top3, 2)
+        self.assertEqual(self.rank_stats.top5, 3)
+        self.assertEqual(self.rank_stats.top10, 5)
+        self.assertEqual(self.rank_stats.found, 6)
+        self.assertEqual(self.rank_stats.total, 6)
         self.assertEqual(
-            self.rank_stats,
-            RankStats(
-                top=1,
-                top3=2,
-                top5=3,
-                top10=5,
-                found=5,
-                total=0,
-                reciprocal_ranks=[1.0, 0.3333333333333333, 0.2, 0.14285714285714285, 0.1],
-            ),
+            self.rank_stats.reciprocal_ranks,
+            [1.0, 0.3333333333333333, 0.2, 0.14285714285714285, 0.1, 0.05],
         )
 
     def test_percentage_rank(self):

From 278518d9703521004854f48eba54b583aa2d5284 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 1 Aug 2024 14:43:19 +0100
Subject: [PATCH 41/81] add tests for comparison tables

---
 tests/test_generate_summary_outputs.py | 158 ++++++++-----------------
 1 file changed, 51 insertions(+), 107 deletions(-)

diff --git a/tests/test_generate_summary_outputs.py b/tests/test_generate_summary_outputs.py
index a08b047df..293a4f723 100644
--- a/tests/test_generate_summary_outputs.py
+++ b/tests/test_generate_summary_outputs.py
@@ -1,125 +1,69 @@
 import unittest
-from collections import defaultdict
+from pathlib import Path
+from unittest.mock import patch
 
-import pandas as pd
+import duckdb
 
-from pheval.analyse.generate_summary_outputs import RankComparisonGenerator, merge_results
+from pheval.analyse.generate_summary_outputs import create_comparison_table, get_new_table_name
+from pheval.analyse.get_connection import DBConnector
 
 
-class TestMergeResults(unittest.TestCase):
-    def setUp(self) -> None:
-        self.result_1 = {
-            1: {
-                "Phenopacket": "phenopacket1.json",
-                "Gene": "GCDH",
-                "/path/to/results_directory1": 1,
-            }
-        }
-        self.result_2 = {
-            1: {
-                "Phenopacket": "phenopacket1.json",
-                "Gene": "GCDH",
-                "/path/to/results_directory2": 5,
-            }
-        }
-
-    def test_merge_results(self):
+class TestGetNewTableName(unittest.TestCase):
+    def test_get_new_table_name(self):
+        new_table_name = get_new_table_name(
+            Path("/path/to/result_dir_1/corpus_1"), Path("/path/to/result_dir_2/corpus_1"), "gene"
+        )
         self.assertEqual(
-            merge_results(self.result_1, self.result_2),
-            {
-                1: {
-                    "Phenopacket": "phenopacket1.json",
-                    "Gene": "GCDH",
-                    "/path/to/results_directory1": 1,
-                    "/path/to/results_directory2": 5,
-                }
-            },
+            new_table_name, "result_dir_1_corpus_1_vs_result_dir_2_corpus_1_gene_rank_comparison"
         )
 
 
-class TestRankComparisonGenerator(unittest.TestCase):
-    def setUp(self) -> None:
-        self.gene_rank_comparisons = RankComparisonGenerator(
-            defaultdict(
-                dict,
-                {
-                    1: {
-                        "Phenopacket": "phenopacket1.json",
-                        "Gene": "GCDH",
-                        "/path/to/results_directory1": 1,
-                        "/path/to/results_directory2": 5,
-                    }
-                },
-            )
+class TestCreateComparisonTable(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls.db_connection = duckdb.connect(":memory:")
+        cls.db_connection.execute(
+            "CREATE TABLE test_table_gene (identifier VARCHAR(255) PRIMARY KEY, "
+            "phenopacket VARCHAR, gene_symbol VARCHAR, gene_identifier VARCHAR, "
+            '"/path/to/result_dir_1" INTEGER, '
+            '"/path/to/result_dir_2" INTEGER, '
+            '"/path/to/result_dir_3" INTEGER)'
         )
-        self.variant_rank_comparisons = RankComparisonGenerator(
-            defaultdict(
-                dict,
-                {
-                    1: {
-                        "Phenopacket": "phenopacket1.json",
-                        "Variant": "3-12563453454-C-T",
-                        "/path/to/results_directory1": 9,
-                        "/path/to/results_directory2": 3,
-                    }
-                },
-            )
+        cls.db_connection.execute(
+            "INSERT INTO test_table_gene (identifier, phenopacket, gene_symbol, gene_identifier, "
+            '"/path/to/result_dir_1", "/path/to/result_dir_2", "/path/to/result_dir_3") VALUES '
+            "('phenopacket_1.json-PLXNA1', 'phenopacket_1.json', 'PLXNA1', 'ENSG00000114554', 1, 0, 5),"
+            "('phenopacket_1.json-LARGE1', 'phenopacket_1.json', 'LARGE1', 'ENSG00000133424', 2, 9, 0),"
         )
 
-    def test_generate_gene_dataframe(self):
-        result = pd.DataFrame(
-            [
-                {
-                    "Phenopacket": "phenopacket1.json",
-                    "Gene": "GCDH",
-                    "/path/to/results_directory1": 1,
-                    "/path/to/results_directory2": 5,
-                }
-            ]
-        )
-        result.index += 1
-        self.assertTrue(result.equals(self.gene_rank_comparisons._generate_dataframe()))
+    @classmethod
+    def tearDownClass(cls):
+        cls.db_connection.close()
 
-    def test_generate_variant_dataframe(self):
-        result = pd.DataFrame(
-            [
-                {
-                    "Phenopacket": "phenopacket1.json",
-                    "Variant": "3-12563453454-C-T",
-                    "/path/to/results_directory1": 9,
-                    "/path/to/results_directory2": 3,
-                }
-            ]
+    def setUp(self):
+        patcher = patch(
+            "pheval.analyse.get_connection.DBConnector.get_connection",
+            return_value=self.db_connection,
         )
-        result.index += 1
-        self.assertTrue(result.equals(self.variant_rank_comparisons._generate_dataframe()))
+        self.mock_get_connection = patcher.start()
+        self.addCleanup(patcher.stop)
+        self.db_connector = DBConnector()
 
-    def test_calculate_gene_rank_difference(self):
-        result = pd.DataFrame(
-            [
-                {
-                    "Phenopacket": "phenopacket1.json",
-                    "Gene": "GCDH",
-                    "/path/to/results_directory1": 1,
-                    "/path/to/results_directory2": 5,
-                    "rank_change": -4,
-                }
-            ]
+    def test_create_comparison_table(self):
+        create_comparison_table(
+            "comparison_table_1",
+            self.db_connector,
+            ["/path/to/result_dir_1"],
+            "/path/to/result_dir_2",
+            "/path/to/result_dir_3",
+            "test_table_gene",
         )
-        result.index += 1
-        self.assertTrue(result.equals(self.gene_rank_comparisons._calculate_rank_difference()))
-
-    def test_calculate_variant_rank_difference(self):
-        result = pd.DataFrame(
+        self.db_connector.conn.execute("SELECT * FROM comparison_table_1")
+        self.assertEqual(
+            self.db_connector.conn.fetchall(),
             [
-                {
-                    "Phenopacket": "phenopacket1.json",
-                    "Variant": "3-12563453454-C-T",
-                    "/path/to/results_directory1": 9,
-                    "/path/to/results_directory2": 3,
-                    "rank_change": 6,
-                }
-            ]
+                ("phenopacket_1.json", "PLXNA1", "ENSG00000114554", 0, 5, "GAINED"),
+                ("phenopacket_1.json", "LARGE1", "ENSG00000133424", 9, 0, "LOST"),
+            ],
         )
-        result.index += 1
-        self.assertTrue(result.equals(self.variant_rank_comparisons._calculate_rank_difference()))

From dccdd09fe9fe0ffd9afa09d7ad6023bd9eb0c321 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 1 Aug 2024 14:54:45 +0100
Subject: [PATCH 42/81] add google docstrings

---
 .../analyse/generate_summary_outputs.py       | 10 +++++
 src/pheval/analyse/get_connection.py          | 22 +++++++++++
 src/pheval/analyse/rank_stats.py              | 38 ++++++++++++++++++-
 3 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/src/pheval/analyse/generate_summary_outputs.py b/src/pheval/analyse/generate_summary_outputs.py
index 6ce27757e..7c8b3b274 100644
--- a/src/pheval/analyse/generate_summary_outputs.py
+++ b/src/pheval/analyse/generate_summary_outputs.py
@@ -63,6 +63,16 @@ def create_comparison_table(
     result_dir_2: str,
     table_name: str,
 ) -> None:
+    """
+    Create rank comparison tables.
+    Args:
+        comparison_table_name (str): Name of the comparison table to create.
+        connector (DBConnector): DBConnector instance.
+        drop_columns (List[str]): List of columns to drop.
+        result_dir_1 (str): Path to the first result directory.
+        result_dir_2 (str): Path to the second result directory.
+        table_name (str): Name of the table to extract ranks from.
+    """
     connector.drop_table(comparison_table_name)
     connector.conn.execute(
         f'CREATE TABLE "{comparison_table_name}" AS SELECT * '
diff --git a/src/pheval/analyse/get_connection.py b/src/pheval/analyse/get_connection.py
index 88fa29dd9..b54e6a8e0 100644
--- a/src/pheval/analyse/get_connection.py
+++ b/src/pheval/analyse/get_connection.py
@@ -3,16 +3,32 @@
 
 
 class DBConnector:
+    """
+    Class to connect to database.
+    """
 
     def __init__(self):
+        """Initialize the DBConnector class."""
         self.conn = self.get_connection()
 
     @staticmethod
     def get_connection() -> DuckDBPyConnection:
+        """
+        Get a connection to the database.
+        Returns:
+            DuckDBPyConnection: Connection to the database.
+        """
         conn = duckdb.connect("analysis.db")
         return conn
 
     def add_column_integer_default(self, table_name: str, column: str, default: int = 0) -> None:
+        """
+        Add a column to an existing table with an integer default value.
+        Args:
+            table_name (str): Name of the table.
+            column (str): Name of the column to add.
+            default (int): Default integer value to add.
+        """
         try:
             self.conn.execute(
                 f'ALTER TABLE {table_name} ADD COLUMN "{column}" INTEGER DEFAULT {default}'
@@ -23,7 +39,13 @@ def add_column_integer_default(self, table_name: str, column: str, default: int
             pass
 
     def drop_table(self, table_name: str) -> None:
+        """
+        Drop a table from the database.
+        Args:
+            table_name (str): Name of the table to drop from the database.
+        """
         self.conn.execute(f"""DROP TABLE IF EXISTS "{table_name}";""")
 
     def close(self):
+        """Close the connection to the database."""
         self.conn.close()
diff --git a/src/pheval/analyse/rank_stats.py b/src/pheval/analyse/rank_stats.py
index 0b4069dd1..b6e943e42 100644
--- a/src/pheval/analyse/rank_stats.py
+++ b/src/pheval/analyse/rank_stats.py
@@ -37,7 +37,13 @@ class RankStats:
     relevant_result_ranks: List[List[int]] = field(default_factory=list)
     mrr: float = None
 
-    def add_ranks(self, table_name: str, column_name: str):
+    def add_ranks(self, table_name: str, column_name: str) -> None:
+        """
+        Add ranks to RankStats instance from table.
+        Args:
+            table_name (str): Name of the table to add ranks from.
+            column_name (str): Name of the column to add ranks from.
+        """
         conn = DBConnector().conn
         self.top = self._execute_count_query(conn, table_name, column_name, " = 1")
         self.top3 = self._execute_count_query(conn, table_name, column_name, " BETWEEN 1 AND 3")
@@ -53,6 +59,16 @@ def add_ranks(self, table_name: str, column_name: str):
     def _execute_count_query(
         conn: DuckDBPyConnection, table_name: str, column_name: str, condition: str
     ) -> int:
+        """
+        Execute count query on table.
+        Args:
+            conn (DuckDBPyConnection): Connection to the database.
+            table_name (str): Name of the table to execute count query on.
+            column_name (str): Name of the column to execute count query on.
+            condition (str): Condition to execute count query.
+        Returns:
+            int: Count query result.
+        """
         query = f'SELECT COUNT(*) FROM {table_name} WHERE "{column_name}" {condition}'
         return conn.execute(query).fetchone()[0]
 
@@ -60,6 +76,16 @@ def _execute_count_query(
     def _fetch_reciprocal_ranks(
         conn: DuckDBPyConnection, table_name: str, column_name: str
     ) -> List[float]:
+        """
+        Fetch reciprocal ranks from table.
+        Args:
+            conn (DuckDBPyConnection): Connection to the database.
+            table_name (str): Name of the table to fetch reciprocal ranks from.
+            column_name (str): Name of the column to fetch reciprocal ranks from.
+
+        Returns:
+            List[float]: List of reciprocal ranks.
+        """
         query = f'SELECT "{column_name}" FROM {table_name}'
         return [1 / rank[0] if rank[0] > 0 else 0 for rank in conn.execute(query).fetchall()]
 
@@ -67,6 +93,16 @@ def _fetch_reciprocal_ranks(
     def _fetch_relevant_ranks(
         conn: DuckDBPyConnection, table_name: str, column_name: str
     ) -> List[List[int]]:
+        """
+        Fetch relevant ranks from table.
+        Args:
+            conn (DuckDBPyConnection): Connection to the database.
+            table_name (str): Name of the table to fetch relevant ranks from.
+            column_name (str): Name of the column to fetch relevant ranks from.
+
+        Returns:
+            List[List[int]]: List of relevant ranks.
+        """
         query = (
             f'SELECT LIST("{column_name}") as values_list FROM {table_name} GROUP BY phenopacket'
         )

From 1bb68bf1f2cc010272ca8fb18396973be59e059c Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Wed, 7 Aug 2024 12:42:15 +0100
Subject: [PATCH 43/81] alter codebase to process run configurations for
 benchmarking

---
 src/pheval/analyse/analysis.py | 77 +++++++++++++++-------------------
 1 file changed, 33 insertions(+), 44 deletions(-)

diff --git a/src/pheval/analyse/analysis.py b/src/pheval/analyse/analysis.py
index 6ef05d27a..820e35f20 100644
--- a/src/pheval/analyse/analysis.py
+++ b/src/pheval/analyse/analysis.py
@@ -1,5 +1,3 @@
-from typing import List
-
 from pheval.analyse.benchmark_generator import (
     BenchmarkRunOutputGenerator,
     DiseaseBenchmarkRunOutputGenerator,
@@ -12,11 +10,11 @@
 )
 from pheval.analyse.parse_corpus import CorpusParser
 from pheval.analyse.rank_stats import RankStatsWriter
-from pheval.analyse.run_data_parser import TrackInputOutputDirectories
+from pheval.analyse.run_data_parser import Config, RunConfig
 
 
 def _run_benchmark(
-    results_dir_and_input: TrackInputOutputDirectories,
+    run_config: RunConfig,
     score_order: str,
     output_prefix: str,
     threshold: float,
@@ -26,22 +24,22 @@ def _run_benchmark(
     """Run a benchmark on a result directory.
 
     Args:
-        results_dir_and_input (TrackInputOutputDirectories): Input and output directories for tracking results.
+        run_config (RunConfig): Run configuration.
         score_order (str): The order in which scores are arranged, this can be either ascending or descending.
         output_prefix (str): Prefix for the benchmark output file names.
         threshold (float): The threshold for benchmark evaluation.
         plot_type (str): Type of plot for benchmark visualisation.
         benchmark_generator (BenchmarkRunOutputGenerator): Generator for benchmark run output.
     """
-    CorpusParser(results_dir_and_input.phenopacket_dir).parse_corpus(benchmark_generator)
+    CorpusParser(run_config.phenopacket_dir).parse_corpus(benchmark_generator)
     stats_writer = RankStatsWriter(
         str(output_prefix + benchmark_generator.stats_comparison_file_suffix)
     )
     benchmark_result = benchmark_generator.generate_benchmark_run_results(
-        results_dir_and_input, score_order, threshold
+        run_config, score_order, threshold
     )
     stats_writer.add_statistics_entry(
-        results_dir_and_input.results_dir,
+        run_config.run_identifier,
         benchmark_result.rank_stats,
         benchmark_result.binary_classification_stats,
     )
@@ -49,49 +47,43 @@ def _run_benchmark(
 
 
 def benchmark_directory(
-    results_dir_and_input: TrackInputOutputDirectories,
+    run_config: RunConfig,
     score_order: str,
     output_prefix: str,
     threshold: float,
-    gene_analysis: bool,
-    variant_analysis: bool,
-    disease_analysis: bool,
     plot_type: str,
 ) -> None:
     """
     Benchmark prioritisation performance for a single run.
 
     Args:
-        results_dir_and_input (TrackInputOutputDirectories): Input and output directories for tracking results.
+        run_config (RunConfig): Run configuration.
         score_order (str): The order in which scores are arranged, this can be either ascending or descending.
         output_prefix (str): Prefix for the benchmark output file names.
         threshold (float): The threshold for benchmark evaluation.
-        gene_analysis (bool): Boolean flag indicating whether to benchmark gene results.
-        variant_analysis (bool): Boolean flag indicating whether to benchmark variant results.
-        disease_analysis (bool): Boolean flag indicating whether to benchmark disease results.
         plot_type (str): Type of plot for benchmark visualisation.
     """
-    if gene_analysis:
+    if run_config.gene_analysis:
         _run_benchmark(
-            results_dir_and_input=results_dir_and_input,
+            run_config=run_config,
             score_order=score_order,
             output_prefix=output_prefix,
             threshold=threshold,
             plot_type=plot_type,
             benchmark_generator=GeneBenchmarkRunOutputGenerator(),
         )
-    if variant_analysis:
+    if run_config.variant_analysis:
         _run_benchmark(
-            results_dir_and_input=results_dir_and_input,
+            run_config=run_config,
             score_order=score_order,
             output_prefix=output_prefix,
             threshold=threshold,
             plot_type=plot_type,
             benchmark_generator=VariantBenchmarkRunOutputGenerator(),
         )
-    if disease_analysis:
+    if run_config.disease_analysis:
         _run_benchmark(
-            results_dir_and_input=results_dir_and_input,
+            run_config=run_config,
             score_order=score_order,
             output_prefix=output_prefix,
             threshold=threshold,
@@ -101,7 +93,7 @@ def benchmark_directory(
 
 
 def _run_benchmark_comparison(
-    results_directories: List[TrackInputOutputDirectories],
+    run_config: Config,
     score_order: str,
     output_prefix: str,
     threshold: float,
@@ -112,7 +104,7 @@ def _run_benchmark_comparison(
     Run a benchmark on several result directories.
 
     Args:
-        results_directories (List[TrackInputOutputDirectories]): List of input and output directories
+        run_config (Config): Run configurations (one entry per run)
             for tracking results across multiple directories.
         score_order (str): The order in which scores are arranged, this can be either ascending or descending.
         output_prefix (str): Prefix for the benchmark output file names.
@@ -123,27 +115,27 @@ def _run_benchmark_comparison(
     stats_writer = RankStatsWriter(
         str(output_prefix + benchmark_generator.stats_comparison_file_suffix)
     )
-    unique_test_corpora_directories = set(
-        [result.phenopacket_dir for result in results_directories]
-    )
+    unique_test_corpora_directories = set([result.phenopacket_dir for result in run_config.runs])
     [
         CorpusParser(test_corpora_directory).parse_corpus(benchmark_generator)
         for test_corpora_directory in unique_test_corpora_directories
     ]
     benchmarking_results = []
-    for results_dir_and_input in results_directories:
+    for run in run_config.runs:
         benchmark_result = benchmark_generator.generate_benchmark_run_results(
-            results_dir_and_input, score_order, threshold
+            run, score_order, threshold
         )
         stats_writer.add_statistics_entry(
-            results_dir_and_input.results_dir,
+            run.run_identifier,
             benchmark_result.rank_stats,
             benchmark_result.binary_classification_stats,
         )
         benchmarking_results.append(benchmark_result)
+    run_identifiers = [run.run_identifier for run in run_config.runs]
     [
         generate_benchmark_comparison_output(
             benchmarking_results,
+            run_identifiers,
             plot_type,
             benchmark_generator,
             f"{unique_test_corpora_directory.parents[0].name}_"
@@ -154,49 +146,46 @@ def _run_benchmark_comparison(
 
 
 def benchmark_run_comparisons(
-    results_directories: List[TrackInputOutputDirectories],
+    run_config: Config,
     score_order: str,
     output_prefix: str,
     threshold: float,
-    gene_analysis: bool,
-    variant_analysis: bool,
-    disease_analysis: bool,
     plot_type: str,
 ) -> None:
     """
     Benchmark prioritisation performance for several runs.
 
     Args:
-        results_directories (List[TrackInputOutputDirectories]): Input and output directories for tracking results.
+        run_config (Config): Run configurations.
         score_order (str): The order in which scores are arranged, this can be either ascending or descending.
         output_prefix (str): Prefix for the benchmark output file names.
         threshold (float): The threshold for benchmark evaluation.
-        gene_analysis (bool): Boolean flag indicating whether to benchmark gene results.
-        variant_analysis (bool): Boolean flag indicating whether to benchmark variant results.
-        disease_analysis (bool): Boolean flag indicating whether to benchmark disease results.
         plot_type (str): Type of plot for benchmark visualisation.
     """
-    if gene_analysis:
+    gene_analysis_runs = Config(runs=[run for run in run_config.runs if run.gene_analysis])
+    variant_analysis_runs = Config(runs=[run for run in run_config.runs if run.variant_analysis])
+    disease_analysis_runs = Config(runs=[run for run in run_config.runs if run.disease_analysis])
+    if gene_analysis_runs:
         _run_benchmark_comparison(
-            results_directories=results_directories,
+            run_config=gene_analysis_runs,
             score_order=score_order,
             output_prefix=output_prefix,
             threshold=threshold,
             plot_type=plot_type,
             benchmark_generator=GeneBenchmarkRunOutputGenerator(),
         )
-    if variant_analysis:
+    if variant_analysis_runs:
         _run_benchmark_comparison(
-            results_directories=results_directories,
+            run_config=variant_analysis_runs,
             score_order=score_order,
             output_prefix=output_prefix,
             threshold=threshold,
             plot_type=plot_type,
             benchmark_generator=VariantBenchmarkRunOutputGenerator(),
         )
-    if disease_analysis:
+    if disease_analysis_runs:
         _run_benchmark_comparison(
-            results_directories=results_directories,
+            run_config=disease_analysis_runs,
             score_order=score_order,
             output_prefix=output_prefix,
             threshold=threshold,

From 5cb9cbe5f153a609157162692a4dfd77097ae05d Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Wed, 7 Aug 2024 12:43:09 +0100
Subject: [PATCH 44/81] alter codebase to process run configurations for
 benchmarking

---
 src/pheval/analyse/benchmark_generator.py     | 42 ++++++-----
 .../disease_prioritisation_analysis.py        | 35 +++++----
 .../analyse/gene_prioritisation_analysis.py   | 35 +++++----
 .../analyse/generate_summary_outputs.py       | 64 +++++++++--------
 src/pheval/analyse/rank_stats.py              |  7 +-
 src/pheval/analyse/run_data_parser.py         | 72 ++++++++++++-------
 .../variant_prioritisation_analysis.py        | 39 +++++-----
 src/pheval/cli_pheval_utils.py                | 63 ++++++----------
 tests/test_analysis.py                        | 12 ++--
 tests/test_generate_summary_outputs.py        |  9 +--
 10 files changed, 188 insertions(+), 190 deletions(-)

diff --git a/src/pheval/analyse/benchmark_generator.py b/src/pheval/analyse/benchmark_generator.py
index 6996427ab..9441022ad 100644
--- a/src/pheval/analyse/benchmark_generator.py
+++ b/src/pheval/analyse/benchmark_generator.py
@@ -4,7 +4,7 @@
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.disease_prioritisation_analysis import benchmark_disease_prioritisation
 from pheval.analyse.gene_prioritisation_analysis import benchmark_gene_prioritisation
-from pheval.analyse.run_data_parser import TrackInputOutputDirectories
+from pheval.analyse.run_data_parser import RunConfig
 from pheval.analyse.variant_prioritisation_analysis import benchmark_variant_prioritisation
 from pheval.constants import (
     DISEASE_PLOT_Y_LABEL,
@@ -31,9 +31,7 @@ class BenchmarkRunOutputGenerator:
 
     prioritisation_type_string: str
     y_label: str
-    generate_benchmark_run_results: Callable[
-        [TrackInputOutputDirectories, str, float], BenchmarkRunResults
-    ]
+    generate_benchmark_run_results: Callable[[RunConfig, str, float], BenchmarkRunResults]
     stats_comparison_file_suffix: str
 
 
@@ -53,18 +51,18 @@ class GeneBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
             Defaults to GENE_PLOT_Y_LABEL.
         generate_benchmark_run_results (Callable): Callable to generate gene prioritisation
             benchmark run results. Defaults to benchmark_gene_prioritisation.
-            Takes parameters: input and results directory, score order, threshold, rank comparison,
+            Takes parameters: run configuration, score order, threshold,
             and returns BenchmarkRunResults.
         stats_comparison_file_suffix (str): Suffix for the gene rank comparison file.
-            Defaults to "-gene_summary.tsv".
+            Defaults to "-gene_summary".
     """
 
     prioritisation_type_string: str = GENE_PRIORITISATION_TYPE_STR
     y_label: str = GENE_PLOT_Y_LABEL
-    generate_benchmark_run_results: Callable[
-        [TrackInputOutputDirectories, str, float], BenchmarkRunResults
-    ] = benchmark_gene_prioritisation
-    stats_comparison_file_suffix: str = "-gene_summary.tsv"
+    generate_benchmark_run_results: Callable[[RunConfig, str, float], BenchmarkRunResults] = (
+        benchmark_gene_prioritisation
+    )
+    stats_comparison_file_suffix: str = "-gene_summary"
 
 
 @dataclass
@@ -83,19 +81,19 @@ class VariantBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
             Defaults to VARIANT_PLOT_Y_LABEL.
         generate_benchmark_run_results (Callable): Callable to generate variant prioritisation
             benchmark run results. Defaults to benchmark_variant_prioritisation.
-            Takes parameters: input and results directory, score order, threshold, rank comparison,
+            Takes parameters: run configuration, score order, threshold,
             and returns BenchmarkRunResults.
         stats_comparison_file_suffix (str): Suffix for the variant rank comparison file.
-            Defaults to "-variant_summary.tsv".
+            Defaults to "-variant_summary".
 
     """
 
     prioritisation_type_string: str = VARIANT_PRIORITISATION_TYPE_STR
     y_label: str = VARIANT_PLOT_Y_LABEL
-    generate_benchmark_run_results: Callable[
-        [TrackInputOutputDirectories, str, float], BenchmarkRunResults
-    ] = benchmark_variant_prioritisation
-    stats_comparison_file_suffix: str = "-variant_summary.tsv"
+    generate_benchmark_run_results: Callable[[RunConfig, str, float], BenchmarkRunResults] = (
+        benchmark_variant_prioritisation
+    )
+    stats_comparison_file_suffix: str = "-variant_summary"
 
 
 @dataclass
@@ -114,15 +112,15 @@ class DiseaseBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
             Defaults to DISEASE_PLOT_Y_LABEL.
         generate_benchmark_run_results (Callable): Callable to generate disease prioritisation
             benchmark run results. Defaults to benchmark_disease_prioritisation.
-            Takes parameters: input and results directory, score order, threshold, rank comparison,
+            Takes parameters: run configuration, score order, threshold,
             and returns BenchmarkRunResults.
         stats_comparison_file_suffix (str): Suffix for the disease rank comparison file.
-            Defaults to "-disease_summary.tsv".
+            Defaults to "-disease_summary".
     """
 
     prioritisation_type_string: str = DISEASE_PRIORITISATION_TYPE_STR
     y_label: str = DISEASE_PLOT_Y_LABEL
-    generate_benchmark_run_results: Callable[
-        [TrackInputOutputDirectories, str, float], BenchmarkRunResults
-    ] = benchmark_disease_prioritisation
-    stats_comparison_file_suffix: str = "-disease_summary.tsv"
+    generate_benchmark_run_results: Callable[[RunConfig, str, float], BenchmarkRunResults] = (
+        benchmark_disease_prioritisation
+    )
+    stats_comparison_file_suffix: str = "-disease_summary"
diff --git a/src/pheval/analyse/disease_prioritisation_analysis.py b/src/pheval/analyse/disease_prioritisation_analysis.py
index a2098a9c9..b54c2251e 100644
--- a/src/pheval/analyse/disease_prioritisation_analysis.py
+++ b/src/pheval/analyse/disease_prioritisation_analysis.py
@@ -6,7 +6,7 @@
 from pheval.analyse.get_connection import DBConnector
 from pheval.analyse.parse_pheval_result import parse_pheval_result, read_standardised_result
 from pheval.analyse.rank_stats import RankStats
-from pheval.analyse.run_data_parser import TrackInputOutputDirectories
+from pheval.analyse.run_data_parser import RunConfig
 from pheval.post_processing.post_processing import RankedPhEvalDiseaseResult
 from pheval.utils.file_utils import all_files
 
@@ -18,7 +18,7 @@ def __init__(
         self,
         db_connection: DBConnector,
         table_name: str,
-        results_dir: Path,
+        column: str,
         threshold: float,
         score_order: str,
     ):
@@ -28,16 +28,15 @@ def __init__(
         Args:
             db_connection (DBConnector): Database connection
             table_name (str): Table name
-            results_dir (Path): Path to the results directory
+            column (str): Column name
             threshold (float): Threshold for scores
             score_order (str): Score order for results, either ascending or descending
 
         """
-        self.results_dir = results_dir
         self.threshold = threshold
         self.score_order = score_order
         self.conn = db_connection.conn
-        self.column = str(self.results_dir.parents[0])
+        self.column = column
         self.table_name = table_name
         db_connection.add_column_integer_default(
             table_name=table_name, column=self.column, default=0
@@ -155,7 +154,7 @@ def assess_disease_prioritisation(
 
 def assess_phenopacket_disease_prioritisation(
     phenopacket_path: Path,
-    results_dir_and_input: TrackInputOutputDirectories,
+    run: RunConfig,
     disease_binary_classification_stats: BinaryClassificationStats,
     disease_benchmarker: AssessDiseasePrioritisation,
 ) -> None:
@@ -165,11 +164,11 @@ def assess_phenopacket_disease_prioritisation(
 
     Args:
         phenopacket_path (Path): Path to the Phenopacket.
-        results_dir_and_input (TrackInputOutputDirectories): Input and output directories.
+        run (RunConfig): Run configuration.
         disease_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
         disease_benchmarker (AssessDiseasePrioritisation): AssessDiseasePrioritisation class instance.
     """
-    standardised_disease_result = results_dir_and_input.results_dir.joinpath(
+    standardised_disease_result = run.results_dir.joinpath(
         f"pheval_disease_results/{phenopacket_path.stem}-pheval_disease_result.tsv"
     )
     pheval_disease_result = read_standardised_result(standardised_disease_result)
@@ -181,7 +180,7 @@ def assess_phenopacket_disease_prioritisation(
 
 
 def benchmark_disease_prioritisation(
-    results_directory_and_input: TrackInputOutputDirectories,
+    run: RunConfig,
     score_order: str,
     threshold: float,
 ):
@@ -189,7 +188,7 @@ def benchmark_disease_prioritisation(
     Benchmark a directory based on disease prioritisation results.
 
     Args:
-        results_directory_and_input (TrackInputOutputDirectories): Input and output directories.
+        run (RunConfig): Run configuration.
         score_order (str): The order in which scores are arranged.
         threshold (float): Threshold for assessment.
 
@@ -201,27 +200,27 @@ def benchmark_disease_prioritisation(
     db_connection = DBConnector()
     disease_benchmarker = AssessDiseasePrioritisation(
         db_connection,
-        f"{results_directory_and_input.phenopacket_dir.parents[0].name}_disease",
-        results_directory_and_input.results_dir.joinpath("pheval_disease_results/"),
+        f"{run.phenopacket_dir.parents[0].name}_disease",
+        run.run_identifier,
         threshold,
         score_order,
     )
-    for phenopacket_path in all_files(results_directory_and_input.phenopacket_dir):
+    for phenopacket_path in all_files(run.phenopacket_dir):
         assess_phenopacket_disease_prioritisation(
             phenopacket_path,
-            results_directory_and_input,
+            run,
             disease_binary_classification_stats,
             disease_benchmarker,
         )
     db_connection.close()
     disease_rank_stats = RankStats()
     disease_rank_stats.add_ranks(
-        table_name=f"{results_directory_and_input.phenopacket_dir.parents[0].name}_disease",
-        column_name=str(results_directory_and_input.results_dir),
+        table_name=f"{run.phenopacket_dir.parents[0].name}_disease",
+        column_name=str(run.run_identifier),
     )
     return BenchmarkRunResults(
         rank_stats=disease_rank_stats,
-        results_dir=results_directory_and_input.results_dir,
+        benchmark_name=run.run_identifier,
         binary_classification_stats=disease_binary_classification_stats,
-        phenopacket_dir=results_directory_and_input.phenopacket_dir,
+        phenopacket_dir=run.phenopacket_dir,
     )
diff --git a/src/pheval/analyse/gene_prioritisation_analysis.py b/src/pheval/analyse/gene_prioritisation_analysis.py
index 9fd3c1c1c..743f3e346 100644
--- a/src/pheval/analyse/gene_prioritisation_analysis.py
+++ b/src/pheval/analyse/gene_prioritisation_analysis.py
@@ -8,7 +8,7 @@
 from pheval.analyse.get_connection import DBConnector
 from pheval.analyse.parse_pheval_result import parse_pheval_result, read_standardised_result
 from pheval.analyse.rank_stats import RankStats
-from pheval.analyse.run_data_parser import TrackInputOutputDirectories
+from pheval.analyse.run_data_parser import RunConfig
 from pheval.post_processing.post_processing import RankedPhEvalGeneResult
 from pheval.utils.file_utils import all_files
 
@@ -20,7 +20,7 @@ def __init__(
         self,
         db_connection: DBConnector,
         table_name: str,
-        results_dir: Path,
+        column: str,
         threshold: float,
         score_order: str,
     ):
@@ -30,16 +30,15 @@ def __init__(
         Args:
             db_connection (DBConnector): Database connection
             table_name (str): Table name
-            results_dir (Path): Path to the results directory
+            column (str): Column name
             threshold (float): Threshold for scores
             score_order (str): Score order for results, either ascending or descending
         """
-        self.results_dir = results_dir
         self.threshold = threshold
         self.score_order = score_order
         self.db_connection = db_connection
         self.conn = db_connection.conn
-        self.column = str(self.results_dir.parents[0])
+        self.column = column
         self.table_name = table_name
         db_connection.add_column_integer_default(
             table_name=table_name, column=self.column, default=0
@@ -173,7 +172,7 @@ def assess_gene_prioritisation(
 
 def assess_phenopacket_gene_prioritisation(
     phenopacket_path: Path,
-    results_dir_and_input: TrackInputOutputDirectories,
+    run: RunConfig,
     gene_binary_classification_stats: BinaryClassificationStats,
     gene_benchmarker: AssessGenePrioritisation,
 ) -> None:
@@ -183,11 +182,11 @@ def assess_phenopacket_gene_prioritisation(
 
     Args:
         phenopacket_path (Path): Path to the Phenopacket.
-        results_dir_and_input (TrackInputOutputDirectories): Input and output directories.
+        run (RunConfig): Run configuration.
         gene_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
         gene_benchmarker (AssessGenePrioritisation): AssessGenePrioritisation class instance.
     """
-    standardised_gene_result = results_dir_and_input.results_dir.joinpath(
+    standardised_gene_result = run.results_dir.joinpath(
         f"pheval_gene_results/{phenopacket_path.stem}-pheval_gene_result.tsv"
     )
     pheval_gene_result = read_standardised_result(standardised_gene_result)
@@ -199,14 +198,14 @@ def assess_phenopacket_gene_prioritisation(
 
 
 def benchmark_gene_prioritisation(
-    results_directory_and_input: TrackInputOutputDirectories,
+    run: RunConfig,
     score_order: str,
     threshold: float,
 ) -> BenchmarkRunResults:
     """
     Benchmark a directory based on gene prioritisation results.
      Args:
-         results_directory_and_input (TrackInputOutputDirectories): Input and output directories.
+         run (RunConfig): Run configuration.
          score_order (str): The order in which scores are arranged.
          threshold (float): Threshold for assessment.
      Returns:
@@ -217,27 +216,27 @@ def benchmark_gene_prioritisation(
     db_connection = DBConnector()
     gene_benchmarker = AssessGenePrioritisation(
         db_connection,
-        f"{results_directory_and_input.phenopacket_dir.parents[0].name}" f"_gene",
-        results_directory_and_input.results_dir.joinpath("pheval_gene_results/"),
+        f"{run.phenopacket_dir.parents[0].name}" f"_gene",
+        run.run_identifier,
         threshold,
         score_order,
     )
-    for phenopacket_path in all_files(results_directory_and_input.phenopacket_dir):
+    for phenopacket_path in all_files(run.phenopacket_dir):
         assess_phenopacket_gene_prioritisation(
             phenopacket_path,
-            results_directory_and_input,
+            run,
             gene_binary_classification_stats,
             gene_benchmarker,
         )
     db_connection.close()
     gene_rank_stats = RankStats()
     gene_rank_stats.add_ranks(
-        table_name=f"{results_directory_and_input.phenopacket_dir.parents[0].name}_gene",
-        column_name=str(results_directory_and_input.results_dir),
+        table_name=f"{run.phenopacket_dir.parents[0].name}_gene",
+        column_name=str(run.run_identifier),
     )
     return BenchmarkRunResults(
         rank_stats=gene_rank_stats,
-        results_dir=results_directory_and_input.results_dir,
+        benchmark_name=run.run_identifier,
         binary_classification_stats=gene_binary_classification_stats,
-        phenopacket_dir=results_directory_and_input.phenopacket_dir,
+        phenopacket_dir=run.phenopacket_dir,
     )
diff --git a/src/pheval/analyse/generate_summary_outputs.py b/src/pheval/analyse/generate_summary_outputs.py
index 7c8b3b274..e6b0cda2e 100644
--- a/src/pheval/analyse/generate_summary_outputs.py
+++ b/src/pheval/analyse/generate_summary_outputs.py
@@ -1,5 +1,4 @@
 import itertools
-from pathlib import Path
 from typing import List
 
 from pheval.analyse.benchmark_generator import BenchmarkRunOutputGenerator
@@ -22,7 +21,11 @@ def generate_benchmark_output(
         plot_type (str): Type of plot to generate.
         benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
     """
-    results_dir_name = benchmarking_results.results_dir.name
+    results_dir_name = (
+        benchmarking_results.results_dir.name
+        if benchmarking_results.results_dir
+        else benchmarking_results.benchmark_name
+    )
     conn = DBConnector().conn
     conn.execute(
         f"CREATE TABLE {results_dir_name}_{benchmark_generator.prioritisation_type_string}{RANK_COMPARISON_SUFFIX} "
@@ -38,20 +41,18 @@ def generate_benchmark_output(
     )
 
 
-def get_new_table_name(result_dir_1: Path, result_dir_2: Path, output_prefix: str) -> str:
+def get_new_table_name(run_identifier_1: str, run_identifier_2: str, output_prefix: str) -> str:
     """
     Get the new table name for rank comparison tables.
     Args:
-        result_dir_1: The path to the first result directory.
-        result_dir_2: The path to the second result directory.
+        run_identifier_1: The first run identifier.
+        run_identifier_2: The second run identifier.
         output_prefix: The output prefix of the table
     Returns:
         The new table name.
     """
     return (
-        f"{Path(result_dir_1).parents[0].name}_{Path(result_dir_1).name}_vs_"
-        f"{Path(result_dir_2).parents[0].name}_{Path(result_dir_2).name}_"
-        f"{output_prefix}{RANK_COMPARISON_SUFFIX}"
+        f"{run_identifier_1}_vs_" f"{run_identifier_2}_" f"{output_prefix}{RANK_COMPARISON_SUFFIX}"
     )
 
 
@@ -59,8 +60,8 @@ def create_comparison_table(
     comparison_table_name: str,
     connector: DBConnector,
     drop_columns: List[str],
-    result_dir_1: str,
-    result_dir_2: str,
+    run_identifier_1: str,
+    run_identifier_2: str,
     table_name: str,
 ) -> None:
     """
@@ -69,28 +70,32 @@ def create_comparison_table(
         comparison_table_name (str): Name of the comparison table to create.
         connector (DBConnector): DBConnector instance.
         drop_columns (List[str]): List of columns to drop.
-        result_dir_1 (str): Path to the first result directory.
-        result_dir_2 (str): Path to the second result directory.
+        run_identifier_1 (str): The first run identifier.
+        run_identifier_2 (str): The second run identifier.
         table_name (str): Name of the table to extract ranks from
     """
     connector.drop_table(comparison_table_name)
+    excluded_columns = (", ".join(drop_columns), "identifier") if drop_columns else ("identifier",)
     connector.conn.execute(
         f'CREATE TABLE "{comparison_table_name}" AS SELECT * '
-        f'EXCLUDE (\'{", ".join(drop_columns)}\', identifier) FROM {table_name}'
+        f"EXCLUDE {excluded_columns} FROM {table_name}"
     )
+
     connector.conn.execute(
         f"""ALTER TABLE "{comparison_table_name}" ADD COLUMN rank_change VARCHAR;"""
     )
     connector.conn.execute(
-        f'UPDATE "{comparison_table_name}" SET rank_change = CASE WHEN "{result_dir_1}" = 0 AND "{result_dir_2}" != 0 '
-        f"THEN 'GAINED' WHEN \"{result_dir_1}\" != 0 AND \"{result_dir_2}\" = 0 THEN 'LOST' ELSE "
-        f'CAST ("{result_dir_1}" - "{result_dir_2}" AS VARCHAR) END;'
+        f'UPDATE "{comparison_table_name}" SET rank_change = CASE WHEN "{run_identifier_1}" = 0 '
+        f'AND "{run_identifier_2}" != 0 '
+        f"THEN 'GAINED' WHEN \"{run_identifier_1}\" != 0 AND \"{run_identifier_2}\" = 0 THEN 'LOST' ELSE "
+        f'CAST ("{run_identifier_1}" - "{run_identifier_2}" AS VARCHAR) END;'
     )
     connector.conn.commit()
 
 
 def generate_benchmark_comparison_output(
     benchmarking_results: List[BenchmarkRunResults],
+    run_identifiers: List[str],
     plot_type: str,
     benchmark_generator: BenchmarkRunOutputGenerator,
     table_name: str,
@@ -105,28 +110,29 @@ def generate_benchmark_comparison_output(
     Args:
         benchmarking_results (List[BenchmarkRunResults]): A list containing BenchmarkRunResults instances
             representing the benchmarking results of multiple runs.
+        run_identifiers (List[str]): A list of run identifiers.
         plot_type (str): The type of plot to be generated.
         benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
         table_name (str): The name of the table where ranks are stored.
     """
     output_prefix = benchmark_generator.prioritisation_type_string
     connector = DBConnector()
-    run_columns = [
-        column
-        for column in connector.conn.execute(f"PRAGMA table_info('{table_name}');")
-        .fetchdf()["name"]
-        .to_list()
-        if "/" in column
-    ]
     for pair in itertools.combinations(
-        [str(result.results_dir) for result in benchmarking_results], 2
+        [str(result.benchmark_name) for result in benchmarking_results], 2
     ):
-        result_dir_1 = pair[0]
-        result_dir_2 = pair[1]
-        drop_columns = [run for run in run_columns if run not in pair]
-        comparison_table_name = get_new_table_name(result_dir_1, result_dir_2, output_prefix)
+        run_identifier_1 = pair[0]
+        run_identifier_2 = pair[1]
+        drop_columns = [run for run in run_identifiers if run not in pair]
+        comparison_table_name = get_new_table_name(
+            run_identifier_1, run_identifier_2, output_prefix
+        )
         create_comparison_table(
-            comparison_table_name, connector, drop_columns, result_dir_1, result_dir_2, table_name
+            comparison_table_name,
+            connector,
+            drop_columns,
+            run_identifier_1,
+            run_identifier_2,
+            table_name,
         )
     generate_plots(
         benchmarking_results,
diff --git a/src/pheval/analyse/rank_stats.py b/src/pheval/analyse/rank_stats.py
index b6e943e42..37f0d9c04 100644
--- a/src/pheval/analyse/rank_stats.py
+++ b/src/pheval/analyse/rank_stats.py
@@ -1,5 +1,4 @@
 from dataclasses import dataclass, field
-from pathlib import Path
 from statistics import mean
 from typing import List
 
@@ -386,21 +385,21 @@ def __init__(self, table_name: str):
 
     def add_statistics_entry(
         self,
-        directory_path: Path,
+        run_identifier: str,
         rank_stats: RankStats,
         binary_classification: BinaryClassificationStats,
     ):
         """
         Add statistics row to table for a run.
         Args:
-            directory_path (Path): Path to the results directory associated with the run.
+            run_identifier (str): The run identifier.
             rank_stats (RankStats): RankStats object for the run.
             binary_classification (BinaryClassificationStats): BinaryClassificationStats object for the run.
         """
         conn = DBConnector().conn
         conn.execute(
             f' INSERT INTO "{self.table_name}" VALUES ( '
-            f"'{directory_path}',"
+            f"'{run_identifier}',"
             f"{rank_stats.top},"
             f"{rank_stats.top3},"
             f"{rank_stats.top5},"
diff --git a/src/pheval/analyse/run_data_parser.py b/src/pheval/analyse/run_data_parser.py
index 3c652132f..75b6cc182 100644
--- a/src/pheval/analyse/run_data_parser.py
+++ b/src/pheval/analyse/run_data_parser.py
@@ -2,43 +2,65 @@
 from pathlib import Path
 from typing import List
 
-import pandas as pd
+import yaml
+from pydantic import BaseModel
 
 
-@dataclass
-class TrackInputOutputDirectories:
+class RunConfig(BaseModel):
     """
-    Track the input phenopacket test data for a corresponding pheval output directory.
+    Store configurations for a run.
 
     Attributes:
-        phenopacket_dir (Path): The directory containing input phenopackets.
-        results_dir (Path): The directory containing output results from pheval.
+        run_identifier (str): The run identifier.
+        phenopacket_dir (Path): The path to the phenopacket directory used for generating the results.
+        results_dir (Path): The path to the results directory.
+        gene_analysis (bool): Whether or not to benchmark gene analysis results.
+        variant_analysis (bool): Whether or not to benchmark variant analysis results.
+        disease_analysis (bool): Whether or not to benchmark disease analysis results.
     """
 
+    run_identifier: str
     phenopacket_dir: Path
     results_dir: Path
+    gene_analysis: bool
+    variant_analysis: bool
+    disease_analysis: bool
 
 
-def parse_run_data_text_file(run_data_path: Path) -> List[TrackInputOutputDirectories]:
+class Config(BaseModel):
+    """
+    Store configurations for multiple runs.
+    Attributes:
+        runs (List[RunConfig]): The list of run configurations.
     """
-    Parse run data .txt file returning a list of input phenopacket and corresponding output directories.
 
-    Args:
-        run_data_path (Path): The path to the run data .txt file.
+    runs: List[RunConfig]
+
 
+def parse_run_config(run_data_path: Path) -> Config:
+    """
+    Parse a run configuration yaml file.
+    Args:
+        run_data_path (Path): The path to the run data yaml configuration.
     Returns:
-        List[TrackInputOutputDirectories]: A list of TrackInputOutputDirectories objects, containing
-        input test data directories and their corresponding output directories.
-
-    Notes:
-        The run data .txt file should be formatted with tab-separated values. Each row should contain
-        two columns: the first column representing the input test data phenopacket directory, and
-        the second column representing the corresponding run output directory.
-    """
-    run_data = pd.read_csv(run_data_path, delimiter="\t", header=None)
-    run_data_list = []
-    for _index, row in run_data.iterrows():
-        run_data_list.append(
-            TrackInputOutputDirectories(phenopacket_dir=Path(row[0]), results_dir=Path(row[1]))
-        )
-    return run_data_list
+        Config: The parsed run configurations.
+    """
+    with open(run_data_path, "r") as f:
+        config_data = yaml.safe_load(f)
+    f.close()
+    config = Config(**config_data)
+    return config
+
+
+@dataclass
+class TrackInputOutputDirectories:
+    """
+    Track the input phenopacket test data for a corresponding pheval output directory.
+
+    Attributes:
+        phenopacket_dir (Path): The directory containing input phenopackets.
+        results_dir (Path): The directory containing output results from pheval.
+    """
+
+    phenopacket_dir: Path
+    results_dir: Path
diff --git a/src/pheval/analyse/variant_prioritisation_analysis.py b/src/pheval/analyse/variant_prioritisation_analysis.py
index 6ea131aa0..ea45351d0 100644
--- a/src/pheval/analyse/variant_prioritisation_analysis.py
+++ b/src/pheval/analyse/variant_prioritisation_analysis.py
@@ -6,7 +6,9 @@
 from pheval.analyse.get_connection import DBConnector
 from pheval.analyse.parse_pheval_result import parse_pheval_result, read_standardised_result
 from pheval.analyse.rank_stats import RankStats
-from pheval.analyse.run_data_parser import TrackInputOutputDirectories
+from pheval.analyse.run_data_parser import RunConfig
+
+# from pheval.analyse.run_data_parser import TrackInputOutputDirectories
 from pheval.post_processing.post_processing import RankedPhEvalVariantResult
 from pheval.utils.file_utils import all_files
 from pheval.utils.phenopacket_utils import GenomicVariant
@@ -19,7 +21,7 @@ def __init__(
         self,
         db_connection: DBConnector,
         table_name: str,
-        results_dir: Path,
+        column: str,
         threshold: float,
         score_order: str,
     ):
@@ -27,16 +29,17 @@ def __init__(
         Initialise AssessVariantPrioritisation class
 
         Args:
-            results_dir (Path): Path to the results directory
+            db_connection (DBConnector): DB connection.
+            table_name (str): Table name.
+            column (str): Column name.
             threshold (float): Threshold for scores
             score_order (str): Score order for results, either ascending or descending
 
         """
-        self.results_dir = results_dir
         self.threshold = threshold
         self.score_order = score_order
         self.conn = db_connection.conn
-        self.column = str(self.results_dir.parents[0])
+        self.column = column
         self.table_name = table_name
         db_connection.add_column_integer_default(
             table_name=table_name, column=self.column, default=0
@@ -163,7 +166,7 @@ def assess_variant_prioritisation(
 
 def assess_phenopacket_variant_prioritisation(
     phenopacket_path: Path,
-    results_dir_and_input: TrackInputOutputDirectories,
+    run: RunConfig,
     variant_binary_classification_stats: BinaryClassificationStats,
     variant_benchmarker: AssessVariantPrioritisation,
 ) -> None:
@@ -173,11 +176,11 @@ def assess_phenopacket_variant_prioritisation(
 
     Args:
         phenopacket_path (Path): Path to the Phenopacket.
-        results_dir_and_input (TrackInputOutputDirectories): Input and output directories.
+        run (RunConfig): Run configuration.
         variant_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
         variant_benchmarker (AssessVariantPrioritisation): AssessVariantPrioritisation class instance.
     """
-    standardised_variant_result = results_dir_and_input.results_dir.joinpath(
+    standardised_variant_result = run.results_dir.joinpath(
         f"pheval_variant_results/{phenopacket_path.stem}-pheval_variant_result.tsv"
     )
     pheval_variant_result = read_standardised_result(standardised_variant_result)
@@ -189,7 +192,7 @@ def assess_phenopacket_variant_prioritisation(
 
 
 def benchmark_variant_prioritisation(
-    results_directory_and_input: TrackInputOutputDirectories,
+    run: RunConfig,
     score_order: str,
     threshold: float,
 ):
@@ -197,7 +200,7 @@ def benchmark_variant_prioritisation(
     Benchmark a directory based on variant prioritisation results.
 
     Args:
-        results_directory_and_input (TrackInputOutputDirectories): Input and output directories.
+        run (RunConfig): Run configuration.
         score_order (str): The order in which scores are arranged.
         threshold (float): Threshold for assessment.
 
@@ -209,26 +212,26 @@ def benchmark_variant_prioritisation(
     db_connection = DBConnector()
     variant_benchmarker = AssessVariantPrioritisation(
         db_connection,
-        f"{results_directory_and_input.phenopacket_dir.parents[0].name}" f"_variant",
-        results_directory_and_input.results_dir.joinpath("pheval_variant_results/"),
+        f"{run.phenopacket_dir.parents[0].name}" f"_variant",
+        run.run_identifier,
         threshold,
         score_order,
     )
-    for phenopacket_path in all_files(results_directory_and_input.phenopacket_dir):
+    for phenopacket_path in all_files(run.phenopacket_dir):
         assess_phenopacket_variant_prioritisation(
             phenopacket_path,
-            results_directory_and_input,
+            run,
             variant_binary_classification_stats,
             variant_benchmarker,
         )
     variant_rank_stats = RankStats()
     variant_rank_stats.add_ranks(
-        table_name=f"{results_directory_and_input.phenopacket_dir.parents[0].name}_variant",
-        column_name=str(results_directory_and_input.results_dir),
+        table_name=f"{run.phenopacket_dir.parents[0].name}_variant",
+        column_name=str(run.run_identifier),
     )
     return BenchmarkRunResults(
-        results_dir=results_directory_and_input.results_dir,
+        benchmark_name=run.run_identifier,
         rank_stats=variant_rank_stats,
         binary_classification_stats=variant_binary_classification_stats,
-        phenopacket_dir=results_directory_and_input.phenopacket_dir,
+        phenopacket_dir=run.phenopacket_dir,
     )
diff --git a/src/pheval/cli_pheval_utils.py b/src/pheval/cli_pheval_utils.py
index 9758ecb56..5ad37ad35 100644
--- a/src/pheval/cli_pheval_utils.py
+++ b/src/pheval/cli_pheval_utils.py
@@ -5,13 +5,9 @@
 
 import click
 
-from pheval.analyse.analysis import (
-    TrackInputOutputDirectories,
-    benchmark_directory,
-    benchmark_run_comparisons,
-)
+from pheval.analyse.analysis import benchmark_directory, benchmark_run_comparisons
 from pheval.analyse.generate_plots import generate_plots_from_benchmark_summary_tsv
-from pheval.analyse.run_data_parser import parse_run_data_text_file
+from pheval.analyse.run_data_parser import RunConfig, parse_run_config
 from pheval.prepare.create_noisy_phenopackets import scramble_phenopackets
 from pheval.prepare.create_spiked_vcf import spike_vcfs
 from pheval.prepare.custom_exceptions import InputError, MutuallyExclusiveOptionError
@@ -346,6 +342,14 @@ def create_spiked_vcfs_command(
     "pheval_variant_results/ or pheval_disease_results/. ",
     type=Path,
 )
+@click.option(
+    "--run-identifier",
+    "-r",
+    required=True,
+    metavar="STRING",
+    help="The run identifier.",
+    type=str,
+)
 @click.option(
     "--phenopacket-dir",
     "-p",
@@ -413,6 +417,7 @@ def create_spiked_vcfs_command(
 )
 def benchmark(
     directory: Path,
+    run_identifier: str,
     phenopacket_dir: Path,
     score_order: str,
     output_prefix: str,
@@ -426,13 +431,17 @@ def benchmark(
     if not gene_analysis and not variant_analysis and not disease_analysis:
         raise InputError("Need to specify at least one of gene/variant/disease analysis.")
     benchmark_directory(
-        TrackInputOutputDirectories(results_dir=directory, phenopacket_dir=phenopacket_dir),
+        RunConfig(
+            run_identifier=run_identifier,
+            phenopacket_dir=phenopacket_dir,
+            results_dir=directory,
+            gene_analysis=gene_analysis,
+            variant_analysis=variant_analysis,
+            disease_analysis=disease_analysis,
+        ),
         score_order,
         output_prefix,
         threshold,
-        gene_analysis,
-        variant_analysis,
-        disease_analysis,
         plot_type,
     )
 
@@ -474,30 +483,6 @@ def benchmark(
     help="Score threshold.",
     type=float,
 )
-@click.option(
-    "--gene-analysis/--no-gene-analysis",
-    default=False,
-    required=False,
-    type=bool,
-    show_default=True,
-    help="Specify analysis for gene prioritisation",
-)
-@click.option(
-    "--variant-analysis/--no-variant-analysis",
-    default=False,
-    required=False,
-    type=bool,
-    show_default=True,
-    help="Specify analysis for variant prioritisation",
-)
-@click.option(
-    "--disease-analysis/--no-disease-analysis",
-    default=False,
-    required=False,
-    type=bool,
-    show_default=True,
-    help="Specify analysis for disease prioritisation",
-)
 @click.option(
     "--plot-type",
     "-y",
@@ -511,22 +496,14 @@ def benchmark_comparison(
     score_order: str,
     output_prefix: str,
     threshold: float,
-    gene_analysis: bool,
-    variant_analysis: bool,
-    disease_analysis: bool,
     plot_type: str,
 ):
     """Benchmark the gene/variant/disease prioritisation performance for two runs."""
-    if not gene_analysis and not variant_analysis and not disease_analysis:
-        raise InputError("Need to specify at least one of gene/variant/disease analysis.")
     benchmark_run_comparisons(
-        parse_run_data_text_file(run_data),
+        parse_run_config(run_data),
         score_order,
         output_prefix,
         threshold,
-        gene_analysis,
-        variant_analysis,
-        disease_analysis,
         plot_type,
     )
 
diff --git a/tests/test_analysis.py b/tests/test_analysis.py
index 84e5fdcaf..fda3f40c1 100644
--- a/tests/test_analysis.py
+++ b/tests/test_analysis.py
@@ -72,14 +72,14 @@ def setUp(self):
         self.assess_gene_prioritisation = AssessGenePrioritisation(
             db_connection=self.db_connector,
             table_name="test_table_gene",
-            results_dir=Path("/path/to/results_dir"),
+            column="run_1",
             threshold=0,
             score_order="descending",
         )
         self.assess_gene_prioritisation_ascending_order = AssessGenePrioritisation(
             db_connection=self.db_connector,
             table_name="test_table_gene",
-            results_dir=Path("/path/to/results_dir"),
+            column="run_1",
             threshold=0,
             score_order="ascending",
         )
@@ -241,14 +241,14 @@ def setUp(self):
         self.assess_variant_prioritisation = AssessVariantPrioritisation(
             db_connection=self.db_connector,
             table_name="test_table_variant",
-            results_dir=Path("/path/to/results_dir"),
+            column="run_1",
             threshold=0,
             score_order="descending",
         )
         self.assess_variant_prioritisation_ascending_order = AssessVariantPrioritisation(
             db_connection=self.db_connector,
             table_name="test_table_variant",
-            results_dir=Path("/path/to/results_dir"),
+            column="run_1",
             threshold=0,
             score_order="ascending",
         )
@@ -424,14 +424,14 @@ def setUp(self):
         self.assess_disease_prioritisation = AssessDiseasePrioritisation(
             db_connection=self.db_connector,
             table_name="test_table_disease",
-            results_dir=Path("/path/to/results_dir"),
+            column="run_1",
             threshold=0,
             score_order="descending",
         )
         self.assess_disease_prioritisation_ascending_order = AssessDiseasePrioritisation(
             db_connection=self.db_connector,
             table_name="test_table_disease",
-            results_dir=Path("/path/to/results_dir"),
+            column="run_1",
             threshold=0,
             score_order="ascending",
         )
diff --git a/tests/test_generate_summary_outputs.py b/tests/test_generate_summary_outputs.py
index 293a4f723..c8b9ef17c 100644
--- a/tests/test_generate_summary_outputs.py
+++ b/tests/test_generate_summary_outputs.py
@@ -1,5 +1,4 @@
 import unittest
-from pathlib import Path
 from unittest.mock import patch
 
 import duckdb
@@ -10,12 +9,8 @@
 
 class TestGetNewTableName(unittest.TestCase):
     def test_get_new_table_name(self):
-        new_table_name = get_new_table_name(
-            Path("/path/to/result_dir_1/corpus_1"), Path("/path/to/result_dir_2/corpus_1"), "gene"
-        )
-        self.assertEqual(
-            new_table_name, "result_dir_1_corpus_1_vs_result_dir_2_corpus_1_gene_rank_comparison"
-        )
+        new_table_name = get_new_table_name("run_1", "run_2", "gene")
+        self.assertEqual(new_table_name, "run_1_vs_run_2_gene_rank_comparison")
 
 
 class TestCreateComparisonTable(unittest.TestCase):

From 78ea8e2657309df6828edffc01768a53fac1126b Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Wed, 7 Aug 2024 12:43:25 +0100
Subject: [PATCH 45/81] clear plot figure before generating to avoid
 overlapping plots

---
 src/pheval/analyse/generate_plots.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/pheval/analyse/generate_plots.py b/src/pheval/analyse/generate_plots.py
index c3e388063..05268789b 100644
--- a/src/pheval/analyse/generate_plots.py
+++ b/src/pheval/analyse/generate_plots.py
@@ -159,6 +159,7 @@ def generate_stacked_bar_plot(
             self._generate_stacked_bar_plot_data(benchmark_result)
             self._generate_stats_mrr_bar_plot_data(benchmark_result)
         stats_df = pd.DataFrame(self.stats)
+        plt.clf()
         stats_df.set_index("Run").plot(
             kind="bar",
             stacked=True,
@@ -265,6 +266,7 @@ def generate_cumulative_bar(
         for benchmark_result in benchmarking_results:
             self._generate_cumulative_bar_plot_data(benchmark_result)
         stats_df = pd.DataFrame(self.stats)
+        plt.clf()
         sns.catplot(
             data=stats_df,
             kind="bar",
@@ -368,6 +370,7 @@ def generate_roc_curve(
             benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs.
             benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
         """
+        plt.clf()
         for i, benchmark_result in enumerate(benchmarking_results):
             fpr, tpr, thresh = roc_curve(
                 benchmark_result.binary_classification_stats.labels,
@@ -406,6 +409,7 @@ def generate_precision_recall(
             benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs.
             benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
         """
+        plt.clf()
         plt.figure()
         for i, benchmark_result in enumerate(benchmarking_results):
             precision, recall, thresh = precision_recall_curve(
@@ -446,6 +450,7 @@ def generate_non_cumulative_bar(
             benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
             title (str, optional): Title for the generated plot. Defaults to None.
         """
+        plt.clf()
         for benchmark_result in benchmarking_results:
             self._generate_non_cumulative_bar_plot_data(benchmark_result)
 

From 403e5d9c0a65d9c686027c541bb91672c82e1da3 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Mon, 19 Aug 2024 13:51:24 +0100
Subject: [PATCH 46/81] parse TSV result to a duckdb table in place of using a
 pandas DataFrame

---
 .../disease_prioritisation_analysis.py        | 40 +++++++-----
 .../analyse/gene_prioritisation_analysis.py   | 64 ++++++-------------
 .../variant_prioritisation_analysis.py        | 45 +++++++------
 3 files changed, 69 insertions(+), 80 deletions(-)

diff --git a/src/pheval/analyse/disease_prioritisation_analysis.py b/src/pheval/analyse/disease_prioritisation_analysis.py
index b54c2251e..6a92e45b1 100644
--- a/src/pheval/analyse/disease_prioritisation_analysis.py
+++ b/src/pheval/analyse/disease_prioritisation_analysis.py
@@ -1,10 +1,8 @@
 from pathlib import Path
-from typing import List
 
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
 from pheval.analyse.get_connection import DBConnector
-from pheval.analyse.parse_pheval_result import parse_pheval_result, read_standardised_result
 from pheval.analyse.rank_stats import RankStats
 from pheval.analyse.run_data_parser import RunConfig
 from pheval.post_processing.post_processing import RankedPhEvalDiseaseResult
@@ -35,6 +33,7 @@ def __init__(
         """
         self.threshold = threshold
         self.score_order = score_order
+        self.db_connection = db_connection
         self.conn = db_connection.conn
         self.column = column
         self.table_name = table_name
@@ -114,7 +113,7 @@ def _record_matched_disease(
 
     def assess_disease_prioritisation(
         self,
-        standardised_disease_results: List[RankedPhEvalDiseaseResult],
+        standardised_disease_result_path: Path,
         phenopacket_path: Path,
         binary_classification_stats: BinaryClassificationStats,
     ) -> None:
@@ -125,7 +124,7 @@ def assess_disease_prioritisation(
         and records ranks using a PrioritisationRankRecorder.
 
         Args:
-            standardised_disease_results (List[RankedPhEvalDiseaseResult]): List of standardised disease results.
+            standardised_disease_result_path (Path): Path to the standardised disease TSV result.
             phenopacket_path (Path): Path to the phenopacket.
             binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
         """
@@ -135,21 +134,32 @@ def assess_disease_prioritisation(
             (phenopacket_path.name,),
         ).fetchdf()
         for _i, row in df.iterrows():
-            generated_matches = list(
-                result
-                for result in standardised_disease_results
-                if row["disease_name"] == result.disease_name
-                or row["disease_identifier"] == result.disease_identifier
+            result = (
+                self.conn.execute(
+                    f"SELECT * FROM '{standardised_disease_result_path}' "
+                    f"WHERE contains_entity_function(CAST(COALESCE(disease_identifier, '') AS VARCHAR),"
+                    f" '{row['disease_identifier']}') "
+                    f"OR contains_entity_function(CAST(COALESCE(disease_name, '') AS VARCHAR), "
+                    f"'{row['disease_name']}')"
+                )
+                .fetchdf()
+                .to_dict(orient="records")
             )
-            if len(generated_matches) > 0:
-                disease_match = self._record_matched_disease(generated_matches[0])
+
+            if len(result) > 0:
+                disease_match = self._record_matched_disease(RankedPhEvalDiseaseResult(**result[0]))
                 relevant_ranks.append(disease_match)
                 primary_key = f"{phenopacket_path.name}-{row['disease_identifier']}"
                 self.conn.execute(
                     f'UPDATE {self.table_name} SET "{self.column}" = ? WHERE identifier = ?',
                     (disease_match, primary_key),
                 )
-        binary_classification_stats.add_classification(standardised_disease_results, relevant_ranks)
+        binary_classification_stats.add_classification(
+            self.db_connection.parse_table_into_dataclass(
+                str(standardised_disease_result_path), RankedPhEvalDiseaseResult
+            ),
+            relevant_ranks,
+        )
 
 
 def assess_phenopacket_disease_prioritisation(
@@ -168,12 +178,11 @@ def assess_phenopacket_disease_prioritisation(
         disease_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
         disease_benchmarker (AssessDiseasePrioritisation): AssessDiseasePrioritisation class instance.
     """
-    standardised_disease_result = run.results_dir.joinpath(
+    standardised_disease_result_path = run.results_dir.joinpath(
         f"pheval_disease_results/{phenopacket_path.stem}-pheval_disease_result.tsv"
     )
-    pheval_disease_result = read_standardised_result(standardised_disease_result)
     disease_benchmarker.assess_disease_prioritisation(
-        parse_pheval_result(RankedPhEvalDiseaseResult, pheval_disease_result),
+        standardised_disease_result_path,
         phenopacket_path,
         disease_binary_classification_stats,
     )
@@ -198,6 +207,7 @@ def benchmark_disease_prioritisation(
     """
     disease_binary_classification_stats = BinaryClassificationStats()
     db_connection = DBConnector()
+    db_connection.initialise()
     disease_benchmarker = AssessDiseasePrioritisation(
         db_connection,
         f"{run.phenopacket_dir.parents[0].name}_disease",
diff --git a/src/pheval/analyse/gene_prioritisation_analysis.py b/src/pheval/analyse/gene_prioritisation_analysis.py
index 743f3e346..689f02b70 100644
--- a/src/pheval/analyse/gene_prioritisation_analysis.py
+++ b/src/pheval/analyse/gene_prioritisation_analysis.py
@@ -1,12 +1,8 @@
-import ast
-import re
 from pathlib import Path
-from typing import List, Union
 
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
 from pheval.analyse.get_connection import DBConnector
-from pheval.analyse.parse_pheval_result import parse_pheval_result, read_standardised_result
 from pheval.analyse.rank_stats import RankStats
 from pheval.analyse.run_data_parser import RunConfig
 from pheval.post_processing.post_processing import RankedPhEvalGeneResult
@@ -102,27 +98,9 @@ def _record_matched_gene(
                 )
             )
 
-    @staticmethod
-    def _check_string_representation(entity: str) -> Union[List[str], str]:
-        """
-        Check if the input string is a representation of a list and returns the list if true, otherwise the string.
-
-        Args:
-            entity (str): The input entity to check.
-
-        Returns:
-            Union[List[str], str]: A list if the input string is a list representation, otherwise
-            the original string.
-        """
-        list_pattern = re.compile(r"^\[\s*(?:[^\[\],\s]+(?:\s*,\s*[^\[\],\s]+)*)?\s*\]$")
-        if list_pattern.match(str(entity)):
-            return ast.literal_eval(entity)
-        else:
-            return entity
-
     def assess_gene_prioritisation(
         self,
-        standardised_gene_results: List[RankedPhEvalGeneResult],
+        standardised_gene_result_path: Path,
         phenopacket_path: Path,
         binary_classification_stats: BinaryClassificationStats,
     ) -> None:
@@ -132,7 +110,7 @@ def assess_gene_prioritisation(
         and records ranks using a PrioritisationRankRecorder.
 
         Args:
-            standardised_gene_results (List[RankedPhEvalGeneResult]) List of standardised gene results.
+            standardised_gene_result_path (Path): Path to the standardised gene TSV result.
             phenopacket_path (Path): Path to the Phenopacket.
             binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
         """
@@ -141,24 +119,19 @@ def assess_gene_prioritisation(
             f"""SELECT * FROM {self.table_name} WHERE phenopacket = '{phenopacket_path.name}'"""
         ).fetchdf()
         for _i, row in df.iterrows():
-            generated_matches = list(
-                result
-                for result in standardised_gene_results
-                if (
-                    isinstance(self._check_string_representation(result.gene_identifier), list)
-                    and row["gene_identifier"]
-                    in self._check_string_representation(result.gene_identifier)
-                    or isinstance(self._check_string_representation(result.gene_identifier), str)
-                    and row["gene_identifier"]
-                    == self._check_string_representation(result.gene_identifier)
-                    or isinstance(self._check_string_representation(result.gene_symbol), list)
-                    and row["gene_symbol"] in self._check_string_representation(result.gene_symbol)
-                    or isinstance(self._check_string_representation(result.gene_symbol), str)
-                    and row["gene_symbol"] == self._check_string_representation(result.gene_symbol)
+            result = (
+                self.conn.execute(
+                    f"SELECT * FROM '{standardised_gene_result_path}' "
+                    f"WHERE contains_entity_function(CAST(COALESCE(gene_identifier, '') AS VARCHAR),"
+                    f" '{row['gene_identifier']}') "
+                    f"OR contains_entity_function(CAST(COALESCE(gene_symbol, '') AS VARCHAR), "
+                    f"'{row['gene_symbol']}')"
                 )
+                .fetchdf()
+                .to_dict(orient="records")
             )
-            if len(generated_matches) > 0:
-                gene_match = self._record_matched_gene(generated_matches[0])
+            if len(result) > 0:
+                gene_match = self._record_matched_gene(RankedPhEvalGeneResult(**result[0]))
                 relevant_ranks.append(gene_match)
                 primary_key = f"{phenopacket_path.name}-{row['gene_symbol']}"
                 self.conn.execute(
@@ -166,7 +139,10 @@ def assess_gene_prioritisation(
                     (gene_match, primary_key),
                 )
         binary_classification_stats.add_classification(
-            pheval_results=standardised_gene_results, relevant_ranks=relevant_ranks
+            self.db_connection.parse_table_into_dataclass(
+                str(standardised_gene_result_path), RankedPhEvalGeneResult
+            ),
+            relevant_ranks,
         )
 
 
@@ -186,12 +162,11 @@ def assess_phenopacket_gene_prioritisation(
         gene_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
         gene_benchmarker (AssessGenePrioritisation): AssessGenePrioritisation class instance.
     """
-    standardised_gene_result = run.results_dir.joinpath(
+    standardised_gene_result_path = run.results_dir.joinpath(
         f"pheval_gene_results/{phenopacket_path.stem}-pheval_gene_result.tsv"
     )
-    pheval_gene_result = read_standardised_result(standardised_gene_result)
     gene_benchmarker.assess_gene_prioritisation(
-        parse_pheval_result(RankedPhEvalGeneResult, pheval_gene_result),
+        standardised_gene_result_path,
         phenopacket_path,
         gene_binary_classification_stats,
     )
@@ -214,6 +189,7 @@ def benchmark_gene_prioritisation(
     """
     gene_binary_classification_stats = BinaryClassificationStats()
     db_connection = DBConnector()
+    db_connection.initialise()
     gene_benchmarker = AssessGenePrioritisation(
         db_connection,
         f"{run.phenopacket_dir.parents[0].name}" f"_gene",
diff --git a/src/pheval/analyse/variant_prioritisation_analysis.py b/src/pheval/analyse/variant_prioritisation_analysis.py
index ea45351d0..c76998c04 100644
--- a/src/pheval/analyse/variant_prioritisation_analysis.py
+++ b/src/pheval/analyse/variant_prioritisation_analysis.py
@@ -1,14 +1,10 @@
 from pathlib import Path
-from typing import List
 
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
 from pheval.analyse.get_connection import DBConnector
-from pheval.analyse.parse_pheval_result import parse_pheval_result, read_standardised_result
 from pheval.analyse.rank_stats import RankStats
 from pheval.analyse.run_data_parser import RunConfig
-
-# from pheval.analyse.run_data_parser import TrackInputOutputDirectories
 from pheval.post_processing.post_processing import RankedPhEvalVariantResult
 from pheval.utils.file_utils import all_files
 from pheval.utils.phenopacket_utils import GenomicVariant
@@ -38,6 +34,7 @@ def __init__(
         """
         self.threshold = threshold
         self.score_order = score_order
+        self.db_connection = db_connection
         self.conn = db_connection.conn
         self.column = column
         self.table_name = table_name
@@ -112,7 +109,7 @@ def _record_matched_variant(
 
     def assess_variant_prioritisation(
         self,
-        standardised_variant_results: List[RankedPhEvalVariantResult],
+        standardised_variant_result_path: Path,
         phenopacket_path: Path,
         binary_classification_stats: BinaryClassificationStats,
     ) -> None:
@@ -123,7 +120,7 @@ def assess_variant_prioritisation(
         and records ranks using a PrioritisationRankRecorder.
 
         Args:
-            standardised_variant_results (List[RankedPhEvalVariantResult]): List of standardised variant results.
+            standardised_variant_result_path (Path): Path to standardised variant TSV result.
             phenopacket_path (Path): Path to the phenopacket.
             binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
         """
@@ -138,19 +135,21 @@ def assess_variant_prioritisation(
                 ref=row["ref"],
                 alt=row["alt"],
             )
-            generated_matches = list(
-                result
-                for result in standardised_variant_results
-                if causative_variant
-                == GenomicVariant(
-                    chrom=result.chromosome,
-                    pos=result.start,
-                    alt=result.alt,
-                    ref=result.ref,
+            result = (
+                self.conn.execute(
+                    f"SELECT * FROM '{standardised_variant_result_path}' "
+                    f"WHERE "
+                    f"chromosome == '{causative_variant.chrom}' AND "
+                    f"start == {causative_variant.pos} AND "
+                    f"ref == '{causative_variant.ref}' AND "
+                    f"alt == '{causative_variant.alt}'"
                 )
+                .fetchdf()
+                .to_dict(orient="records")
             )
-            if len(generated_matches) > 0:
-                variant_match = self._record_matched_variant(generated_matches[0])
+
+            if len(result) > 0:
+                variant_match = self._record_matched_variant(RankedPhEvalVariantResult(**result[0]))
                 relevant_ranks.append(variant_match)
                 primary_key = (
                     f"{phenopacket_path.name}-{causative_variant.chrom}-{causative_variant.pos}-"
@@ -161,7 +160,12 @@ def assess_variant_prioritisation(
                     (variant_match, primary_key),
                 )
 
-        binary_classification_stats.add_classification(standardised_variant_results, relevant_ranks)
+        binary_classification_stats.add_classification(
+            self.db_connection.parse_table_into_dataclass(
+                str(standardised_variant_result_path), RankedPhEvalVariantResult
+            ),
+            relevant_ranks,
+        )
 
 
 def assess_phenopacket_variant_prioritisation(
@@ -180,12 +184,11 @@ def assess_phenopacket_variant_prioritisation(
         variant_binary_classification_stats (BinaryClassificationStats): BinaryClassificationStats class instance.
         variant_benchmarker (AssessVariantPrioritisation): AssessVariantPrioritisation class instance.
     """
-    standardised_variant_result = run.results_dir.joinpath(
+    standardised_variant_result_path = run.results_dir.joinpath(
         f"pheval_variant_results/{phenopacket_path.stem}-pheval_variant_result.tsv"
     )
-    pheval_variant_result = read_standardised_result(standardised_variant_result)
     variant_benchmarker.assess_variant_prioritisation(
-        parse_pheval_result(RankedPhEvalVariantResult, pheval_variant_result),
+        standardised_variant_result_path,
         phenopacket_path,
         variant_binary_classification_stats,
     )

From a5d4ee0fc58b3b422338ae05933957f1806ecc8c Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Mon, 19 Aug 2024 13:52:09 +0100
Subject: [PATCH 47/81] add custom `contains_entity_function` and function to
 parse a table into a list of dataclass instances

---
 src/pheval/analyse/get_connection.py | 72 ++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/src/pheval/analyse/get_connection.py b/src/pheval/analyse/get_connection.py
index b54e6a8e0..f0b4a33fc 100644
--- a/src/pheval/analyse/get_connection.py
+++ b/src/pheval/analyse/get_connection.py
@@ -1,6 +1,16 @@
+import ast
+import re
+from typing import List, Type, Union
+
 import duckdb
 from duckdb import DuckDBPyConnection
 
+from pheval.post_processing.post_processing import (
+    RankedPhEvalDiseaseResult,
+    RankedPhEvalGeneResult,
+    RankedPhEvalVariantResult,
+)
+
 
 class DBConnector:
     """
@@ -11,6 +21,10 @@ def __init__(self):
         """Initialize the DBConnector class."""
         self.conn = self.get_connection()
 
+    def initialise(self):
+        """Initialise the duckdb connection."""
+        self.add_contains_function()
+
     @staticmethod
     def get_connection() -> DuckDBPyConnection:
         """
@@ -46,6 +60,64 @@ def drop_table(self, table_name: str) -> None:
         """
         self.conn.execute(f"""DROP TABLE IF EXISTS "{table_name}";""")
 
+    @staticmethod
+    def contains_entity_function(entity: str, known_causative_entity: str) -> bool:
+        """
+        Determines if a known causative entity is present within an entity or list of entities.
+        Args:
+            entity (str): The entity to be checked. It can be a single entity or a string representation of a list.
+            known_causative_entity (str): The entity to search for within the `entity`.
+
+        Returns:
+            bool: `True` if `known_causative_entity` is found in `entity` (or its list representation),
+                `False` otherwise.
+        """
+        list_pattern = re.compile(r"^\[\s*(?:[^\[\],\s]+(?:\s*,\s*[^\[\],\s]+)*)?\s*]$")
+        if list_pattern.match(str(entity)):
+            list_representation = ast.literal_eval(entity)
+            if isinstance(list_representation, list):
+                return known_causative_entity in list_representation
+        return known_causative_entity == entity
+
+    def add_contains_function(self) -> None:
+        """
+        Adds a custom `contains_entity_function` to the DuckDB connection if it does not already exist.
+        """
+        result = self.conn.execute(
+            "SELECT * FROM duckdb_functions() WHERE function_name = ?", ["contains_entity_function"]
+        ).fetchall()
+        if not result:
+            self.conn.create_function("contains_entity_function", self.contains_entity_function)
+
+    def parse_table_into_dataclass(
+        self,
+        table_name: str,
+        dataclass: Union[
+            Type[RankedPhEvalGeneResult],
+            Type[RankedPhEvalVariantResult],
+            Type[RankedPhEvalDiseaseResult],
+        ],
+    ) -> Union[
+        List[RankedPhEvalGeneResult],
+        List[RankedPhEvalVariantResult],
+        List[RankedPhEvalDiseaseResult],
+    ]:
+        """
+        Parses a DuckDB table into a list of dataclass instances.
+        Args:
+            table_name (str): The name of the DuckDB table to be parsed.
+            dataclass (Union[Type[RankedPhEvalGeneResult], Type[RankedPhEvalVariantResult],
+            Type[RankedPhEvalDiseaseResult]]):
+                The dataclass type to which each row in the table should be mapped.
+
+        Returns:
+            List[dataclass]: A list of instances of the provided dataclass, each representing a row from the table.
+        """
+        result = (
+            self.conn.execute(f"SELECT * FROM '{table_name}'").fetchdf().to_dict(orient="records")
+        )
+        return [dataclass(**row) for row in result]
+
     def close(self):
         """Close the connection to the database."""
         self.conn.close()

From d96751ffa1c40d34a464a62f5d78fc7130c3e703 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Mon, 19 Aug 2024 13:52:21 +0100
Subject: [PATCH 48/81] reformat tests to use duckdb table

---
 tests/test_analysis.py | 134 ++++++++++++-----------------------------
 1 file changed, 38 insertions(+), 96 deletions(-)

diff --git a/tests/test_analysis.py b/tests/test_analysis.py
index fda3f40c1..42b6e665d 100644
--- a/tests/test_analysis.py
+++ b/tests/test_analysis.py
@@ -30,32 +30,16 @@ def setUpClass(cls):
             "('phenopacket_1.json-PLXNA1', 'phenopacket_1.json', 'PLXNA1', 'ENSG00000114554'),"
             "('phenopacket_1.json-LARGE1', 'phenopacket_1.json', 'LARGE1', 'ENSG00000133424'),"
         )
-        cls.standardised_gene_results = [
-            RankedPhEvalGeneResult(
-                gene_symbol="PLXNA1",
-                gene_identifier="ENSG00000114554",
-                score=0.8764,
-                rank=1,
-            ),
-            RankedPhEvalGeneResult(
-                gene_symbol="ZNF804B",
-                gene_identifier="ENSG00000182348",
-                score=0.5777,
-                rank=2,
-            ),
-            RankedPhEvalGeneResult(
-                gene_symbol="SMCO2",
-                gene_identifier="ENSG00000165935",
-                score=0.5777,
-                rank=2,
-            ),
-            RankedPhEvalGeneResult(
-                gene_symbol="SPNS1",
-                gene_identifier="ENSG00000169682",
-                score=0.3765,
-                rank=4,
-            ),
-        ]
+        cls.db_connection.execute(
+            "CREATE TABLE result (rank INTEGER, score DOUBLE, gene_symbol VARCHAR, gene_identifier VARCHAR)"
+        )
+        cls.db_connection.execute(
+            "INSERT INTO result (rank, score, gene_symbol, gene_identifier) VALUES "
+            "(1, 0.8764, 'PLXNA1', 'ENSG00000114554'),"
+            "(2, 0.5777, 'ZNF804B', 'ENSG00000182348'),"
+            "(2, 0.5777, 'SMCO2', 'ENSG00000165935'),"
+            "(4, 0.3765, 'SPNS1', 'ENSG00000169682')"
+        )
 
     @classmethod
     def tearDownClass(cls):
@@ -146,8 +130,9 @@ def test_assess_gene_with_threshold_meets_cutoff(self):
         )
 
     def test_assess_gene_prioritisation_no_threshold(self):
+        self.db_connector.add_contains_function()
         self.assess_gene_prioritisation.assess_gene_prioritisation(
-            self.standardised_gene_results,
+            "result",
             Path("/path/to/phenopacket_1.json"),
             self.binary_classification_stats,
         )
@@ -171,17 +156,6 @@ def test_assess_gene_prioritisation_no_threshold(self):
             ),
         )
 
-    def test__check_string_representation_string(self):
-        self.assertEqual(
-            self.assess_gene_prioritisation._check_string_representation("GENE1"), "GENE1"
-        )
-
-    def test__check_string_representation_list(self):
-        self.assertEqual(
-            self.assess_gene_prioritisation._check_string_representation("['GENE1', 'GENE2']"),
-            ["GENE1", "GENE2"],
-        )
-
 
 class TestAssessVariantPrioritisation(unittest.TestCase):
     @classmethod
@@ -196,35 +170,16 @@ def setUpClass(cls):
             "('phenopacket_1.json-3-126741108-G-C', 'phenopacket_1.json', '3', 126741108, 'G', 'C'),"
             "('phenopacket_1.json-16-133564345-C-T', 'phenopacket_1.json', '16', 133564345, 'C', 'T'),"
         )
-        cls.standardised_variant_results = [
-            RankedPhEvalVariantResult(
-                chromosome="3",
-                start=126730873,
-                end=126730873,
-                ref="G",
-                alt="A",
-                score=0.0484,
-                rank=1,
-            ),
-            RankedPhEvalVariantResult(
-                chromosome="3",
-                start=126730873,
-                end=126730873,
-                ref="G",
-                alt="A",
-                score=0.0484,
-                rank=1,
-            ),
-            RankedPhEvalVariantResult(
-                chromosome="3",
-                start=126741108,
-                end=126741108,
-                ref="G",
-                alt="C",
-                score=0.0484,
-                rank=2,
-            ),
-        ]
+        cls.db_connection.execute(
+            "CREATE TABLE result (rank INTEGER, score DOUBLE,"
+            'chromosome VARCHAR, start INTEGER, "end" INTEGER, ref VARCHAR, alt VARCHAR)'
+        )
+        cls.db_connection.execute(
+            'INSERT INTO result (rank, score, chromosome, start, "end", ref, alt) VALUES '
+            "(1, 0.0484, '3', 126730873, 126730873 ,'G', 'A'),"
+            "(1, 0.0484, '3', 126730873, 126730873 ,'G', 'T'),"
+            "(3, 0.0484, '3', 126741108, 126741108 ,'G', 'C'),"
+        )
 
     @classmethod
     def tearDownClass(cls):
@@ -327,8 +282,9 @@ def test_assess_variant_with_threshold_meets_cutoff(self):
         )
 
     def test_assess_variant_prioritisation(self):
+        self.db_connector.add_contains_function()
         self.assess_variant_prioritisation.assess_variant_prioritisation(
-            self.standardised_variant_results,
+            "result",
             Path("/path/to/phenopacket_1.json"),
             self.binary_classification_stats,
         )
@@ -343,7 +299,7 @@ def test_assess_variant_prioritisation(self):
                     126741108,
                     "G",
                     "C",
-                    2,
+                    3,
                 ),
                 (
                     "phenopacket_1.json-16-133564345-C-T",
@@ -382,32 +338,17 @@ def setUpClass(cls):
             "INSERT INTO test_table_disease (identifier, phenopacket, disease_identifier, disease_name) VALUES "
             "('phenopacket_1.json-OMIM:231670', 'phenopacket_1.json', 'OMIM:231670', 'Glutaric aciduria type 1'),"
         )
-        cls.standardised_disease_results = [
-            RankedPhEvalDiseaseResult(
-                disease_name="Glutaric aciduria type 1",
-                disease_identifier="OMIM:231670",
-                score=1.0,
-                rank=1,
-            ),
-            RankedPhEvalDiseaseResult(
-                disease_name="Glutaric aciduria type 2",
-                disease_identifier="OMIM:231680",
-                score=0.5,
-                rank=2,
-            ),
-            RankedPhEvalDiseaseResult(
-                disease_name="Glutaric aciduria type 3",
-                disease_identifier="OMIM:231690",
-                score=0.5,
-                rank=2,
-            ),
-            RankedPhEvalDiseaseResult(
-                disease_name="Glutaric aciduria type 4",
-                disease_identifier="OMIM:231700",
-                score=0.3,
-                rank=4,
-            ),
-        ]
+        cls.db_connection.execute(
+            "CREATE TABLE result (rank INTEGER, score DOUBLE,"
+            "disease_identifier VARCHAR, disease_name VARCHAR)"
+        )
+        cls.db_connection.execute(
+            "INSERT INTO result (rank, score, disease_identifier, disease_name) VALUES "
+            "(1, 1.0, 'OMIM:231670', 'Glutaric aciduria type 1'),"
+            "(2, 0.5, 'OMIM:231680', 'Glutaric aciduria type 2'),"
+            "(2, 0.5, 'OMIM:231690', 'Glutaric aciduria type 3'),"
+            "(4, 0.3, 'OMIM:231700', 'Glutaric aciduria type 4'),"
+        )
 
     @classmethod
     def tearDownClass(cls):
@@ -498,8 +439,9 @@ def test_assess_disease_with_threshold_meets_cutoff(self):
         )
 
     def test_assess_disease_prioritisation(self):
+        self.db_connector.add_contains_function()
         self.assess_disease_prioritisation.assess_disease_prioritisation(
-            self.standardised_disease_results,
+            "result",
             Path("/path/to/phenopacket_1.json"),
             self.binary_classification_stats,
         )

From a73112fbb35e9b102bc6d4acb18205497e172d16 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Mon, 19 Aug 2024 13:56:41 +0100
Subject: [PATCH 49/81] remove unused methods

---
 src/pheval/analyse/parse_pheval_result.py |  43 -----
 src/pheval/analyse/run_data_parser.py     |  14 --
 tests/test_parse_pheval_result.py         | 181 ----------------------
 3 files changed, 238 deletions(-)
 delete mode 100644 src/pheval/analyse/parse_pheval_result.py
 delete mode 100644 tests/test_parse_pheval_result.py

diff --git a/src/pheval/analyse/parse_pheval_result.py b/src/pheval/analyse/parse_pheval_result.py
deleted file mode 100644
index 22c91a40c..000000000
--- a/src/pheval/analyse/parse_pheval_result.py
+++ /dev/null
@@ -1,43 +0,0 @@
-import logging
-from pathlib import Path
-from typing import List
-
-import pandas as pd
-
-from pheval.post_processing.post_processing import PhEvalResult
-
-info_log = logging.getLogger("info")
-
-
-def read_standardised_result(standardised_result_path: Path) -> List[dict]:
-    """
-    Read the standardised result output and return a list of dictionaries.
-
-    Args:
-        standardised_result_path (Path): The path to the file containing the standardised result output.
-
-    Returns:
-        List[dict]: A list of dictionaries representing the content of the standardised result file.
-    """
-    if standardised_result_path.is_file():
-        return pd.read_csv(standardised_result_path, delimiter="\t").to_dict("records")
-    else:
-        info_log.info(f"Could not find {standardised_result_path}")
-        return pd.DataFrame().to_dict("records")
-
-
-def parse_pheval_result(
-    data_class_type: PhEvalResult, pheval_result: List[dict]
-) -> List[PhEvalResult]:
-    """
-    Parse PhEval result into specified dataclass type.
-
-    Args:
-        data_class_type (PhEvalResult): The data class type to parse the result into.
-        pheval_result (List[dict]): A list of dictionaries representing the PhEval result.
-
-    Returns:
-        List[PhEvalResult]: A list of instances of the specified data class type,
-        each instance representing a row in the PhEval result.
-    """
-    return [data_class_type(**row) for row in pheval_result]
diff --git a/src/pheval/analyse/run_data_parser.py b/src/pheval/analyse/run_data_parser.py
index 75b6cc182..985bb1427 100644
--- a/src/pheval/analyse/run_data_parser.py
+++ b/src/pheval/analyse/run_data_parser.py
@@ -50,17 +50,3 @@ def parse_run_config(run_data_path: Path) -> Config:
     f.close()
     config = Config(**config_data)
     return config
-
-
-@dataclass
-class TrackInputOutputDirectories:
-    """
-    Track the input phenopacket test data for a corresponding pheval output directory.
-
-    Attributes:
-        phenopacket_dir (Path): The directory containing input phenopackets.
-        results_dir (Path): The directory containing output results from pheval.
-    """
-
-    phenopacket_dir: Path
-    results_dir: Path
diff --git a/tests/test_parse_pheval_result.py b/tests/test_parse_pheval_result.py
deleted file mode 100644
index 814e27f85..000000000
--- a/tests/test_parse_pheval_result.py
+++ /dev/null
@@ -1,181 +0,0 @@
-from unittest import TestCase
-
-from pheval.analyse.parse_pheval_result import parse_pheval_result
-from pheval.post_processing.post_processing import (
-    RankedPhEvalDiseaseResult,
-    RankedPhEvalGeneResult,
-    RankedPhEvalVariantResult,
-)
-
-
-class TestParsePhEvalResult(TestCase):
-    def test_parse_pheval_gene_result(self):
-        self.assertEqual(
-            parse_pheval_result(
-                RankedPhEvalGeneResult,
-                [
-                    {
-                        "gene_symbol": "PLXNA1",
-                        "gene_identifier": "ENSG00000114554",
-                        "score": 0.8764,
-                        "rank": 1,
-                    },
-                    {
-                        "gene_symbol": "ZNF804B",
-                        "gene_identifier": "ENSG00000182348",
-                        "score": 0.5777,
-                        "rank": 2,
-                    },
-                    {
-                        "gene_symbol": "SMCO2",
-                        "gene_identifier": "ENSG00000165935",
-                        "score": 0.5777,
-                        "rank": 2,
-                    },
-                    {
-                        "gene_symbol": "SPNS1",
-                        "gene_identifier": "ENSG00000169682",
-                        "score": 0.3765,
-                        "rank": 4,
-                    },
-                ],
-            ),
-            [
-                RankedPhEvalGeneResult(
-                    gene_symbol="PLXNA1",
-                    gene_identifier="ENSG00000114554",
-                    score=0.8764,
-                    rank=1,
-                ),
-                RankedPhEvalGeneResult(
-                    gene_symbol="ZNF804B",
-                    gene_identifier="ENSG00000182348",
-                    score=0.5777,
-                    rank=2,
-                ),
-                RankedPhEvalGeneResult(
-                    gene_symbol="SMCO2",
-                    gene_identifier="ENSG00000165935",
-                    score=0.5777,
-                    rank=2,
-                ),
-                RankedPhEvalGeneResult(
-                    gene_symbol="SPNS1",
-                    gene_identifier="ENSG00000169682",
-                    score=0.3765,
-                    rank=4,
-                ),
-            ],
-        )
-
-    def test_parse_pheval_variant_result(self):
-        self.assertEqual(
-            parse_pheval_result(
-                RankedPhEvalVariantResult,
-                [
-                    {
-                        "chromosome": "3",
-                        "start": 126730873,
-                        "end": 126730873,
-                        "ref": "G",
-                        "alt": "A",
-                        "score": 0.0484,
-                        "rank": 1,
-                    },
-                    {
-                        "chromosome": "3",
-                        "start": 126730873,
-                        "end": 126730873,
-                        "ref": "G",
-                        "alt": "A",
-                        "score": 0.0484,
-                        "rank": 1,
-                    },
-                    {
-                        "chromosome": "3",
-                        "start": 126741108,
-                        "end": 126741108,
-                        "ref": "G",
-                        "alt": "A",
-                        "score": 0.0484,
-                        "rank": 1,
-                    },
-                ],
-            ),
-            [
-                RankedPhEvalVariantResult(
-                    chromosome="3",
-                    start=126730873,
-                    end=126730873,
-                    ref="G",
-                    alt="A",
-                    score=0.0484,
-                    rank=1,
-                ),
-                RankedPhEvalVariantResult(
-                    chromosome="3",
-                    start=126730873,
-                    end=126730873,
-                    ref="G",
-                    alt="A",
-                    score=0.0484,
-                    rank=1,
-                ),
-                RankedPhEvalVariantResult(
-                    chromosome="3",
-                    start=126741108,
-                    end=126741108,
-                    ref="G",
-                    alt="A",
-                    score=0.0484,
-                    rank=1,
-                ),
-            ],
-        )
-
-    def test_parse_pheval_disease_result(self):
-        self.assertEqual(
-            parse_pheval_result(
-                RankedPhEvalDiseaseResult,
-                [
-                    {
-                        "disease_name": "Glutaric aciduria type 1",
-                        "disease_identifier": "OMIM:231670",
-                        "score": 1.0,
-                        "rank": 1,
-                    },
-                    {
-                        "disease_name": "Glutaric aciduria type 2",
-                        "disease_identifier": "OMIM:231680",
-                        "score": 0.8,
-                        "rank": 2,
-                    },
-                    {
-                        "disease_name": "Glutaric aciduria type 3",
-                        "disease_identifier": "OMIM:231690",
-                        "score": 0.6,
-                        "rank": 3,
-                    },
-                ],
-            ),
-            [
-                RankedPhEvalDiseaseResult(
-                    disease_name="Glutaric aciduria type 1",
-                    disease_identifier="OMIM:231670",
-                    score=1.0,
-                    rank=1,
-                ),
-                RankedPhEvalDiseaseResult(
-                    disease_name="Glutaric aciduria type 2",
-                    disease_identifier="OMIM:231680",
-                    score=0.8,
-                    rank=2,
-                ),
-                RankedPhEvalDiseaseResult(
-                    disease_name="Glutaric aciduria type 3",
-                    disease_identifier="OMIM:231690",
-                    score=0.6,
-                    rank=3,
-                ),
-            ],
-        )

From 195f7faf0e459879c3427e33c1444fe29feed9ad Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Mon, 19 Aug 2024 14:06:51 +0100
Subject: [PATCH 50/81] remove unused import

---
 src/pheval/analyse/run_data_parser.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/pheval/analyse/run_data_parser.py b/src/pheval/analyse/run_data_parser.py
index 985bb1427..bf2f9ba62 100644
--- a/src/pheval/analyse/run_data_parser.py
+++ b/src/pheval/analyse/run_data_parser.py
@@ -1,4 +1,3 @@
-from dataclasses import dataclass
 from pathlib import Path
 from typing import List
 

From 02c60ebe0b32e3fb9b77d91bd35e476abfb3154b Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 22 Aug 2024 15:26:59 +0100
Subject: [PATCH 51/81] remove methods for benchmarking a single directory, add
 plot customisation

---
 src/pheval/analyse/analysis.py | 147 +++++++--------------------------
 1 file changed, 30 insertions(+), 117 deletions(-)

diff --git a/src/pheval/analyse/analysis.py b/src/pheval/analyse/analysis.py
index 820e35f20..30c9ff36e 100644
--- a/src/pheval/analyse/analysis.py
+++ b/src/pheval/analyse/analysis.py
@@ -6,99 +6,17 @@
 )
 from pheval.analyse.generate_summary_outputs import (
     generate_benchmark_comparison_output,
-    generate_benchmark_output,
 )
 from pheval.analyse.parse_corpus import CorpusParser
 from pheval.analyse.rank_stats import RankStatsWriter
-from pheval.analyse.run_data_parser import Config, RunConfig
-
-
-def _run_benchmark(
-    run_config: RunConfig,
-    score_order: str,
-    output_prefix: str,
-    threshold: float,
-    plot_type: str,
-    benchmark_generator: BenchmarkRunOutputGenerator,
-) -> None:
-    """Run a benchmark on a result directory.
-
-    Args:
-        run_config (RunConfig): Run configuration.
-        score_order (str): The order in which scores are arranged, this can be either ascending or descending.
-        output_prefix (str): Prefix for the benchmark output file names.
-        threshold (float): The threshold for benchmark evaluation.
-        plot_type (str): Type of plot for benchmark visualisation.
-        benchmark_generator (BenchmarkRunOutputGenerator): Generator for benchmark run output.
-    """
-    CorpusParser(run_config.phenopacket_dir).parse_corpus(benchmark_generator)
-    stats_writer = RankStatsWriter(
-        str(output_prefix + benchmark_generator.stats_comparison_file_suffix)
-    )
-    benchmark_result = benchmark_generator.generate_benchmark_run_results(
-        run_config, score_order, threshold
-    )
-    stats_writer.add_statistics_entry(
-        run_config.run_identifier,
-        benchmark_result.rank_stats,
-        benchmark_result.binary_classification_stats,
-    )
-    generate_benchmark_output(benchmark_result, plot_type, benchmark_generator)
-
-
-def benchmark_directory(
-    run_config: RunConfig,
-    score_order: str,
-    output_prefix: str,
-    threshold: float,
-    plot_type: str,
-) -> None:
-    """
-    Benchmark prioritisation performance for a single run.
-
-    Args:
-        run_config (RunConfig): Run configuration.
-        score_order (str): The order in which scores are arranged, this can be either ascending or descending.
-        output_prefix (str): Prefix for the benchmark output file names.
-        threshold (float): The threshold for benchmark evaluation.
-        plot_type (str): Type of plot for benchmark visualisation.
-    """
-    if run_config.gene_analysis:
-        _run_benchmark(
-            run_config=run_config,
-            score_order=score_order,
-            output_prefix=output_prefix,
-            threshold=threshold,
-            plot_type=plot_type,
-            benchmark_generator=GeneBenchmarkRunOutputGenerator(),
-        )
-    if run_config.variant_analysis:
-        _run_benchmark(
-            run_config=run_config,
-            score_order=score_order,
-            output_prefix=output_prefix,
-            threshold=threshold,
-            plot_type=plot_type,
-            benchmark_generator=VariantBenchmarkRunOutputGenerator(),
-        )
-    if run_config.disease_analysis:
-        _run_benchmark(
-            run_config=run_config,
-            score_order=score_order,
-            output_prefix=output_prefix,
-            threshold=threshold,
-            plot_type=plot_type,
-            benchmark_generator=DiseaseBenchmarkRunOutputGenerator(),
-        )
+from pheval.analyse.run_data_parser import Config
 
 
 def _run_benchmark_comparison(
-    run_config: Config,
-    score_order: str,
-    output_prefix: str,
-    threshold: float,
-    plot_type: str,
-    benchmark_generator: BenchmarkRunOutputGenerator,
+        run_config: Config,
+        score_order: str,
+        threshold: float,
+        benchmark_generator: BenchmarkRunOutputGenerator,
 ) -> None:
     """
     Run a benchmark on several result directories.
@@ -107,23 +25,19 @@ def _run_benchmark_comparison(
         run_config (List[TrackInputOutputDirectories]): List of input and output directories
             for tracking results across multiple directories.
         score_order (str): The order in which scores are arranged, this can be either ascending or descending.
-        output_prefix (str): Prefix for the benchmark output file names.
         threshold (float): The threshold for benchmark evaluation.
-        plot_type (str): Type of plot for benchmark visualisation.
         benchmark_generator (BenchmarkRunOutputGenerator): Generator for benchmark run output.
     """
-    stats_writer = RankStatsWriter(
-        str(output_prefix + benchmark_generator.stats_comparison_file_suffix)
-    )
+    stats_writer = RankStatsWriter(run_config.benchmark_name, benchmark_generator.stats_comparison_file)
     unique_test_corpora_directories = set([result.phenopacket_dir for result in run_config.runs])
     [
-        CorpusParser(test_corpora_directory).parse_corpus(benchmark_generator)
+        CorpusParser(run_config.benchmark_name, test_corpora_directory).parse_corpus(benchmark_generator)
         for test_corpora_directory in unique_test_corpora_directories
     ]
     benchmarking_results = []
     for run in run_config.runs:
         benchmark_result = benchmark_generator.generate_benchmark_run_results(
-            run, score_order, threshold
+            run_config.benchmark_name, run, score_order, threshold
         )
         stats_writer.add_statistics_entry(
             run.run_identifier,
@@ -134,9 +48,9 @@ def _run_benchmark_comparison(
     run_identifiers = [run.run_identifier for run in run_config.runs]
     [
         generate_benchmark_comparison_output(
+            run_config.benchmark_name,
             benchmarking_results,
             run_identifiers,
-            plot_type,
             benchmark_generator,
             f"{unique_test_corpora_directory.parents[0].name}_"
             f"{benchmark_generator.prioritisation_type_string}",
@@ -146,11 +60,9 @@ def _run_benchmark_comparison(
 
 
 def benchmark_run_comparisons(
-    run_config: Config,
-    score_order: str,
-    output_prefix: str,
-    threshold: float,
-    plot_type: str,
+        run_config: Config,
+        score_order: str,
+        threshold: float,
 ) -> None:
     """
     Benchmark prioritisation performance for several runs.
@@ -158,37 +70,38 @@ def benchmark_run_comparisons(
     Args:
         run_config (Config): Run configurations.
         score_order (str): The order in which scores are arranged, this can be either ascending or descending.
-        output_prefix (str): Prefix for the benchmark output file names.
         threshold (float): The threshold for benchmark evaluation.
-        plot_type (str): Type of plot for benchmark visualisation.
     """
-    gene_analysis_runs = Config(runs=[run for run in run_config.runs if run.gene_analysis])
-    variant_analysis_runs = Config(runs=[run for run in run_config.runs if run.variant_analysis])
-    disease_analysis_runs = Config(runs=[run for run in run_config.runs if run.disease_analysis])
-    if gene_analysis_runs:
+    gene_analysis_runs = Config(benchmark_name=run_config.benchmark_name,
+                                runs=[run for run in run_config.runs if run.gene_analysis],
+                                plot_customisation=run_config.plot_customisation)
+    variant_analysis_runs = Config(benchmark_name=run_config.benchmark_name,
+                                   runs=[run for run in run_config.runs if run.variant_analysis],
+                                   plot_customisation=run_config.plot_customisation)
+    disease_analysis_runs = Config(benchmark_name=run_config.benchmark_name,
+                                   runs=[run for run in run_config.runs if run.disease_analysis],
+                                   plot_customisation=run_config.plot_customisation)
+    if gene_analysis_runs.runs:
         _run_benchmark_comparison(
             run_config=gene_analysis_runs,
             score_order=score_order,
-            output_prefix=output_prefix,
             threshold=threshold,
-            plot_type=plot_type,
-            benchmark_generator=GeneBenchmarkRunOutputGenerator(),
+            benchmark_generator=GeneBenchmarkRunOutputGenerator(
+                plot_customisation=gene_analysis_runs.plot_customisation.gene_plots),
         )
-    if variant_analysis_runs:
+    if variant_analysis_runs.runs:
         _run_benchmark_comparison(
             run_config=variant_analysis_runs,
             score_order=score_order,
-            output_prefix=output_prefix,
             threshold=threshold,
-            plot_type=plot_type,
-            benchmark_generator=VariantBenchmarkRunOutputGenerator(),
+            benchmark_generator=VariantBenchmarkRunOutputGenerator(
+                plot_customisation=variant_analysis_runs.plot_customisation.variant_plots),
         )
-    if disease_analysis_runs:
+    if disease_analysis_runs.runs:
         _run_benchmark_comparison(
             run_config=disease_analysis_runs,
             score_order=score_order,
-            output_prefix=output_prefix,
             threshold=threshold,
-            plot_type=plot_type,
-            benchmark_generator=DiseaseBenchmarkRunOutputGenerator(),
+            benchmark_generator=DiseaseBenchmarkRunOutputGenerator(
+                plot_customisation=disease_analysis_runs.plot_customisation.disease_plots),
         )

From 5e180744173a9c5e53ad8de5a319017538d04301 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 22 Aug 2024 15:27:18 +0100
Subject: [PATCH 52/81] add plot customisation

---
 src/pheval/analyse/benchmark_generator.py | 38 +++++++++++++----------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/src/pheval/analyse/benchmark_generator.py b/src/pheval/analyse/benchmark_generator.py
index 9441022ad..56da9e489 100644
--- a/src/pheval/analyse/benchmark_generator.py
+++ b/src/pheval/analyse/benchmark_generator.py
@@ -4,7 +4,7 @@
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.disease_prioritisation_analysis import benchmark_disease_prioritisation
 from pheval.analyse.gene_prioritisation_analysis import benchmark_gene_prioritisation
-from pheval.analyse.run_data_parser import RunConfig
+from pheval.analyse.run_data_parser import RunConfig, SinglePlotCustomisation
 from pheval.analyse.variant_prioritisation_analysis import benchmark_variant_prioritisation
 from pheval.constants import (
     DISEASE_PLOT_Y_LABEL,
@@ -21,18 +21,19 @@ class BenchmarkRunOutputGenerator:
     """Base class for recording data required for generating benchmarking outputs.
 
     Attributes:
+        plot_customisation (SinglePlotCustomisation): Customisation for plot.
         prioritisation_type_string (str):  Prioritisation type string.
         y_label (str): Label for the y-axis in benchmarking outputs.
         generate_benchmark_run_results (Callable): Callable to generate benchmark run results.
             Takes parameters: input and results directory, score order, threshold, rank comparison,
             and returns BenchmarkRunResults.
-        stats_comparison_file_suffix (str): Suffix for the rank comparison file.
+        stats_comparison_file (str): Suffix for the rank comparison file.
     """
-
+    plot_customisation: SinglePlotCustomisation
     prioritisation_type_string: str
     y_label: str
-    generate_benchmark_run_results: Callable[[RunConfig, str, float], BenchmarkRunResults]
-    stats_comparison_file_suffix: str
+    generate_benchmark_run_results: Callable[[str, RunConfig, str, float], BenchmarkRunResults]
+    stats_comparison_file: str
 
 
 @dataclass
@@ -45,6 +46,7 @@ class GeneBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
     specifically for gene prioritisation benchmarking.
 
     Attributes:
+        plot_customisation (SinglePlotCustomisation): Customisation for plot.
         prioritisation_type_string (str): Prioritisation type string.
             Defaults to GENE_PRIORITISATION_TYPE_STR.
         y_label (str): Label for the y-axis in gene prioritisation benchmarking outputs.
@@ -53,16 +55,16 @@ class GeneBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
             benchmark run results. Defaults to benchmark_gene_prioritisation.
             Takes parameters: run configuration, score order, threshold, rank comparison,
             and returns BenchmarkRunResults.
-        stats_comparison_file_suffix (str): Suffix for the gene rank comparison file.
+        stats_comparison_file (str): Suffix for the gene rank comparison file.
             Defaults to "-gene_summary".
     """
-
+    plot_customisation: SinglePlotCustomisation = None
     prioritisation_type_string: str = GENE_PRIORITISATION_TYPE_STR
     y_label: str = GENE_PLOT_Y_LABEL
-    generate_benchmark_run_results: Callable[[RunConfig, str, float], BenchmarkRunResults] = (
+    generate_benchmark_run_results: Callable[[str, RunConfig, str, float], BenchmarkRunResults] = (
         benchmark_gene_prioritisation
     )
-    stats_comparison_file_suffix: str = "-gene_summary"
+    stats_comparison_file: str = "gene_summary"
 
 
 @dataclass
@@ -75,6 +77,7 @@ class VariantBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
     specifically for variant prioritisation benchmarking.
 
     Attributes:
+        plot_customisation (SinglePlotCustomisation): Customisation for plot.
         prioritisation_type_string (str): Prioritisation type string.
             Defaults to VARIANT_PRIORITISATION_TYPE_STR.
         y_label (str): Label for the y-axis in variant prioritisation benchmarking outputs.
@@ -83,17 +86,17 @@ class VariantBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
             benchmark run results. Defaults to benchmark_variant_prioritisation.
             Takes parameters: run configuration, score order, threshold, rank comparison,
             and returns BenchmarkRunResults.
-        stats_comparison_file_suffix (str): Suffix for the variant rank comparison file.
+        stats_comparison_file (str): Suffix for the variant rank comparison file.
             Defaults to "-variant_summary".
 
     """
-
+    plot_customisation: SinglePlotCustomisation = None
     prioritisation_type_string: str = VARIANT_PRIORITISATION_TYPE_STR
     y_label: str = VARIANT_PLOT_Y_LABEL
-    generate_benchmark_run_results: Callable[[RunConfig, str, float], BenchmarkRunResults] = (
+    generate_benchmark_run_results: Callable[[str, RunConfig, str, float], BenchmarkRunResults] = (
         benchmark_variant_prioritisation
     )
-    stats_comparison_file_suffix: str = "-variant_summary"
+    stats_comparison_file: str = "variant_summary"
 
 
 @dataclass
@@ -106,6 +109,7 @@ class DiseaseBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
     specifically for disease prioritisation benchmarking.
 
     Attributes:
+        plot_customisation (SinglePlotCustomisation): Customisation for plot.
         prioritisation_type_string (str): Prioritisation type string.
             Defaults to DISEASE_PRIORITISATION_TYPE_STR.
         y_label (str): Label for the y-axis in disease prioritisation benchmarking outputs.
@@ -114,13 +118,13 @@ class DiseaseBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
             benchmark run results. Defaults to benchmark_disease_prioritisation.
             Takes parameters: run configuration, score order, threshold, rank comparison,
             and returns BenchmarkRunResults.
-        stats_comparison_file_suffix (str): Suffix for the disease rank comparison file.
+        stats_comparison_file (str): Suffix for the disease rank comparison file.
             Defaults to "-disease_summary".
     """
-
+    plot_customisation: SinglePlotCustomisation = None
     prioritisation_type_string: str = DISEASE_PRIORITISATION_TYPE_STR
     y_label: str = DISEASE_PLOT_Y_LABEL
-    generate_benchmark_run_results: Callable[[RunConfig, str, float], BenchmarkRunResults] = (
+    generate_benchmark_run_results: Callable[[str, RunConfig, str, float], BenchmarkRunResults] = (
         benchmark_disease_prioritisation
     )
-    stats_comparison_file_suffix: str = "-disease_summary"
+    stats_comparison_file: str = "disease_summary"

From a2506857d3d59c503bfb0d9184ef0dae36eb22be Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 22 Aug 2024 15:28:18 +0100
Subject: [PATCH 53/81] add benchmark name to access db

---
 src/pheval/analyse/disease_prioritisation_analysis.py | 5 ++++-
 src/pheval/analyse/gene_prioritisation_analysis.py    | 5 ++++-
 src/pheval/analyse/variant_prioritisation_analysis.py | 5 ++++-
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/pheval/analyse/disease_prioritisation_analysis.py b/src/pheval/analyse/disease_prioritisation_analysis.py
index 6a92e45b1..9a4d29f79 100644
--- a/src/pheval/analyse/disease_prioritisation_analysis.py
+++ b/src/pheval/analyse/disease_prioritisation_analysis.py
@@ -189,6 +189,7 @@ def assess_phenopacket_disease_prioritisation(
 
 
 def benchmark_disease_prioritisation(
+    benchmark_name: str,
     run: RunConfig,
     score_order: str,
     threshold: float,
@@ -197,6 +198,7 @@ def benchmark_disease_prioritisation(
     Benchmark a directory based on disease prioritisation results.
 
     Args:
+        benchmark_name (str): Name of the benchmark.
         run (RunConfig): Run configuration.
         score_order (str): The order in which scores are arranged.
         threshold (float): Threshold for assessment.
@@ -206,7 +208,7 @@ def benchmark_disease_prioritisation(
         including ranks and rank statistics for the benchmarked directory.
     """
     disease_binary_classification_stats = BinaryClassificationStats()
-    db_connection = DBConnector()
+    db_connection = DBConnector(benchmark_name)
     db_connection.initialise()
     disease_benchmarker = AssessDiseasePrioritisation(
         db_connection,
@@ -225,6 +227,7 @@ def benchmark_disease_prioritisation(
     db_connection.close()
     disease_rank_stats = RankStats()
     disease_rank_stats.add_ranks(
+        benchmark_name=benchmark_name,
         table_name=f"{run.phenopacket_dir.parents[0].name}_disease",
         column_name=str(run.run_identifier),
     )
diff --git a/src/pheval/analyse/gene_prioritisation_analysis.py b/src/pheval/analyse/gene_prioritisation_analysis.py
index 689f02b70..adb262062 100644
--- a/src/pheval/analyse/gene_prioritisation_analysis.py
+++ b/src/pheval/analyse/gene_prioritisation_analysis.py
@@ -173,6 +173,7 @@ def assess_phenopacket_gene_prioritisation(
 
 
 def benchmark_gene_prioritisation(
+    benchmark_name: str,
     run: RunConfig,
     score_order: str,
     threshold: float,
@@ -180,6 +181,7 @@ def benchmark_gene_prioritisation(
     """
     Benchmark a directory based on gene prioritisation results.
      Args:
+         benchmark_name (str): Name of the benchmark.
          run (RunConfig): Run configuration.
          score_order (str): The order in which scores are arranged.
          threshold (float): Threshold for assessment.
@@ -188,7 +190,7 @@ def benchmark_gene_prioritisation(
          including ranks and rank statistics for the benchmarked directory.
     """
     gene_binary_classification_stats = BinaryClassificationStats()
-    db_connection = DBConnector()
+    db_connection = DBConnector(benchmark_name)
     db_connection.initialise()
     gene_benchmarker = AssessGenePrioritisation(
         db_connection,
@@ -207,6 +209,7 @@ def benchmark_gene_prioritisation(
     db_connection.close()
     gene_rank_stats = RankStats()
     gene_rank_stats.add_ranks(
+        benchmark_name=benchmark_name,
         table_name=f"{run.phenopacket_dir.parents[0].name}_gene",
         column_name=str(run.run_identifier),
     )
diff --git a/src/pheval/analyse/variant_prioritisation_analysis.py b/src/pheval/analyse/variant_prioritisation_analysis.py
index c76998c04..ae9afdc77 100644
--- a/src/pheval/analyse/variant_prioritisation_analysis.py
+++ b/src/pheval/analyse/variant_prioritisation_analysis.py
@@ -195,6 +195,7 @@ def assess_phenopacket_variant_prioritisation(
 
 
 def benchmark_variant_prioritisation(
+    benchmark_name: str,
     run: RunConfig,
     score_order: str,
     threshold: float,
@@ -203,6 +204,7 @@ def benchmark_variant_prioritisation(
     Benchmark a directory based on variant prioritisation results.
 
     Args:
+        benchmark_name (str): Name of the benchmark.
         run (RunConfig): Run configuration.
         score_order (str): The order in which scores are arranged.
         threshold (float): Threshold for assessment.
@@ -212,7 +214,7 @@ def benchmark_variant_prioritisation(
         including ranks and rank statistics for the benchmarked directory.
     """
     variant_binary_classification_stats = BinaryClassificationStats()
-    db_connection = DBConnector()
+    db_connection = DBConnector(benchmark_name)
     variant_benchmarker = AssessVariantPrioritisation(
         db_connection,
         f"{run.phenopacket_dir.parents[0].name}" f"_variant",
@@ -229,6 +231,7 @@ def benchmark_variant_prioritisation(
         )
     variant_rank_stats = RankStats()
     variant_rank_stats.add_ranks(
+        benchmark_name=benchmark_name,
         table_name=f"{run.phenopacket_dir.parents[0].name}_variant",
         column_name=str(run.run_identifier),
     )

From 7de4d6f6d6e5f5c749e5ede961929279e035ca3d Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 22 Aug 2024 15:28:39 +0100
Subject: [PATCH 54/81] implement plot customisation

---
 src/pheval/analyse/generate_plots.py | 50 ++++++++++++----------------
 1 file changed, 21 insertions(+), 29 deletions(-)

diff --git a/src/pheval/analyse/generate_plots.py b/src/pheval/analyse/generate_plots.py
index 05268789b..112a52391 100644
--- a/src/pheval/analyse/generate_plots.py
+++ b/src/pheval/analyse/generate_plots.py
@@ -145,7 +145,6 @@ def generate_stacked_bar_plot(
         self,
         benchmarking_results: List[BenchmarkRunResults],
         benchmark_generator: BenchmarkRunOutputGenerator,
-        title: str = None,
     ) -> None:
         """
         Generate a stacked bar plot and Mean Reciprocal Rank (MRR) bar plot.
@@ -153,7 +152,6 @@ def generate_stacked_bar_plot(
         Args:
             benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs.
             benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
-            title (str, optional): Title for the generated plot. Defaults to None.
         """
         for benchmark_result in benchmarking_results:
             self._generate_stacked_bar_plot_data(benchmark_result)
@@ -167,10 +165,10 @@ def generate_stacked_bar_plot(
             ylabel=benchmark_generator.y_label,
             edgecolor="white",
         ).legend(loc="center left", bbox_to_anchor=(1.0, 0.5))
-        if title is None:
+        if benchmark_generator.plot_customisation.rank_plot_title is None:
             plt.title(f"{benchmark_generator.prioritisation_type_string.capitalize()} Rank Stats")
         else:
-            plt.title(title, loc="center", fontsize=15)
+            plt.title(benchmark_generator.plot_customisation.rank_plot_title, loc="center", fontsize=15)
         plt.ylim(0, 100)
         plt.savefig(
             f"{benchmark_generator.prioritisation_type_string}_rank_stats.svg",
@@ -253,7 +251,6 @@ def generate_cumulative_bar(
         self,
         benchmarking_results: List[BenchmarkRunResults],
         benchmark_generator: BenchmarkRunOutputGenerator,
-        title: str = None,
     ) -> None:
         """
         Generate a cumulative bar plot.
@@ -261,7 +258,6 @@ def generate_cumulative_bar(
         Args:
             benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs.
             benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
-            title (str, optional): Title for the generated plot. Defaults to None.
         """
         for benchmark_result in benchmarking_results:
             self._generate_cumulative_bar_plot_data(benchmark_result)
@@ -278,12 +274,12 @@ def generate_cumulative_bar(
             legend=False,
         ).set(xlabel="Rank", ylabel=benchmark_generator.y_label)
         plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.15), ncol=3, title="Run")
-        if title is None:
+        if benchmark_generator.plot_customisation.rank_plot_title is None:
             plt.title(
                 f"{benchmark_generator.prioritisation_type_string.capitalize()} Cumulative Rank Stats"
             )
         else:
-            plt.title(title, loc="center", fontsize=15)
+            plt.title(benchmark_generator.plot_customisation.rank_plot_title, loc="center", fontsize=15)
         plt.ylim(0, 1)
         plt.savefig(
             f"{benchmark_generator.prioritisation_type_string}_rank_stats.svg",
@@ -389,7 +385,10 @@ def generate_roc_curve(
         plt.plot(linestyle="--", color="gray")
         plt.xlabel("False Positive Rate")
         plt.ylabel("True Positive Rate")
-        plt.title("Receiver Operating Characteristic (ROC) Curve")
+        if benchmark_generator.plot_customisation.roc_curve_title is None:
+            plt.title("Receiver Operating Characteristic (ROC) Curve")
+        else:
+            plt.title(benchmark_generator.plot_customisation.roc_curve_title)
         plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.15))
         plt.savefig(
             f"{benchmark_generator.prioritisation_type_string}_roc_curve.svg",
@@ -428,7 +427,10 @@ def generate_precision_recall(
         plt.plot(linestyle="--", color="gray")
         plt.xlabel("Recall")
         plt.ylabel("Precision")
-        plt.title("Precision-Recall Curve")
+        if benchmark_generator.plot_customisation.precision_recall_title is None:
+            plt.title("Precision-Recall Curve")
+        else:
+            plt.title(benchmark_generator.plot_customisation.precision_recall_title)
         plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.15))
         plt.savefig(
             f"{benchmark_generator.prioritisation_type_string}_precision_recall_curve.svg",
@@ -440,7 +442,6 @@ def generate_non_cumulative_bar(
         self,
         benchmarking_results: List[BenchmarkRunResults],
         benchmark_generator: BenchmarkRunOutputGenerator,
-        title: str = None,
     ) -> None:
         """
         Generate a non-cumulative bar plot.
@@ -448,7 +449,6 @@ def generate_non_cumulative_bar(
         Args:
             benchmarking_results (List[BenchmarkRunResults]): List of benchmarking results for multiple runs.
             benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
-            title (str, optional): Title for the generated plot. Defaults to None.
         """
         plt.clf()
         for benchmark_result in benchmarking_results:
@@ -466,12 +466,12 @@ def generate_non_cumulative_bar(
             legend=False,
         ).set(xlabel="Rank", ylabel=benchmark_generator.y_label)
         plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.15), ncol=3, title="Run")
-        if title is None:
+        if benchmark_generator.plot_customisation.rank_plot_title is None:
             plt.title(
                 f"{benchmark_generator.prioritisation_type_string.capitalize()} Non-Cumulative Rank Stats"
             )
         else:
-            plt.title(title, loc="center", fontsize=15)
+            plt.title(benchmark_generator.plot_customisation.rank_plot_title, loc="center", fontsize=15)
         plt.ylim(0, 1)
         plt.savefig(
             f"{benchmark_generator.prioritisation_type_string}_rank_stats.svg",
@@ -483,8 +483,6 @@ def generate_non_cumulative_bar(
 def generate_plots(
     benchmarking_results: List[BenchmarkRunResults],
     benchmark_generator: BenchmarkRunOutputGenerator,
-    plot_type: str,
-    title: str = None,
     generate_from_tsv: bool = False,
 ) -> None:
     """
@@ -495,20 +493,18 @@ def generate_plots(
     Args:
         benchmarking_results (list[BenchmarkRunResults]): List of benchmarking results for multiple runs.
         benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
-        plot_type (str): Type of plot to be generated ("bar_stacked", "bar_cumulative", "bar_non_cumulative").
-        title (str, optional): Title for the generated plot. Defaults to None.
         generate_from_tsv (bool): Specify whether to generate plots from the TSV file. Defaults to False.
     """
     plot_generator = PlotGenerator()
     if not generate_from_tsv:
         plot_generator.generate_roc_curve(benchmarking_results, benchmark_generator)
         plot_generator.generate_precision_recall(benchmarking_results, benchmark_generator)
-    if plot_type == "bar_stacked":
-        plot_generator.generate_stacked_bar_plot(benchmarking_results, benchmark_generator, title)
-    elif plot_type == "bar_cumulative":
-        plot_generator.generate_cumulative_bar(benchmarking_results, benchmark_generator, title)
-    elif plot_type == "bar_non_cumulative":
-        plot_generator.generate_non_cumulative_bar(benchmarking_results, benchmark_generator, title)
+    if benchmark_generator.plot_customisation.plot_type == "bar_stacked":
+        plot_generator.generate_stacked_bar_plot(benchmarking_results, benchmark_generator)
+    elif benchmark_generator.plot_customisation.plot_type == "bar_cumulative":
+        plot_generator.generate_cumulative_bar(benchmarking_results, benchmark_generator)
+    elif benchmark_generator.plot_customisation.plot_type == "bar_non_cumulative":
+        plot_generator.generate_non_cumulative_bar(benchmarking_results, benchmark_generator)
 
 
 def generate_plots_from_benchmark_summary_tsv(
@@ -516,8 +512,6 @@ def generate_plots_from_benchmark_summary_tsv(
     gene_analysis: bool,
     variant_analysis: bool,
     disease_analysis: bool,
-    plot_type: str,
-    title: str,
 ):
     """
     Generate bar plot from summary benchmark results.
@@ -530,8 +524,6 @@ def generate_plots_from_benchmark_summary_tsv(
         gene_analysis (bool): Flag indicating whether to analyse gene prioritisation.
         variant_analysis (bool): Flag indicating whether to analyse variant prioritisation.
         disease_analysis (bool): Flag indicating whether to analyse disease prioritisation.
-        plot_type (str): Type of plot to be generated ("bar_stacked", "bar_cumulative", "bar_non_cumulative").
-        title (str): Title for the generated plot.
     Raises:
          ValueError: If an unsupported plot type is specified.
     """
@@ -547,4 +539,4 @@ def generate_plots_from_benchmark_summary_tsv(
         raise ValueError(
             "Specify one analysis type (gene_analysis, variant_analysis, or disease_analysis)"
         )
-    generate_plots(benchmarking_results, benchmark_generator, plot_type, title, True)
+    generate_plots(benchmarking_results, benchmark_generator, True)

From cfb05f11016684b5c7b6d744d3cfd35b1487c101 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 22 Aug 2024 15:28:51 +0100
Subject: [PATCH 55/81] remove redundant method for generating single output

---
 .../analyse/generate_summary_outputs.py       | 40 ++-----------------
 1 file changed, 3 insertions(+), 37 deletions(-)

diff --git a/src/pheval/analyse/generate_summary_outputs.py b/src/pheval/analyse/generate_summary_outputs.py
index e6b0cda2e..abf217ef8 100644
--- a/src/pheval/analyse/generate_summary_outputs.py
+++ b/src/pheval/analyse/generate_summary_outputs.py
@@ -8,39 +8,6 @@
 from pheval.constants import RANK_COMPARISON_SUFFIX
 
 
-def generate_benchmark_output(
-    benchmarking_results: BenchmarkRunResults,
-    plot_type: str,
-    benchmark_generator: BenchmarkRunOutputGenerator,
-) -> None:
-    """
-    Generate prioritisation outputs for a single benchmarking run.
-
-    Args:
-        benchmarking_results (BenchmarkRunResults): Results of a benchmarking run.
-        plot_type (str): Type of plot to generate.
-        benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
-    """
-    results_dir_name = (
-        benchmarking_results.results_dir.name
-        if benchmarking_results.results_dir
-        else benchmarking_results.benchmark_name
-    )
-    conn = DBConnector().conn
-    conn.execute(
-        f"CREATE TABLE {results_dir_name}_{benchmark_generator.prioritisation_type_string}{RANK_COMPARISON_SUFFIX} "
-        f"AS SELECT * EXCLUDE (identifier) FROM "
-        f"{benchmarking_results.phenopacket_dir.parents[0].name}_{benchmark_generator.prioritisation_type_string}"
-    )
-
-    conn.close()
-    generate_plots(
-        [benchmarking_results],
-        benchmark_generator,
-        plot_type,
-    )
-
-
 def get_new_table_name(run_identifier_1: str, run_identifier_2: str, output_prefix: str) -> str:
     """
     Get the new table name for rank comparison tables.
@@ -94,9 +61,9 @@ def create_comparison_table(
 
 
 def generate_benchmark_comparison_output(
+    benchmark_name: str,
     benchmarking_results: List[BenchmarkRunResults],
     run_identifiers: List[str],
-    plot_type: str,
     benchmark_generator: BenchmarkRunOutputGenerator,
     table_name: str,
 ) -> None:
@@ -108,15 +75,15 @@ def generate_benchmark_comparison_output(
     comparison outputs using `RankComparisonGenerator` for each pair.
 
     Args:
+        benchmark_name (str): Name of the benchmark.
         benchmarking_results (List[BenchmarkRunResults]): A list containing BenchmarkRunResults instances
             representing the benchmarking results of multiple runs.
         run_identifiers (List[str]): A list of run identifiers.
-        plot_type (str): The type of plot to be generated.
         benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
         table_name (str): The name of the table where ranks are stored.
     """
     output_prefix = benchmark_generator.prioritisation_type_string
-    connector = DBConnector()
+    connector = DBConnector(benchmark_name)
     for pair in itertools.combinations(
         [str(result.benchmark_name) for result in benchmarking_results], 2
     ):
@@ -137,5 +104,4 @@ def generate_benchmark_comparison_output(
     generate_plots(
         benchmarking_results,
         benchmark_generator,
-        plot_type,
     )

From 5588f462e258a101471bf4a4e2e67b2fc2e54b09 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 22 Aug 2024 15:29:05 +0100
Subject: [PATCH 56/81] allow for naming of output db

---
 src/pheval/analyse/get_connection.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/pheval/analyse/get_connection.py b/src/pheval/analyse/get_connection.py
index f0b4a33fc..e03d51450 100644
--- a/src/pheval/analyse/get_connection.py
+++ b/src/pheval/analyse/get_connection.py
@@ -17,22 +17,28 @@ class DBConnector:
     Class to connect to database.
     """
 
-    def __init__(self):
-        """Initialize the DBConnector class."""
-        self.conn = self.get_connection()
+    def __init__(self, benchmark_name: str):
+        """
+        Initialise the DBConnector class.
+        Args:
+            benchmark_name (str): Name of the benchmark, the database file is named "<benchmark_name>.db".
+        """
+        self.conn = self.get_connection(f"{benchmark_name}.db")
 
     def initialise(self):
         """Initialise the duckdb connection."""
         self.add_contains_function()
 
     @staticmethod
-    def get_connection() -> DuckDBPyConnection:
+    def get_connection(db_name: str) -> DuckDBPyConnection:
         """
         Get a connection to the database.
+        Args:
+            db_name (str): Database file name passed to duckdb.connect.
         Returns:
             DuckDBPyConnection: Connection to the database.
         """
-        conn = duckdb.connect("analysis.db")
+        conn = duckdb.connect(db_name)
         return conn
 
     def add_column_integer_default(self, table_name: str, column: str, default: int = 0) -> None:

From 3a9378191aa8155e5153793909ccb6f998bcf37d Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 22 Aug 2024 15:29:22 +0100
Subject: [PATCH 57/81] add benchmark name

---
 src/pheval/analyse/parse_corpus.py |  4 ++--
 src/pheval/analyse/rank_stats.py   | 11 ++++++-----
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/pheval/analyse/parse_corpus.py b/src/pheval/analyse/parse_corpus.py
index e70846323..de2844e3f 100644
--- a/src/pheval/analyse/parse_corpus.py
+++ b/src/pheval/analyse/parse_corpus.py
@@ -65,14 +65,15 @@ def _obtain_causative_genes(phenopacket_path: Path) -> List[ProbandCausativeGene
 class CorpusParser:
     """Class for parsing phenopacket corpus and retrieving known variants/genes/diseases."""
 
-    def __init__(self, phenopacket_dir: Path) -> None:
+    def __init__(self, benchmark_name: str, phenopacket_dir: Path) -> None:
         """
         Initialise the CorpusParser class.
         Args:
+            benchmark_name (str): Name of the benchmark.
             phenopacket_dir (Path): Path to the Phenopacket directory.
         """
         self.phenopacket_dir = phenopacket_dir
-        self.conn = DBConnector().conn
+        self.conn = DBConnector(benchmark_name).conn
         self.table_name = phenopacket_dir.parents[0].name
 
     def _create_gene_table(self) -> None:
diff --git a/src/pheval/analyse/rank_stats.py b/src/pheval/analyse/rank_stats.py
index 37f0d9c04..755b44fb6 100644
--- a/src/pheval/analyse/rank_stats.py
+++ b/src/pheval/analyse/rank_stats.py
@@ -36,14 +36,14 @@ class RankStats:
     relevant_result_ranks: List[List[int]] = field(default_factory=list)
     mrr: float = None
 
-    def add_ranks(self, table_name: str, column_name: str) -> None:
+    def add_ranks(self, benchmark_name: str, table_name: str, column_name: str) -> None:
         """
         Add ranks to RankStats instance from table.
         Args:
             table_name (str): Name of the table to add ranks from.
             column_name (str): Name of the column to add ranks from.:
         """
-        conn = DBConnector().conn
+        conn = DBConnector(benchmark_name).conn
         self.top = self._execute_count_query(conn, table_name, column_name, " = 1")
         self.top3 = self._execute_count_query(conn, table_name, column_name, " BETWEEN 1 AND 3")
         self.top5 = self._execute_count_query(conn, table_name, column_name, " BETWEEN 1 AND 5")
@@ -327,7 +327,7 @@ def mean_normalised_discounted_cumulative_gain(self, k: int) -> float:
 class RankStatsWriter:
     """Class for writing the rank stats to a file."""
 
-    def __init__(self, table_name: str):
+    def __init__(self, benchmark_name: str, table_name: str):
         """
         Initialise the RankStatsWriter class
         Args:
@@ -335,7 +335,8 @@ def __init__(self, table_name: str):
         """
 
         self.table_name = table_name
-        conn = DBConnector().conn
+        self.benchmark_name = benchmark_name
+        conn = DBConnector(benchmark_name).conn
         conn.execute(
             f'CREATE TABLE IF NOT EXISTS "{self.table_name}" ('
             f"results_directory_path VARCHAR,"
@@ -396,7 +397,7 @@ def add_statistics_entry(
             rank_stats (RankStats): RankStats object for the run.
             binary_classification (BinaryClassificationStats): BinaryClassificationStats object for the run.
         """
-        conn = DBConnector().conn
+        conn = DBConnector(self.benchmark_name).conn
         conn.execute(
             f' INSERT INTO "{self.table_name}" VALUES ( '
             f"'{run_identifier}',"

From edf6faaa0b981d931cbe0336558577725ef32274 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 22 Aug 2024 15:29:37 +0100
Subject: [PATCH 58/81] add classes for parsing benchmarking yaml file for plot
 customisation

---
 src/pheval/analyse/run_data_parser.py | 34 +++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/src/pheval/analyse/run_data_parser.py b/src/pheval/analyse/run_data_parser.py
index bf2f9ba62..8bc312d31 100644
--- a/src/pheval/analyse/run_data_parser.py
+++ b/src/pheval/analyse/run_data_parser.py
@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import List
+from typing import List, Optional
 
 import yaml
 from pydantic import BaseModel
@@ -26,14 +26,44 @@ class RunConfig(BaseModel):
     disease_analysis: bool
 
 
+class SinglePlotCustomisation(BaseModel):
+    """
+    Store customisations for plots. Every field may be omitted and then defaults to None.
+
+    Attributes:
+        plot_type (Optional[str]): The plot type.
+        rank_plot_title (Optional[str]): The title for the rank summary plot.
+        roc_curve_title (Optional[str]): The title for the roc curve plot.
+        precision_recall_title (Optional[str]): The title for the precision-recall plot.
+    """
+    plot_type: Optional[str] = None
+    rank_plot_title: Optional[str] = None
+    roc_curve_title: Optional[str] = None
+    precision_recall_title: Optional[str] = None
+
+
+class PlotCustomisation(BaseModel):
+    """
+    Store customisations for all plots.
+    Attributes:
+        gene_plots (SinglePlotCustomisation): Customisation for all gene benchmarking plots.
+        disease_plots (SinglePlotCustomisation): Customisation for all disease benchmarking plots.
+        variant_plots (SinglePlotCustomisation): Customisation for all variant benchmarking plots.
+    """
+    gene_plots: SinglePlotCustomisation
+    disease_plots: SinglePlotCustomisation
+    variant_plots: SinglePlotCustomisation
+
+
 class Config(BaseModel):
     """
     Store configurations for a runs.
     Attributes:
         runs (List[RunConfig]): The list of run configurations.
     """
-
+    benchmark_name: str
     runs: List[RunConfig]
+    plot_customisation: PlotCustomisation
 
 
 def parse_run_config(run_data_path: Path) -> Config:

From 209eafa9d220b8faec0ba9a225b100548d9567e5 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 22 Aug 2024 15:30:00 +0100
Subject: [PATCH 59/81] collapse benchmarking commands into a single command
 that can benchmark any number of runs

---
 src/pheval/cli.py              |   4 +-
 src/pheval/cli_pheval_utils.py | 220 +++++++--------------------------
 2 files changed, 44 insertions(+), 180 deletions(-)

diff --git a/src/pheval/cli.py b/src/pheval/cli.py
index 2a12fc058..f18318d9e 100644
--- a/src/pheval/cli.py
+++ b/src/pheval/cli.py
@@ -7,7 +7,6 @@
 from .cli_pheval import run
 from .cli_pheval_utils import (
     benchmark,
-    benchmark_comparison,
     create_spiked_vcfs_command,
     generate_stats_plot,
     prepare_corpus_command,
@@ -58,7 +57,6 @@ def pheval_utils():
 pheval_utils.add_command(update_phenopackets_command)
 pheval_utils.add_command(create_spiked_vcfs_command)
 pheval_utils.add_command(benchmark)
-pheval_utils.add_command(benchmark_comparison)
 pheval_utils.add_command(semsim_to_exomiserdb_command)
 pheval_utils.add_command(generate_stats_plot)
 pheval_utils.add_command(prepare_corpus_command)
diff --git a/src/pheval/cli_pheval_utils.py b/src/pheval/cli_pheval_utils.py
index 5ad37ad35..9f81a2696 100644
--- a/src/pheval/cli_pheval_utils.py
+++ b/src/pheval/cli_pheval_utils.py
@@ -5,9 +5,9 @@
 
 import click
 
-from pheval.analyse.analysis import benchmark_directory, benchmark_run_comparisons
+from pheval.analyse.analysis import benchmark_run_comparisons
 from pheval.analyse.generate_plots import generate_plots_from_benchmark_summary_tsv
-from pheval.analyse.run_data_parser import RunConfig, parse_run_config
+from pheval.analyse.run_data_parser import parse_run_config
 from pheval.prepare.create_noisy_phenopackets import scramble_phenopackets
 from pheval.prepare.create_spiked_vcf import spike_vcfs
 from pheval.prepare.custom_exceptions import InputError, MutuallyExclusiveOptionError
@@ -56,7 +56,7 @@
     that will be applied to semantic similarity score column (e.g. jaccard similarity).""",
 )
 def semsim_scramble_command(
-    input: Path, output: Path, score_column: List[str], scramble_factor: float
+        input: Path, output: Path, score_column: List[str], scramble_factor: float
 ):
     """Scrambles semsim profile multiplying score value by scramble factor
     Args:
@@ -107,10 +107,10 @@ def semsim_scramble_command(
     type=Path,
 )
 def scramble_phenopackets_command(
-    phenopacket_path: Path,
-    phenopacket_dir: Path,
-    scramble_factor: float,
-    output_dir: Path,
+        phenopacket_path: Path,
+        phenopacket_dir: Path,
+        scramble_factor: float,
+        output_dir: Path,
 ):
     """Generate noisy phenopackets from existing ones."""
     if phenopacket_path is None and phenopacket_dir is None:
@@ -161,11 +161,11 @@ def scramble_phenopackets_command(
     help="Output path for the difference tsv. Defaults to percentage_diff.semsim.tsv",
 )
 def semsim_comparison(
-    semsim_left: Path,
-    semsim_right: Path,
-    score_column: str,
-    analysis: str,
-    output: Path = "percentage_diff.semsim.tsv",
+        semsim_left: Path,
+        semsim_right: Path,
+        score_column: str,
+        analysis: str,
+        output: Path = "percentage_diff.semsim.tsv",
 ):
     """Compares two semantic similarity profiles
 
@@ -222,7 +222,7 @@ def semsim_comparison(
     type=click.Choice(["ensembl_id", "entrez_id", "hgnc_id"]),
 )
 def update_phenopackets_command(
-    phenopacket_path: Path, phenopacket_dir: Path, output_dir: Path, gene_identifier: str
+        phenopacket_path: Path, phenopacket_dir: Path, output_dir: Path, gene_identifier: str
 ):
     """Update gene symbols and identifiers for phenopackets."""
     if phenopacket_path is None and phenopacket_dir is None:
@@ -299,13 +299,13 @@ def update_phenopackets_command(
     type=Path,
 )
 def create_spiked_vcfs_command(
-    phenopacket_path: Path,
-    phenopacket_dir: Path,
-    output_dir: Path,
-    hg19_template_vcf: Path = None,
-    hg38_template_vcf: Path = None,
-    hg19_vcf_dir: Path = None,
-    hg38_vcf_dir: Path = None,
+        phenopacket_path: Path,
+        phenopacket_dir: Path,
+        output_dir: Path,
+        hg19_template_vcf: Path = None,
+        hg38_template_vcf: Path = None,
+        hg19_vcf_dir: Path = None,
+        hg38_vcf_dir: Path = None,
 ):
     """
     Create spiked VCF from either a Phenopacket or a Phenopacket directory.
@@ -332,139 +332,15 @@ def create_spiked_vcfs_command(
     )
 
 
-@click.command()
-@click.option(
-    "--directory",
-    "-d",
-    required=True,
-    metavar="PATH",
-    help="General results directory to be benchmarked, assumes contains subdirectories of pheval_gene_results/,"
-    "pheval_variant_results/ or pheval_disease_results/. ",
-    type=Path,
-)
-@click.option(
-    "--run-identifier",
-    "-r",
-    required=True,
-    metavar="STRING",
-    help="The run identifier.",
-    type=str,
-)
-@click.option(
-    "--phenopacket-dir",
-    "-p",
-    required=True,
-    metavar="PATH",
-    help="Full path to directory containing input phenopackets.",
-    type=Path,
-)
-@click.option(
-    "--output-prefix",
-    "-o",
-    metavar="<str>",
-    required=True,
-    help=" Output file prefix. ",
-)
-@click.option(
-    "--score-order",
-    "-so",
-    required=True,
-    help="Ordering of results for ranking.",
-    type=click.Choice(["ascending", "descending"]),
-    default="descending",
-    show_default=True,
-)
-@click.option(
-    "--threshold",
-    "-t",
-    metavar="<float>",
-    default=float(0.0),
-    required=False,
-    help="Score threshold.",
-    type=float,
-)
-@click.option(
-    "--gene-analysis/--no-gene-analysis",
-    default=False,
-    required=False,
-    type=bool,
-    show_default=True,
-    help="Specify analysis for gene prioritisation",
-)
-@click.option(
-    "--variant-analysis/--no-variant-analysis",
-    default=False,
-    required=False,
-    type=bool,
-    show_default=True,
-    help="Specify analysis for variant prioritisation",
-)
-@click.option(
-    "--disease-analysis/--no-disease-analysis",
-    default=False,
-    required=False,
-    type=bool,
-    show_default=True,
-    help="Specify analysis for disease prioritisation",
-)
-@click.option(
-    "--plot-type",
-    "-y",
-    default="bar_stacked",
-    show_default=True,
-    type=click.Choice(["bar_stacked", "bar_cumulative", "bar_non_cumulative"]),
-    help="Bar chart type to output.",
-)
-def benchmark(
-    directory: Path,
-    run_identifier: str,
-    phenopacket_dir: Path,
-    score_order: str,
-    output_prefix: str,
-    threshold: float,
-    gene_analysis: bool,
-    variant_analysis: bool,
-    disease_analysis: bool,
-    plot_type: str,
-):
-    """Benchmark the gene/variant/disease prioritisation performance for a single run."""
-    if not gene_analysis and not variant_analysis and not disease_analysis:
-        raise InputError("Need to specify at least one of gene/variant/disease analysis.")
-    benchmark_directory(
-        RunConfig(
-            run_identifier=run_identifier,
-            phenopacket_dir=phenopacket_dir,
-            results_dir=directory,
-            gene_analysis=gene_analysis,
-            variant_analysis=variant_analysis,
-            disease_analysis=disease_analysis,
-        ),
-        score_order,
-        output_prefix,
-        threshold,
-        plot_type,
-    )
-
-
 @click.command()
 @click.option(
     "--run-data",
     "-r",
     required=True,
     metavar="PATH",
-    help="Path to .txt file containing testdata phenopacket directory "
-    "and corresponding results directory separated by tab."
-    "Each run contained to a new line with the input testdata listed first and on the same line separated by a tab"
-    "the results directory.",
+    help="Path to yaml configuration file for benchmarking.",
     type=Path,
 )
-@click.option(
-    "--output-prefix",
-    "-o",
-    metavar="<str>",
-    required=True,
-    help=" Output file prefix. ",
-)
 @click.option(
     "--score-order",
     "-so",
@@ -483,28 +359,16 @@ def benchmark(
     help="Score threshold.",
     type=float,
 )
-@click.option(
-    "--plot-type",
-    "-y",
-    default="bar_cumulative",
-    show_default=True,
-    type=click.Choice(["bar_stacked", "bar_cumulative", "bar_non_cumulative"]),
-    help="Bar chart type to output.",
-)
-def benchmark_comparison(
-    run_data: Path,
-    score_order: str,
-    output_prefix: str,
-    threshold: float,
-    plot_type: str,
+def benchmark(
+        run_data: Path,
+        score_order: str,
+        threshold: float,
 ):
-    """Benchmark the gene/variant/disease prioritisation performance for two runs."""
+    """Benchmark the gene/variant/disease prioritisation performance for one or more runs."""
     benchmark_run_comparisons(
         parse_run_config(run_data),
         score_order,
-        output_prefix,
         threshold,
-        plot_type,
     )
 
 
@@ -542,7 +406,7 @@ def benchmark_comparison(
     type=Path,
 )
 def semsim_to_exomiserdb_command(
-    input_file: Path, object_prefix: str, subject_prefix: str, db_path: Path
+        input_file: Path, object_prefix: str, subject_prefix: str, db_path: Path
 ):
     """ingests semsim file into exomiser phenotypic database
 
@@ -609,12 +473,12 @@ def semsim_to_exomiserdb_command(
     help='Title for plot, specify the title on the CLI enclosed with ""',
 )
 def generate_stats_plot(
-    benchmarking_tsv: Path,
-    gene_analysis: bool,
-    variant_analysis: bool,
-    disease_analysis: bool,
-    plot_type: str,
-    title: str = None,
+        benchmarking_tsv: Path,
+        gene_analysis: bool,
+        variant_analysis: bool,
+        disease_analysis: bool,
+        plot_type: str,
+        title: str = None,
 ):
     """Generate bar plot from benchmark stats summary tsv."""
     generate_plots_from_benchmark_summary_tsv(
@@ -712,16 +576,16 @@ def generate_stats_plot(
     type=Path,
 )
 def prepare_corpus_command(
-    phenopacket_dir: Path,
-    variant_analysis: bool,
-    gene_analysis: bool,
-    disease_analysis: bool,
-    gene_identifier: str,
-    hg19_template_vcf: Path,
-    hg38_template_vcf: Path,
-    hg19_vcf_dir: Path,
-    hg38_vcf_dir: Path,
-    output_dir: Path,
+        phenopacket_dir: Path,
+        variant_analysis: bool,
+        gene_analysis: bool,
+        disease_analysis: bool,
+        gene_identifier: str,
+        hg19_template_vcf: Path,
+        hg38_template_vcf: Path,
+        hg19_vcf_dir: Path,
+        hg38_vcf_dir: Path,
+        output_dir: Path,
 ):
     """
     Prepare a corpus of Phenopackets for analysis, optionally checking for complete variant records and updating

From ee6915f67a9e0dc8f5cbc87e426424b7bbacf3b7 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 22 Aug 2024 15:35:47 +0100
Subject: [PATCH 60/81] add missing benchmark_name parameter

---
 tests/test_analysis.py                 | 6 +++---
 tests/test_generate_summary_outputs.py | 2 +-
 tests/test_rank_stats.py               | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/test_analysis.py b/tests/test_analysis.py
index 42b6e665d..27c8fb375 100644
--- a/tests/test_analysis.py
+++ b/tests/test_analysis.py
@@ -52,7 +52,7 @@ def setUp(self):
         )
         self.mock_get_connection = patcher.start()
         self.addCleanup(patcher.stop)
-        self.db_connector = DBConnector()
+        self.db_connector = DBConnector("null")
         self.assess_gene_prioritisation = AssessGenePrioritisation(
             db_connection=self.db_connector,
             table_name="test_table_gene",
@@ -192,7 +192,7 @@ def setUp(self):
         )
         self.mock_get_connection = patcher.start()
         self.addCleanup(patcher.stop)
-        self.db_connector = DBConnector()
+        self.db_connector = DBConnector("None")
         self.assess_variant_prioritisation = AssessVariantPrioritisation(
             db_connection=self.db_connector,
             table_name="test_table_variant",
@@ -361,7 +361,7 @@ def setUp(self):
         )
         self.mock_get_connection = patcher.start()
         self.addCleanup(patcher.stop)
-        self.db_connector = DBConnector()
+        self.db_connector = DBConnector("None")
         self.assess_disease_prioritisation = AssessDiseasePrioritisation(
             db_connection=self.db_connector,
             table_name="test_table_disease",
diff --git a/tests/test_generate_summary_outputs.py b/tests/test_generate_summary_outputs.py
index c8b9ef17c..c2ec3530a 100644
--- a/tests/test_generate_summary_outputs.py
+++ b/tests/test_generate_summary_outputs.py
@@ -43,7 +43,7 @@ def setUp(self):
         )
         self.mock_get_connection = patcher.start()
         self.addCleanup(patcher.stop)
-        self.db_connector = DBConnector()
+        self.db_connector = DBConnector("None")
 
     def test_create_comparison_table(self):
         create_comparison_table(
diff --git a/tests/test_rank_stats.py b/tests/test_rank_stats.py
index 268f3ee54..d5e92b323 100644
--- a/tests/test_rank_stats.py
+++ b/tests/test_rank_stats.py
@@ -46,7 +46,7 @@ def setUp(self) -> None:
     )
     def test_add_ranks(self, mock_get_connection):
         mock_get_connection.return_value = self.db_connection
-        self.rank_stats.add_ranks("test_table_gene", "results_dir_1")
+        self.rank_stats.add_ranks("None", "test_table_gene", "results_dir_1")
         self.assertEqual(self.rank_stats.top, 1)
         self.assertEqual(self.rank_stats.top3, 2)
         self.assertEqual(self.rank_stats.top5, 3)

From 640e0e23b8789a68ce8b87fdcd8750b858cce491 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 22 Aug 2024 15:36:37 +0100
Subject: [PATCH 61/81] tox lint

---
 src/pheval/analyse/analysis.py            | 59 +++++++++++-------
 src/pheval/analyse/benchmark_generator.py |  4 ++
 src/pheval/analyse/generate_plots.py      | 12 +++-
 src/pheval/analyse/run_data_parser.py     |  3 +
 src/pheval/cli.py                         |  1 -
 src/pheval/cli_pheval_utils.py            | 76 +++++++++++------------
 6 files changed, 89 insertions(+), 66 deletions(-)

diff --git a/src/pheval/analyse/analysis.py b/src/pheval/analyse/analysis.py
index 30c9ff36e..ea50e9c0c 100644
--- a/src/pheval/analyse/analysis.py
+++ b/src/pheval/analyse/analysis.py
@@ -4,19 +4,17 @@
     GeneBenchmarkRunOutputGenerator,
     VariantBenchmarkRunOutputGenerator,
 )
-from pheval.analyse.generate_summary_outputs import (
-    generate_benchmark_comparison_output,
-)
+from pheval.analyse.generate_summary_outputs import generate_benchmark_comparison_output
 from pheval.analyse.parse_corpus import CorpusParser
 from pheval.analyse.rank_stats import RankStatsWriter
 from pheval.analyse.run_data_parser import Config
 
 
 def _run_benchmark_comparison(
-        run_config: Config,
-        score_order: str,
-        threshold: float,
-        benchmark_generator: BenchmarkRunOutputGenerator,
+    run_config: Config,
+    score_order: str,
+    threshold: float,
+    benchmark_generator: BenchmarkRunOutputGenerator,
 ) -> None:
     """
     Run a benchmark on several result directories.
@@ -28,10 +26,14 @@ def _run_benchmark_comparison(
         threshold (float): The threshold for benchmark evaluation.
         benchmark_generator (BenchmarkRunOutputGenerator): Generator for benchmark run output.
     """
-    stats_writer = RankStatsWriter(run_config.benchmark_name, benchmark_generator.stats_comparison_file)
+    stats_writer = RankStatsWriter(
+        run_config.benchmark_name, benchmark_generator.stats_comparison_file
+    )
     unique_test_corpora_directories = set([result.phenopacket_dir for result in run_config.runs])
     [
-        CorpusParser(run_config.benchmark_name, test_corpora_directory).parse_corpus(benchmark_generator)
+        CorpusParser(run_config.benchmark_name, test_corpora_directory).parse_corpus(
+            benchmark_generator
+        )
         for test_corpora_directory in unique_test_corpora_directories
     ]
     benchmarking_results = []
@@ -60,9 +62,9 @@ def _run_benchmark_comparison(
 
 
 def benchmark_run_comparisons(
-        run_config: Config,
-        score_order: str,
-        threshold: float,
+    run_config: Config,
+    score_order: str,
+    threshold: float,
 ) -> None:
     """
     Benchmark prioritisation performance for several runs.
@@ -72,22 +74,29 @@ def benchmark_run_comparisons(
         score_order (str): The order in which scores are arranged, this can be either ascending or descending.
         threshold (float): The threshold for benchmark evaluation.
     """
-    gene_analysis_runs = Config(benchmark_name=run_config.benchmark_name,
-                                runs=[run for run in run_config.runs if run.gene_analysis],
-                                plot_customisation=run_config.plot_customisation)
-    variant_analysis_runs = Config(benchmark_name=run_config.benchmark_name,
-                                   runs=[run for run in run_config.runs if run.variant_analysis],
-                                   plot_customisation=run_config.plot_customisation)
-    disease_analysis_runs = Config(benchmark_name=run_config.benchmark_name,
-                                   runs=[run for run in run_config.runs if run.disease_analysis],
-                                   plot_customisation=run_config.plot_customisation)
+    gene_analysis_runs = Config(
+        benchmark_name=run_config.benchmark_name,
+        runs=[run for run in run_config.runs if run.gene_analysis],
+        plot_customisation=run_config.plot_customisation,
+    )
+    variant_analysis_runs = Config(
+        benchmark_name=run_config.benchmark_name,
+        runs=[run for run in run_config.runs if run.variant_analysis],
+        plot_customisation=run_config.plot_customisation,
+    )
+    disease_analysis_runs = Config(
+        benchmark_name=run_config.benchmark_name,
+        runs=[run for run in run_config.runs if run.disease_analysis],
+        plot_customisation=run_config.plot_customisation,
+    )
     if gene_analysis_runs.runs:
         _run_benchmark_comparison(
             run_config=gene_analysis_runs,
             score_order=score_order,
             threshold=threshold,
             benchmark_generator=GeneBenchmarkRunOutputGenerator(
-                plot_customisation=gene_analysis_runs.plot_customisation.gene_plots),
+                plot_customisation=gene_analysis_runs.plot_customisation.gene_plots
+            ),
         )
     if variant_analysis_runs.runs:
         _run_benchmark_comparison(
@@ -95,7 +104,8 @@ def benchmark_run_comparisons(
             score_order=score_order,
             threshold=threshold,
             benchmark_generator=VariantBenchmarkRunOutputGenerator(
-                plot_customisation=variant_analysis_runs.plot_customisation.variant_plots),
+                plot_customisation=variant_analysis_runs.plot_customisation.variant_plots
+            ),
         )
     if disease_analysis_runs.runs:
         _run_benchmark_comparison(
@@ -103,5 +113,6 @@ def benchmark_run_comparisons(
             score_order=score_order,
             threshold=threshold,
             benchmark_generator=DiseaseBenchmarkRunOutputGenerator(
-                plot_customisation=disease_analysis_runs.plot_customisation.disease_plots),
+                plot_customisation=disease_analysis_runs.plot_customisation.disease_plots
+            ),
         )
diff --git a/src/pheval/analyse/benchmark_generator.py b/src/pheval/analyse/benchmark_generator.py
index 56da9e489..1b8b897ae 100644
--- a/src/pheval/analyse/benchmark_generator.py
+++ b/src/pheval/analyse/benchmark_generator.py
@@ -29,6 +29,7 @@ class BenchmarkRunOutputGenerator:
             and returns BenchmarkRunResults.
         stats_comparison_file (str): Suffix for the rank comparison file.
     """
+
     plot_customisation: SinglePlotCustomisation
     prioritisation_type_string: str
     y_label: str
@@ -58,6 +59,7 @@ class GeneBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
         stats_comparison_file (str): Suffix for the gene rank comparison file.
             Defaults to "-gene_summary".
     """
+
     plot_customisation: SinglePlotCustomisation = None
     prioritisation_type_string: str = GENE_PRIORITISATION_TYPE_STR
     y_label: str = GENE_PLOT_Y_LABEL
@@ -90,6 +92,7 @@ class VariantBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
             Defaults to "-variant_summary".
 
     """
+
     plot_customisation: SinglePlotCustomisation = None
     prioritisation_type_string: str = VARIANT_PRIORITISATION_TYPE_STR
     y_label: str = VARIANT_PLOT_Y_LABEL
@@ -121,6 +124,7 @@ class DiseaseBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
         stats_comparison_file (str): Suffix for the disease rank comparison file.
             Defaults to "-disease_summary".
     """
+
     plot_customisation: SinglePlotCustomisation = None
     prioritisation_type_string: str = DISEASE_PRIORITISATION_TYPE_STR
     y_label: str = DISEASE_PLOT_Y_LABEL
diff --git a/src/pheval/analyse/generate_plots.py b/src/pheval/analyse/generate_plots.py
index 112a52391..9f02baedd 100644
--- a/src/pheval/analyse/generate_plots.py
+++ b/src/pheval/analyse/generate_plots.py
@@ -168,7 +168,9 @@ def generate_stacked_bar_plot(
         if benchmark_generator.plot_customisation.rank_plot_title is None:
             plt.title(f"{benchmark_generator.prioritisation_type_string.capitalize()} Rank Stats")
         else:
-            plt.title(benchmark_generator.plot_customisation.rank_plot_title, loc="center", fontsize=15)
+            plt.title(
+                benchmark_generator.plot_customisation.rank_plot_title, loc="center", fontsize=15
+            )
         plt.ylim(0, 100)
         plt.savefig(
             f"{benchmark_generator.prioritisation_type_string}_rank_stats.svg",
@@ -279,7 +281,9 @@ def generate_cumulative_bar(
                 f"{benchmark_generator.prioritisation_type_string.capitalize()} Cumulative Rank Stats"
             )
         else:
-            plt.title(benchmark_generator.plot_customisation.rank_plot_title, loc="center", fontsize=15)
+            plt.title(
+                benchmark_generator.plot_customisation.rank_plot_title, loc="center", fontsize=15
+            )
         plt.ylim(0, 1)
         plt.savefig(
             f"{benchmark_generator.prioritisation_type_string}_rank_stats.svg",
@@ -471,7 +475,9 @@ def generate_non_cumulative_bar(
                 f"{benchmark_generator.prioritisation_type_string.capitalize()} Non-Cumulative Rank Stats"
             )
         else:
-            plt.title(benchmark_generator.plot_customisation.rank_plot_title, loc="center", fontsize=15)
+            plt.title(
+                benchmark_generator.plot_customisation.rank_plot_title, loc="center", fontsize=15
+            )
         plt.ylim(0, 1)
         plt.savefig(
             f"{benchmark_generator.prioritisation_type_string}_rank_stats.svg",
diff --git a/src/pheval/analyse/run_data_parser.py b/src/pheval/analyse/run_data_parser.py
index 8bc312d31..1689499ab 100644
--- a/src/pheval/analyse/run_data_parser.py
+++ b/src/pheval/analyse/run_data_parser.py
@@ -36,6 +36,7 @@ class SinglePlotCustomisation(BaseModel):
         roc_curve_title (str): The title for the roc curve plot.
         precision_recall_title (str): The title for the precision-recall plot.
     """
+
     plot_type: Optional[str]
     rank_plot_title: Optional[str]
     roc_curve_title: Optional[str]
@@ -50,6 +51,7 @@ class PlotCustomisation(BaseModel):
         disease_plots (SinglePlotCustomisation): Customisation for all disease benchmarking plots.
         variant_plots (SinglePlotCustomisation): Customisation for all variant benchmarking plots.
     """
+
     gene_plots: SinglePlotCustomisation
     disease_plots: SinglePlotCustomisation
     variant_plots: SinglePlotCustomisation
@@ -61,6 +63,7 @@ class Config(BaseModel):
     Attributes:
         runs (List[RunConfig]): The list of run configurations.
     """
+
     benchmark_name: str
     runs: List[RunConfig]
     plot_customisation: PlotCustomisation
diff --git a/src/pheval/cli.py b/src/pheval/cli.py
index f18318d9e..3bf4e235a 100644
--- a/src/pheval/cli.py
+++ b/src/pheval/cli.py
@@ -6,7 +6,6 @@
 
 from .cli_pheval import run
 from .cli_pheval_utils import (
-    benchmark,
     benchmark,
     create_spiked_vcfs_command,
     generate_stats_plot,
diff --git a/src/pheval/cli_pheval_utils.py b/src/pheval/cli_pheval_utils.py
index 9f81a2696..1f95176e4 100644
--- a/src/pheval/cli_pheval_utils.py
+++ b/src/pheval/cli_pheval_utils.py
@@ -56,7 +56,7 @@
     that will be applied to semantic similarity score column (e.g. jaccard similarity).""",
 )
 def semsim_scramble_command(
-        input: Path, output: Path, score_column: List[str], scramble_factor: float
+    input: Path, output: Path, score_column: List[str], scramble_factor: float
 ):
     """Scrambles semsim profile multiplying score value by scramble factor
     Args:
@@ -107,10 +107,10 @@ def semsim_scramble_command(
     type=Path,
 )
 def scramble_phenopackets_command(
-        phenopacket_path: Path,
-        phenopacket_dir: Path,
-        scramble_factor: float,
-        output_dir: Path,
+    phenopacket_path: Path,
+    phenopacket_dir: Path,
+    scramble_factor: float,
+    output_dir: Path,
 ):
     """Generate noisy phenopackets from existing ones."""
     if phenopacket_path is None and phenopacket_dir is None:
@@ -161,11 +161,11 @@ def scramble_phenopackets_command(
     help="Output path for the difference tsv. Defaults to percentage_diff.semsim.tsv",
 )
 def semsim_comparison(
-        semsim_left: Path,
-        semsim_right: Path,
-        score_column: str,
-        analysis: str,
-        output: Path = "percentage_diff.semsim.tsv",
+    semsim_left: Path,
+    semsim_right: Path,
+    score_column: str,
+    analysis: str,
+    output: Path = "percentage_diff.semsim.tsv",
 ):
     """Compares two semantic similarity profiles
 
@@ -222,7 +222,7 @@ def semsim_comparison(
     type=click.Choice(["ensembl_id", "entrez_id", "hgnc_id"]),
 )
 def update_phenopackets_command(
-        phenopacket_path: Path, phenopacket_dir: Path, output_dir: Path, gene_identifier: str
+    phenopacket_path: Path, phenopacket_dir: Path, output_dir: Path, gene_identifier: str
 ):
     """Update gene symbols and identifiers for phenopackets."""
     if phenopacket_path is None and phenopacket_dir is None:
@@ -299,13 +299,13 @@ def update_phenopackets_command(
     type=Path,
 )
 def create_spiked_vcfs_command(
-        phenopacket_path: Path,
-        phenopacket_dir: Path,
-        output_dir: Path,
-        hg19_template_vcf: Path = None,
-        hg38_template_vcf: Path = None,
-        hg19_vcf_dir: Path = None,
-        hg38_vcf_dir: Path = None,
+    phenopacket_path: Path,
+    phenopacket_dir: Path,
+    output_dir: Path,
+    hg19_template_vcf: Path = None,
+    hg38_template_vcf: Path = None,
+    hg19_vcf_dir: Path = None,
+    hg38_vcf_dir: Path = None,
 ):
     """
     Create spiked VCF from either a Phenopacket or a Phenopacket directory.
@@ -360,9 +360,9 @@ def create_spiked_vcfs_command(
     type=float,
 )
 def benchmark(
-        run_data: Path,
-        score_order: str,
-        threshold: float,
+    run_data: Path,
+    score_order: str,
+    threshold: float,
 ):
     """Benchmark the gene/variant/disease prioritisation performance for two runs."""
     benchmark_run_comparisons(
@@ -406,7 +406,7 @@ def benchmark(
     type=Path,
 )
 def semsim_to_exomiserdb_command(
-        input_file: Path, object_prefix: str, subject_prefix: str, db_path: Path
+    input_file: Path, object_prefix: str, subject_prefix: str, db_path: Path
 ):
     """ingests semsim file into exomiser phenotypic database
 
@@ -473,12 +473,12 @@ def semsim_to_exomiserdb_command(
     help='Title for plot, specify the title on the CLI enclosed with ""',
 )
 def generate_stats_plot(
-        benchmarking_tsv: Path,
-        gene_analysis: bool,
-        variant_analysis: bool,
-        disease_analysis: bool,
-        plot_type: str,
-        title: str = None,
+    benchmarking_tsv: Path,
+    gene_analysis: bool,
+    variant_analysis: bool,
+    disease_analysis: bool,
+    plot_type: str,
+    title: str = None,
 ):
     """Generate bar plot from benchmark stats summary tsv."""
     generate_plots_from_benchmark_summary_tsv(
@@ -576,16 +576,16 @@ def generate_stats_plot(
     type=Path,
 )
 def prepare_corpus_command(
-        phenopacket_dir: Path,
-        variant_analysis: bool,
-        gene_analysis: bool,
-        disease_analysis: bool,
-        gene_identifier: str,
-        hg19_template_vcf: Path,
-        hg38_template_vcf: Path,
-        hg19_vcf_dir: Path,
-        hg38_vcf_dir: Path,
-        output_dir: Path,
+    phenopacket_dir: Path,
+    variant_analysis: bool,
+    gene_analysis: bool,
+    disease_analysis: bool,
+    gene_identifier: str,
+    hg19_template_vcf: Path,
+    hg38_template_vcf: Path,
+    hg19_vcf_dir: Path,
+    hg38_vcf_dir: Path,
+    output_dir: Path,
 ):
     """
     Prepare a corpus of Phenopackets for analysis, optionally checking for complete variant records and updating

From fb87c980808072485c10acb57d53150ded4545f9 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 22 Aug 2024 16:18:51 +0100
Subject: [PATCH 62/81] add function to check if table exists

---
 src/pheval/analyse/get_connection.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/pheval/analyse/get_connection.py b/src/pheval/analyse/get_connection.py
index e03d51450..b295333f4 100644
--- a/src/pheval/analyse/get_connection.py
+++ b/src/pheval/analyse/get_connection.py
@@ -19,7 +19,9 @@ class DBConnector:
 
     def __init__(self, benchmark_name: str):
         """Initialise the DBConnector class."""
-        self.conn = self.get_connection(f"{benchmark_name}.db")
+        self.conn = self.get_connection(
+            f"{benchmark_name}" if str(benchmark_name).endswith(".db") else f"{benchmark_name}.db"
+        )  # append ".db" only when the given name/path does not already end with it
 
     def initialise(self):
         """Initialise the duckdb connection."""
@@ -118,6 +120,14 @@ def parse_table_into_dataclass(
         )
         return [dataclass(**row) for row in result]
 
+    def check_table_exists(self, table_name: str) -> bool:
+        """Return True if a table called `table_name` exists in the connected database."""
+        # Bind the name as a query parameter instead of interpolating it into the SQL text.
+        result = self.conn.execute(
+            "SELECT * FROM information_schema.tables WHERE table_name = ?", [table_name]
+        ).fetchall()
+        return bool(result)
+
     def close(self):
         """Close the connection to the database."""
         self.conn.close()

From 10e6c33965dd7940a929beaacbc211eb7c2e2a35 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 22 Aug 2024 16:19:07 +0100
Subject: [PATCH 63/81] implement methods to gather benchmarking stats results
 from db

---
 src/pheval/analyse/parse_benchmark_summary.py | 86 +++++++++++--------
 1 file changed, 50 insertions(+), 36 deletions(-)

diff --git a/src/pheval/analyse/parse_benchmark_summary.py b/src/pheval/analyse/parse_benchmark_summary.py
index e628292fa..6cf344ca5 100644
--- a/src/pheval/analyse/parse_benchmark_summary.py
+++ b/src/pheval/analyse/parse_benchmark_summary.py
@@ -1,3 +1,4 @@
+from dataclasses import dataclass
 from pathlib import Path
 from typing import List
 
@@ -5,51 +6,29 @@
 
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
+from pheval.analyse.get_connection import DBConnector
 from pheval.analyse.rank_stats import RankStats
 
 
-def read_benchmark_tsv_result_summary(benchmarking_tsv: Path) -> pd.DataFrame:
-    """
-    Read the summary benchmark TSV output generated from the benchmark-comparison command.
+@dataclass
+class BenchmarkSummaryResults:  # per-table results as assembled by parse_benchmark_db
+    gene_results: List[BenchmarkRunResults]  # from gene_summary; None if the table is absent
+    disease_results: List[BenchmarkRunResults]  # from disease_summary; None if the table is absent
+    variant_results: List[BenchmarkRunResults]  # from variant_summary; None if the table is absent
 
-    Args:
-        benchmarking_tsv (Path): Path to the summary benchmark TSV output file.
 
-    Returns:
-        pd.DataFrame: A pandas DataFrame containing specific columns from the TSV file, including:
-                      'results_directory_path', 'top', 'top3', 'top5', 'top10', 'found',
-                      'total', 'mean_reciprocal_rank'.
+def parse_benchmark_results(benchmark_summary_table: pd.DataFrame) -> List[BenchmarkRunResults]:
     """
-    return pd.read_csv(
-        benchmarking_tsv,
-        delimiter="\t",
-        usecols=[
-            "results_directory_path",
-            "top",
-            "top3",
-            "top5",
-            "top10",
-            "found",
-            "total",
-            "mean_reciprocal_rank",
-        ],
-    )
-
-
-def parse_benchmark_result_summary(benchmarking_df: pd.DataFrame) -> List[BenchmarkRunResults]:
-    """
-    Parse the summary benchmark DataFrame into a list of BenchmarkRunResults.
+    Parse benchmark results from a DataFrame.
 
     Args:
-        benchmarking_df (pd.DataFrame): Summary benchmark DataFrame containing columns such as
-                                        'results_directory_path', 'top', 'top3', 'top5', 'top10',
-                                        'found', 'total', 'mean_reciprocal_rank'.
+        benchmark_summary_table (pd.DataFrame): DataFrame containing benchmark results.
 
     Returns:
-        List[BenchmarkRunResults]: A list of BenchmarkRunResults instances generated from the DataFrame.
+        List[BenchmarkRunResults]: A list of BenchmarkRunResults objects parsed from the DataFrame.
     """
-    benchmarking_results = []
-    for _, row in benchmarking_df.iterrows():
+    results = []
+    for _, row in benchmark_summary_table.iterrows():
         benchmarking_result = BenchmarkRunResults(
             rank_stats=RankStats(
                 top=row["top"],
@@ -63,5 +42,40 @@ def parse_benchmark_result_summary(benchmarking_df: pd.DataFrame) -> List[Benchm
             benchmark_name=row["results_directory_path"],
             binary_classification_stats=BinaryClassificationStats(),
         )
-        benchmarking_results.append(benchmarking_result)
-    return benchmarking_results
+        results.append(benchmarking_result)
+    return results
+
+
+def parse_benchmark_db(benchmarking_db: Path) -> BenchmarkSummaryResults:
+    """
+    Read the gene, disease and variant summary tables from a benchmark db.
+
+    Each field of the result is None when the corresponding table does not exist.
+
+    Args:
+        benchmarking_db (Path): Path to the benchmark db.
+
+    Returns:
+        BenchmarkSummaryResults: A dataclass containing all benchmarking results contained in the db.
+    """
+    db_connector = DBConnector(benchmarking_db)
+    summaries = {
+        "gene_summary": None,
+        "disease_summary": None,
+        "variant_summary": None,
+    }
+    try:
+        # Table names are the fixed literals above, so interpolating them into SQL is safe.
+        for table_name in summaries:
+            if db_connector.check_table_exists(table_name):
+                summaries[table_name] = parse_benchmark_results(
+                    db_connector.conn.execute(f"SELECT * FROM {table_name}").fetchdf()
+                )
+    finally:
+        # Release the database handle even if a query or the parsing fails.
+        db_connector.close()
+    return BenchmarkSummaryResults(
+        gene_results=summaries["gene_summary"],
+        disease_results=summaries["disease_summary"],
+        variant_results=summaries["variant_summary"],
+    )

From 7b23d35d0b02f1da6a84047b248c59a48cdbc8a8 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 22 Aug 2024 16:19:19 +0100
Subject: [PATCH 64/81] implement methods to gather benchmarking stats results
 from db

---
 src/pheval/analyse/generate_plots.py | 61 ++++++++++++++--------------
 1 file changed, 30 insertions(+), 31 deletions(-)

diff --git a/src/pheval/analyse/generate_plots.py b/src/pheval/analyse/generate_plots.py
index 9f02baedd..462087595 100644
--- a/src/pheval/analyse/generate_plots.py
+++ b/src/pheval/analyse/generate_plots.py
@@ -14,10 +14,8 @@
     VariantBenchmarkRunOutputGenerator,
 )
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
-from pheval.analyse.parse_benchmark_summary import (
-    parse_benchmark_result_summary,
-    read_benchmark_tsv_result_summary,
-)
+from pheval.analyse.parse_benchmark_summary import parse_benchmark_db
+from pheval.analyse.run_data_parser import parse_run_config
 from pheval.constants import PHEVAL_RESULTS_DIRECTORY_SUFFIX
 
 
@@ -489,7 +487,7 @@ def generate_non_cumulative_bar(
 def generate_plots(
     benchmarking_results: List[BenchmarkRunResults],
     benchmark_generator: BenchmarkRunOutputGenerator,
-    generate_from_tsv: bool = False,
+    generate_from_db: bool = False,
 ) -> None:
     """
     Generate summary statistics bar plots for prioritisation.
@@ -499,10 +497,10 @@ def generate_plots(
     Args:
         benchmarking_results (list[BenchmarkRunResults]): List of benchmarking results for multiple runs.
         benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
-        generate_from_tsv (bool): Specify whether to generate plots from the TSV file. Defaults to False.
+        generate_from_db (bool): Specify whether to generate plots from the db file. Defaults to False.
     """
     plot_generator = PlotGenerator()
-    if not generate_from_tsv:
+    if not generate_from_db:
         plot_generator.generate_roc_curve(benchmarking_results, benchmark_generator)
         plot_generator.generate_precision_recall(benchmarking_results, benchmark_generator)
     if benchmark_generator.plot_customisation.plot_type == "bar_stacked":
@@ -513,36 +511,37 @@ def generate_plots(
         plot_generator.generate_non_cumulative_bar(benchmarking_results, benchmark_generator)
 
 
-def generate_plots_from_benchmark_summary_tsv(
-    benchmark_summary_tsv: Path,
-    gene_analysis: bool,
-    variant_analysis: bool,
-    disease_analysis: bool,
+def generate_plots_from_benchmark_summary_db(
+    benchmark_db: Path,
+    run_data: Path,
 ):
     """
     Generate bar plot from summary benchmark results.
 
-    Reads a summary of benchmark results from a TSV file and generates a bar plot
+    Reads a summary of benchmark results from a benchmark db and generates a bar plot
     based on the analysis type and plot type.
 
     Args:
-        benchmark_summary_tsv (Path): Path to the summary TSV file containing benchmark results.
-        gene_analysis (bool): Flag indicating whether to analyse gene prioritisation.
-        variant_analysis (bool): Flag indicating whether to analyse variant prioritisation.
-        disease_analysis (bool): Flag indicating whether to analyse disease prioritisation.
-    Raises:
-         ValueError: If an unsupported plot type is specified.
+        benchmark_db (Path): Path to the benchmark db containing the summary result tables.
+        run_data (Path): Path to YAML benchmarking configuration file.
     """
-    benchmark_stats_summary = read_benchmark_tsv_result_summary(benchmark_summary_tsv)
-    benchmarking_results = parse_benchmark_result_summary(benchmark_stats_summary)
-    if gene_analysis:
-        benchmark_generator = GeneBenchmarkRunOutputGenerator()
-    elif variant_analysis:
-        benchmark_generator = VariantBenchmarkRunOutputGenerator()
-    elif disease_analysis:
-        benchmark_generator = DiseaseBenchmarkRunOutputGenerator()
-    else:
-        raise ValueError(
-            "Specify one analysis type (gene_analysis, variant_analysis, or disease_analysis)"
+    benchmark_stats_summary = parse_benchmark_db(benchmark_db)
+    config = parse_run_config(run_data)
+    if benchmark_stats_summary.gene_results:
+        generate_plots(
+            benchmark_stats_summary.gene_results,
+            GeneBenchmarkRunOutputGenerator(config.plot_customisation.gene_plots),
+            True,
+        )
+    if benchmark_stats_summary.variant_results:
+        generate_plots(
+            benchmark_stats_summary.variant_results,
+            VariantBenchmarkRunOutputGenerator(config.plot_customisation.variant_plots),
+            True,
+        )
+    if benchmark_stats_summary.disease_results:  # independent of whether variant results exist
+        generate_plots(
+            benchmark_stats_summary.disease_results,
+            DiseaseBenchmarkRunOutputGenerator(config.plot_customisation.disease_plots),
+            True,
         )
-    generate_plots(benchmarking_results, benchmark_generator, True)

From 6e4b1834a49bc4a42ed1e5f59b9711a43e5119d1 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 22 Aug 2024 16:19:40 +0100
Subject: [PATCH 65/81] refactor `generate_plots_from_benchmark_summary_tsv` to
 `generate_plots_from_benchmark_summary_db`

---
 src/pheval/cli_pheval_utils.py | 68 +++++++---------------------------
 1 file changed, 13 insertions(+), 55 deletions(-)

diff --git a/src/pheval/cli_pheval_utils.py b/src/pheval/cli_pheval_utils.py
index 1f95176e4..dc56a5264 100644
--- a/src/pheval/cli_pheval_utils.py
+++ b/src/pheval/cli_pheval_utils.py
@@ -6,7 +6,7 @@
 import click
 
 from pheval.analyse.analysis import benchmark_run_comparisons
-from pheval.analyse.generate_plots import generate_plots_from_benchmark_summary_tsv
+from pheval.analyse.generate_plots import generate_plots_from_benchmark_summary_db
 from pheval.analyse.run_data_parser import parse_run_config
 from pheval.prepare.create_noisy_phenopackets import scramble_phenopackets
 from pheval.prepare.create_spiked_vcf import spike_vcfs
@@ -421,69 +421,27 @@ def semsim_to_exomiserdb_command(
 
 @click.command()
 @click.option(
-    "--benchmarking-tsv",
+    "--benchmark-db",
     "-b",
     required=True,
     metavar="PATH",
-    help="Path to benchmark summary tsv output by PhEval benchmark commands.",
+    help="Path to benchmark db output by PhEval benchmark commands.",
     type=Path,
 )
 @click.option(
-    "--gene-analysis/--no-gene-analysis",
-    default=False,
-    required=False,
-    type=bool,
-    show_default=True,
-    help="Specify analysis for gene prioritisation",
-    cls=MutuallyExclusiveOptionError,
-    mutually_exclusive=["variant_analysis", "disease_analysis"],
-)
-@click.option(
-    "--variant-analysis/--no-variant-analysis",
-    default=False,
-    required=False,
-    type=bool,
-    show_default=True,
-    help="Specify analysis for variant prioritisation",
-    cls=MutuallyExclusiveOptionError,
-    mutually_exclusive=["gene_analysis", "disease_analysis"],
-)
-@click.option(
-    "--disease-analysis/--no-disease-analysis",
-    default=False,
-    required=False,
-    type=bool,
-    show_default=True,
-    help="Specify analysis for disease prioritisation",
-    cls=MutuallyExclusiveOptionError,
-    mutually_exclusive=["gene_analysis", "variant_analysis"],
-)
-@click.option(
-    "--plot-type",
-    "-y",
-    default="bar_cumulative",
-    show_default=True,
-    type=click.Choice(["bar_stacked", "bar_cumulative", "bar_non_cumulative"]),
-    help="Bar chart type to output.",
-)
-@click.option(
-    "--title",
-    "-t",
-    type=str,
-    help='Title for plot, specify the title on the CLI enclosed with ""',
+    "--run-data",
+    "-r",
+    required=True,
+    metavar="PATH",
+    help="Path to yaml configuration file for benchmarking.",
+    type=Path,
 )
 def generate_stats_plot(
-    benchmarking_tsv: Path,
-    gene_analysis: bool,
-    variant_analysis: bool,
-    disease_analysis: bool,
-    plot_type: str,
-    title: str = None,
+    benchmark_db: Path,
+    run_data: Path,
 ):
-    """Generate bar plot from benchmark stats summary tsv."""
-    generate_plots_from_benchmark_summary_tsv(
-        benchmarking_tsv, gene_analysis, variant_analysis, disease_analysis, plot_type, title
-    )
+    """Generate bar plot from benchmark db."""
+    generate_plots_from_benchmark_summary_db(benchmark_db, run_data)
 
 
 @click.command("prepare-corpus")

From c2dcea168e55867d4558d84e74c56f8b08de3d2e Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 22 Aug 2024 16:25:39 +0100
Subject: [PATCH 66/81] add customisation of plot output file names

---
 src/pheval/analyse/generate_plots.py | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/src/pheval/analyse/generate_plots.py b/src/pheval/analyse/generate_plots.py
index 462087595..4d680b450 100644
--- a/src/pheval/analyse/generate_plots.py
+++ b/src/pheval/analyse/generate_plots.py
@@ -48,9 +48,7 @@ class PlotGenerator:
         "#1b9e77",
     ]
 
-    def __init__(
-        self,
-    ):
+    def __init__(self, benchmark_name: str):
         """
         Initialise the PlotGenerator class.
         Note:
@@ -59,6 +57,7 @@ def __init__(
             Matplotlib settings are configured to remove the right and top axes spines
             for generated plots.
         """
+        self.benchmark_name = benchmark_name
         self.stats, self.mrr = [], []
         matplotlib.rcParams["axes.spines.right"] = False
         matplotlib.rcParams["axes.spines.top"] = False
@@ -171,7 +170,7 @@ def generate_stacked_bar_plot(
             )
         plt.ylim(0, 100)
         plt.savefig(
-            f"{benchmark_generator.prioritisation_type_string}_rank_stats.svg",
+            f"{self.benchmark_name}_{benchmark_generator.prioritisation_type_string}_rank_stats.svg",
             format="svg",
             bbox_inches="tight",
         )
@@ -189,7 +188,7 @@ def generate_stacked_bar_plot(
         )
         plt.ylim(0, 1)
         plt.savefig(
-            f"{benchmark_generator.prioritisation_type_string}_mrr.svg",
+            f"{self.benchmark_name}_{benchmark_generator.prioritisation_type_string}_mrr.svg",
             format="svg",
             bbox_inches="tight",
         )
@@ -284,7 +283,7 @@ def generate_cumulative_bar(
             )
         plt.ylim(0, 1)
         plt.savefig(
-            f"{benchmark_generator.prioritisation_type_string}_rank_stats.svg",
+            f"{self.benchmark_name}_{benchmark_generator.prioritisation_type_string}_rank_stats.svg",
             format="svg",
             bbox_inches="tight",
         )
@@ -393,7 +392,7 @@ def generate_roc_curve(
             plt.title(benchmark_generator.plot_customisation.roc_curve_title)
         plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.15))
         plt.savefig(
-            f"{benchmark_generator.prioritisation_type_string}_roc_curve.svg",
+            f"{self.benchmark_name}_{benchmark_generator.prioritisation_type_string}_roc_curve.svg",
             format="svg",
             bbox_inches="tight",
         )
@@ -435,7 +434,7 @@ def generate_precision_recall(
             plt.title(benchmark_generator.plot_customisation.precision_recall_title)
         plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.15))
         plt.savefig(
-            f"{benchmark_generator.prioritisation_type_string}_precision_recall_curve.svg",
+            f"{self.benchmark_name}_{benchmark_generator.prioritisation_type_string}_pr_curve.svg",
             format="svg",
             bbox_inches="tight",
         )
@@ -478,13 +477,14 @@ def generate_non_cumulative_bar(
             )
         plt.ylim(0, 1)
         plt.savefig(
-            f"{benchmark_generator.prioritisation_type_string}_rank_stats.svg",
+            f"{self.benchmark_name}_{benchmark_generator.prioritisation_type_string}_rank_stats.svg",
             format="svg",
             bbox_inches="tight",
         )
 
 
 def generate_plots(
+    benchmark_name: str,
     benchmarking_results: List[BenchmarkRunResults],
     benchmark_generator: BenchmarkRunOutputGenerator,
     generate_from_db: bool = False,
@@ -499,7 +499,7 @@ def generate_plots(
         benchmark_generator (BenchmarkRunOutputGenerator): Object containing benchmarking output generation details.
         generate_from_db (bool): Specify whether to generate plots from the db file. Defaults to False.
     """
-    plot_generator = PlotGenerator()
+    plot_generator = PlotGenerator(benchmark_name)
     if not generate_from_db:
         plot_generator.generate_roc_curve(benchmarking_results, benchmark_generator)
         plot_generator.generate_precision_recall(benchmarking_results, benchmark_generator)
@@ -529,18 +529,21 @@ def generate_plots_from_benchmark_summary_db(
     config = parse_run_config(run_data)
     if benchmark_stats_summary.gene_results:
         generate_plots(
+            config.benchmark_name,
             benchmark_stats_summary.gene_results,
             GeneBenchmarkRunOutputGenerator(config.plot_customisation.gene_plots),
             True,
         )
     if benchmark_stats_summary.variant_results:
         generate_plots(
+            config.benchmark_name,
             benchmark_stats_summary.variant_results,
             VariantBenchmarkRunOutputGenerator(config.plot_customisation.variant_plots),
             True,
         )
     elif benchmark_stats_summary.disease_results:
         generate_plots(
+            config.benchmark_name,
             benchmark_stats_summary.disease_results,
             DiseaseBenchmarkRunOutputGenerator(config.plot_customisation.disease_plots),
             True,

From 7f0509669f8b8f1cdefae3c42f05f693afc26de2 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 22 Aug 2024 16:26:18 +0100
Subject: [PATCH 67/81] add missing argument

---
 tests/test_generate_plots.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_generate_plots.py b/tests/test_generate_plots.py
index 42469f0e3..83842c686 100644
--- a/tests/test_generate_plots.py
+++ b/tests/test_generate_plots.py
@@ -10,9 +10,9 @@
 
 class TestPlotGenerator(unittest.TestCase):
     def setUp(self) -> None:
-        self.gene_plot_generator = PlotGenerator()
-        self.variant_plot_generator = PlotGenerator()
-        self.disease_plot_generator = PlotGenerator()
+        self.gene_plot_generator = PlotGenerator("test")
+        self.variant_plot_generator = PlotGenerator("test")
+        self.disease_plot_generator = PlotGenerator("test")
         self.benchmarking_result = BenchmarkRunResults(
             benchmark_name="tool_corpus",
             rank_stats=RankStats(

From c4420b653d78c01499964a610978449c30e61e90 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 22 Aug 2024 16:33:01 +0100
Subject: [PATCH 68/81] refactor `benchmark` to `generate_benchmark_stats`

---
 src/pheval/cli.py              | 5 ++---
 src/pheval/cli_pheval_utils.py | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/pheval/cli.py b/src/pheval/cli.py
index 3bf4e235a..c9d3654f1 100644
--- a/src/pheval/cli.py
+++ b/src/pheval/cli.py
@@ -6,8 +6,8 @@
 
 from .cli_pheval import run
 from .cli_pheval_utils import (
-    benchmark,
     create_spiked_vcfs_command,
+    generate_benchmark_stats,
     generate_stats_plot,
     prepare_corpus_command,
     scramble_phenopackets_command,
@@ -56,8 +56,7 @@ def pheval_utils():
 pheval_utils.add_command(scramble_phenopackets_command)
 pheval_utils.add_command(update_phenopackets_command)
 pheval_utils.add_command(create_spiked_vcfs_command)
-pheval_utils.add_command(benchmark)
-pheval_utils.add_command(benchmark)
+pheval_utils.add_command(generate_benchmark_stats)
 pheval_utils.add_command(semsim_to_exomiserdb_command)
 pheval_utils.add_command(generate_stats_plot)
 pheval_utils.add_command(prepare_corpus_command)
diff --git a/src/pheval/cli_pheval_utils.py b/src/pheval/cli_pheval_utils.py
index dc56a5264..ab3c4aae9 100644
--- a/src/pheval/cli_pheval_utils.py
+++ b/src/pheval/cli_pheval_utils.py
@@ -359,7 +359,7 @@ def create_spiked_vcfs_command(
     help="Score threshold.",
     type=float,
 )
-def benchmark(
+def generate_benchmark_stats(
     run_data: Path,
     score_order: str,
     threshold: float,

From 053126a0e9c2344618761632e73420c72151f8b1 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 27 Aug 2024 15:08:17 +0100
Subject: [PATCH 69/81] remove threshold and score order parameters as these
 are now included in the config

---
 src/pheval/cli_pheval_utils.py | 30 ++++--------------------------
 1 file changed, 4 insertions(+), 26 deletions(-)

diff --git a/src/pheval/cli_pheval_utils.py b/src/pheval/cli_pheval_utils.py
index ab3c4aae9..ce522a632 100644
--- a/src/pheval/cli_pheval_utils.py
+++ b/src/pheval/cli_pheval_utils.py
@@ -334,41 +334,19 @@ def create_spiked_vcfs_command(
 
 @click.command()
 @click.option(
-    "--run-data",
+    "--run-yaml",
     "-r",
     required=True,
     metavar="PATH",
     help="Path to yaml configuration file for benchmarking.",
     type=Path,
 )
-@click.option(
-    "--score-order",
-    "-so",
-    required=True,
-    help="Ordering of results for ranking.",
-    type=click.Choice(["ascending", "descending"]),
-    default="descending",
-    show_default=True,
-)
-@click.option(
-    "--threshold",
-    "-t",
-    metavar="<float>",
-    default=float(0.0),
-    required=False,
-    help="Score threshold.",
-    type=float,
-)
 def generate_benchmark_stats(
-    run_data: Path,
-    score_order: str,
-    threshold: float,
+    run_yaml: Path,
 ):
-    """Benchmark the gene/variant/disease prioritisation performance for two runs."""
+    """Benchmark the gene/variant/disease prioritisation performance for runs."""
     benchmark_run_comparisons(
-        parse_run_config(run_data),
-        score_order,
-        threshold,
+        parse_run_config(run_yaml),
     )
 
 

From 697561bba4da18824c2f7fcc97149a5e02df0b5c Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 27 Aug 2024 15:22:32 +0100
Subject: [PATCH 70/81] remove threshold and score order parameters

---
 src/pheval/analyse/analysis.py | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/src/pheval/analyse/analysis.py b/src/pheval/analyse/analysis.py
index ea50e9c0c..b351b417e 100644
--- a/src/pheval/analyse/analysis.py
+++ b/src/pheval/analyse/analysis.py
@@ -12,8 +12,6 @@
 
 def _run_benchmark_comparison(
     run_config: Config,
-    score_order: str,
-    threshold: float,
     benchmark_generator: BenchmarkRunOutputGenerator,
 ) -> None:
     """
@@ -22,8 +20,6 @@ def _run_benchmark_comparison(
     Args:
         run_config (List[TrackInputOutputDirectories]): List of input and output directories
             for tracking results across multiple directories.
-        score_order (str): The order in which scores are arranged, this can be either ascending or descending.
-        threshold (float): The threshold for benchmark evaluation.
         benchmark_generator (BenchmarkRunOutputGenerator): Generator for benchmark run output.
     """
     stats_writer = RankStatsWriter(
@@ -39,7 +35,7 @@ def _run_benchmark_comparison(
     benchmarking_results = []
     for run in run_config.runs:
         benchmark_result = benchmark_generator.generate_benchmark_run_results(
-            run_config.benchmark_name, run, score_order, threshold
+            run_config.benchmark_name, run, run.score_order, run.threshold
         )
         stats_writer.add_statistics_entry(
             run.run_identifier,
@@ -63,16 +59,12 @@ def _run_benchmark_comparison(
 
 def benchmark_run_comparisons(
     run_config: Config,
-    score_order: str,
-    threshold: float,
 ) -> None:
     """
     Benchmark prioritisation performance for several runs.
 
     Args:
         run_config (Config): Run configurations.
-        score_order (str): The order in which scores are arranged, this can be either ascending or descending.
-        threshold (float): The threshold for benchmark evaluation.
     """
     gene_analysis_runs = Config(
         benchmark_name=run_config.benchmark_name,
@@ -92,8 +84,6 @@ def benchmark_run_comparisons(
     if gene_analysis_runs.runs:
         _run_benchmark_comparison(
             run_config=gene_analysis_runs,
-            score_order=score_order,
-            threshold=threshold,
             benchmark_generator=GeneBenchmarkRunOutputGenerator(
                 plot_customisation=gene_analysis_runs.plot_customisation.gene_plots
             ),
@@ -101,8 +91,6 @@ def benchmark_run_comparisons(
     if variant_analysis_runs.runs:
         _run_benchmark_comparison(
             run_config=variant_analysis_runs,
-            score_order=score_order,
-            threshold=threshold,
             benchmark_generator=VariantBenchmarkRunOutputGenerator(
                 plot_customisation=variant_analysis_runs.plot_customisation.variant_plots
             ),
@@ -110,8 +98,6 @@ def benchmark_run_comparisons(
     if disease_analysis_runs.runs:
         _run_benchmark_comparison(
             run_config=disease_analysis_runs,
-            score_order=score_order,
-            threshold=threshold,
             benchmark_generator=DiseaseBenchmarkRunOutputGenerator(
                 plot_customisation=disease_analysis_runs.plot_customisation.disease_plots
             ),

From a3608e21962770eae4b59bb2144e5b187fa100ce Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 27 Aug 2024 15:23:11 +0100
Subject: [PATCH 71/81] add threshold and score order parameters with default
 values

---
 src/pheval/analyse/run_data_parser.py | 52 ++++++++++++++++++++++++---
 1 file changed, 47 insertions(+), 5 deletions(-)

diff --git a/src/pheval/analyse/run_data_parser.py b/src/pheval/analyse/run_data_parser.py
index 1689499ab..6c81e6c93 100644
--- a/src/pheval/analyse/run_data_parser.py
+++ b/src/pheval/analyse/run_data_parser.py
@@ -2,7 +2,7 @@
 from typing import List, Optional
 
 import yaml
-from pydantic import BaseModel
+from pydantic import BaseModel, root_validator
 
 
 class RunConfig(BaseModel):
@@ -16,6 +16,8 @@ class RunConfig(BaseModel):
         gene_analysis (bool): Whether or not to benchmark gene analysis results.
         variant_analysis (bool): Whether or not to benchmark variant analysis results.
         disease_analysis (bool): Whether or not to benchmark disease analysis results.
+        threshold (Optional[float]): The threshold to consider for benchmarking.
+        score_order (Optional[str]): The order of scores to consider for benchmarking, either ascending or descending.
     """
 
     run_identifier: str
@@ -24,6 +26,28 @@ class RunConfig(BaseModel):
     gene_analysis: bool
     variant_analysis: bool
     disease_analysis: bool
+    threshold: Optional[float]
+    score_order: Optional[str]
+
+    @classmethod
+    @root_validator(pre=True)
+    def handle_blank_fields(cls, values: dict) -> dict:
+        """
+        Root validator to handle fields that may be explicitly set to None.
+
+        This method checks if 'threshold' and 'score_order' are None and assigns default values if so.
+
+        Args:
+            values (dict): The input values provided to the model.
+
+        Returns:
+            dict: The updated values with defaults applied where necessary.
+        """
+        if values.get("threshold") is None:
+            values["threshold"] = 0
+        if values.get("score_order") is None:
+            values["score_order"] = "descending"
+        return values
 
 
 class SinglePlotCustomisation(BaseModel):
@@ -37,11 +61,29 @@ class SinglePlotCustomisation(BaseModel):
         precision_recall_title (str): The title for the precision-recall plot.
     """
 
-    plot_type: Optional[str]
+    plot_type: Optional[str] = "bar_cumulative"
     rank_plot_title: Optional[str]
     roc_curve_title: Optional[str]
     precision_recall_title: Optional[str]
 
+    @classmethod
+    @root_validator(pre=True)
+    def handle_blank_fields(cls, values: dict) -> dict:
+        """
+        Root validator to handle fields that may be explicitly set to None.
+
+        This method checks if 'plot_type' is None and assigns default value if so.
+
+        Args:
+            values (dict): The input values provided to the model.
+
+        Returns:
+            dict: The updated values with defaults applied where necessary.
+        """
+        if values.get("plot_type") is None:
+            values["plot_type"] = "bar_cumulative"
+        return values
+
 
 class PlotCustomisation(BaseModel):
     """
@@ -69,15 +111,15 @@ class Config(BaseModel):
     plot_customisation: PlotCustomisation
 
 
-def parse_run_config(run_data_path: Path) -> Config:
+def parse_run_config(run_config: Path) -> Config:
     """
     Parse a run configuration yaml file.
     Args:
-        run_data_path (Path): The path to the run data yaml configuration.
+        run_config (Path): The path to the run data yaml configuration.
     Returns:
         Config: The parsed run configurations.
     """
-    with open(run_data_path, "r") as f:
+    with open(run_config, "r") as f:
         config_data = yaml.safe_load(f)
     f.close()
     config = Config(**config_data)

From 0a8d7c4ca1381d7d9e9082a5abd20e850b4c0824 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 27 Aug 2024 15:23:39 +0100
Subject: [PATCH 72/81] refactor `DBConnector` to `BenchmarkDBManager`

---
 .../{get_connection.py => benchmark_db_manager.py} | 11 +++++++++--
 .../analyse/disease_prioritisation_analysis.py     |  8 ++++----
 src/pheval/analyse/gene_prioritisation_analysis.py |  8 ++++----
 src/pheval/analyse/generate_summary_outputs.py     |  9 +++++----
 src/pheval/analyse/parse_benchmark_summary.py      |  4 ++--
 src/pheval/analyse/parse_corpus.py                 |  4 ++--
 src/pheval/analyse/rank_stats.py                   |  8 ++++----
 .../analyse/variant_prioritisation_analysis.py     |  8 ++++----
 tests/test_analysis.py                             | 14 +++++++-------
 tests/test_generate_summary_outputs.py             |  6 +++---
 tests/test_rank_stats.py                           |  2 +-
 11 files changed, 45 insertions(+), 37 deletions(-)
 rename src/pheval/analyse/{get_connection.py => benchmark_db_manager.py} (92%)

diff --git a/src/pheval/analyse/get_connection.py b/src/pheval/analyse/benchmark_db_manager.py
similarity index 92%
rename from src/pheval/analyse/get_connection.py
rename to src/pheval/analyse/benchmark_db_manager.py
index b295333f4..fb69c2c6a 100644
--- a/src/pheval/analyse/get_connection.py
+++ b/src/pheval/analyse/benchmark_db_manager.py
@@ -12,13 +12,13 @@
 )
 
 
-class DBConnector:
+class BenchmarkDBManager:
     """
     Class to connect to database.
     """
 
     def __init__(self, benchmark_name: str):
-        """Initialise the DBConnector class."""
+        """Initialise the BenchmarkDBManager class."""
         self.conn = self.get_connection(
             f"{benchmark_name}" if str(benchmark_name).endswith(".db") else f"{benchmark_name}.db"
         )
@@ -121,6 +121,13 @@ def parse_table_into_dataclass(
         return [dataclass(**row) for row in result]
 
     def check_table_exists(self, table_name: str) -> bool:
+        """
+        Check if a table exists in the connected DuckDB database.
+        Args:
+            table_name (str): The name of the table to check for existence.
+        Returns:
+            bool: Returns `True` if the table exists in the database, `False` otherwise.
+        """
         result = self.conn.execute(
             f"SELECT * FROM information_schema.tables WHERE table_name = '{table_name}'"
         ).fetchall()
diff --git a/src/pheval/analyse/disease_prioritisation_analysis.py b/src/pheval/analyse/disease_prioritisation_analysis.py
index 9a4d29f79..23f95d9b3 100644
--- a/src/pheval/analyse/disease_prioritisation_analysis.py
+++ b/src/pheval/analyse/disease_prioritisation_analysis.py
@@ -1,8 +1,8 @@
 from pathlib import Path
 
+from pheval.analyse.benchmark_db_manager import BenchmarkDBManager
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
-from pheval.analyse.get_connection import DBConnector
 from pheval.analyse.rank_stats import RankStats
 from pheval.analyse.run_data_parser import RunConfig
 from pheval.post_processing.post_processing import RankedPhEvalDiseaseResult
@@ -14,7 +14,7 @@ class AssessDiseasePrioritisation:
 
     def __init__(
         self,
-        db_connection: DBConnector,
+        db_connection: BenchmarkDBManager,
         table_name: str,
         column: str,
         threshold: float,
@@ -24,7 +24,7 @@ def __init__(
         Initialise AssessDiseasePrioritisation class
 
         Args:
-            db_connection (DBConnector): Database connection
+            db_connection (BenchmarkDBManager): Database connection
             table_name (str): Table name
             column (Path): Column name
             threshold (float): Threshold for scores
@@ -208,7 +208,7 @@ def benchmark_disease_prioritisation(
         including ranks and rank statistics for the benchmarked directory.
     """
     disease_binary_classification_stats = BinaryClassificationStats()
-    db_connection = DBConnector(benchmark_name)
+    db_connection = BenchmarkDBManager(benchmark_name)
     db_connection.initialise()
     disease_benchmarker = AssessDiseasePrioritisation(
         db_connection,
diff --git a/src/pheval/analyse/gene_prioritisation_analysis.py b/src/pheval/analyse/gene_prioritisation_analysis.py
index adb262062..61edd3190 100644
--- a/src/pheval/analyse/gene_prioritisation_analysis.py
+++ b/src/pheval/analyse/gene_prioritisation_analysis.py
@@ -1,8 +1,8 @@
 from pathlib import Path
 
+from pheval.analyse.benchmark_db_manager import BenchmarkDBManager
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
-from pheval.analyse.get_connection import DBConnector
 from pheval.analyse.rank_stats import RankStats
 from pheval.analyse.run_data_parser import RunConfig
 from pheval.post_processing.post_processing import RankedPhEvalGeneResult
@@ -14,7 +14,7 @@ class AssessGenePrioritisation:
 
     def __init__(
         self,
-        db_connection: DBConnector,
+        db_connection: BenchmarkDBManager,
         table_name: str,
         column: str,
         threshold: float,
@@ -24,7 +24,7 @@ def __init__(
         Initialise AssessGenePrioritisation class.
 
         Args:
-            db_connection (DBConnector): Database connection
+            db_connection (BenchmarkDBManager): Database connection
             table_name (str): Table name
             column (Path): Column name
             threshold (float): Threshold for scores
@@ -190,7 +190,7 @@ def benchmark_gene_prioritisation(
          including ranks and rank statistics for the benchmarked directory.
     """
     gene_binary_classification_stats = BinaryClassificationStats()
-    db_connection = DBConnector(benchmark_name)
+    db_connection = BenchmarkDBManager(benchmark_name)
     db_connection.initialise()
     gene_benchmarker = AssessGenePrioritisation(
         db_connection,
diff --git a/src/pheval/analyse/generate_summary_outputs.py b/src/pheval/analyse/generate_summary_outputs.py
index abf217ef8..7efe616f2 100644
--- a/src/pheval/analyse/generate_summary_outputs.py
+++ b/src/pheval/analyse/generate_summary_outputs.py
@@ -1,10 +1,10 @@
 import itertools
 from typing import List
 
+from pheval.analyse.benchmark_db_manager import BenchmarkDBManager
 from pheval.analyse.benchmark_generator import BenchmarkRunOutputGenerator
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.generate_plots import generate_plots
-from pheval.analyse.get_connection import DBConnector
 from pheval.constants import RANK_COMPARISON_SUFFIX
 
 
@@ -25,7 +25,7 @@ def get_new_table_name(run_identifier_1: str, run_identifier_2: str, output_pref
 
 def create_comparison_table(
     comparison_table_name: str,
-    connector: DBConnector,
+    connector: BenchmarkDBManager,
     drop_columns: List[str],
     run_identifier_1: str,
     run_identifier_2: str,
@@ -35,7 +35,7 @@ def create_comparison_table(
     Create rank comparison tables.
     Args:
         comparison_table_name (str): Name of the comparison table to create.
-        connector (DBConnector): DBConnector instance.
+        connector (BenchmarkDBManager): BenchmarkDBManager instance.
         drop_columns (List[str]): List of columns to drop.
         run_identifier_1 (str): The first run identifier.
         run_identifier_2 (str): The second run identifier.
@@ -83,7 +83,7 @@ def generate_benchmark_comparison_output(
         table_name (str): The name of the table where ranks are stored.
     """
     output_prefix = benchmark_generator.prioritisation_type_string
-    connector = DBConnector(benchmark_name)
+    connector = BenchmarkDBManager(benchmark_name)
     for pair in itertools.combinations(
         [str(result.benchmark_name) for result in benchmarking_results], 2
     ):
@@ -102,6 +102,7 @@ def generate_benchmark_comparison_output(
             table_name,
         )
     generate_plots(
+        benchmark_name,
         benchmarking_results,
         benchmark_generator,
     )
diff --git a/src/pheval/analyse/parse_benchmark_summary.py b/src/pheval/analyse/parse_benchmark_summary.py
index 6cf344ca5..8970b52bb 100644
--- a/src/pheval/analyse/parse_benchmark_summary.py
+++ b/src/pheval/analyse/parse_benchmark_summary.py
@@ -4,9 +4,9 @@
 
 import pandas as pd
 
+from pheval.analyse.benchmark_db_manager import BenchmarkDBManager
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
-from pheval.analyse.get_connection import DBConnector
 from pheval.analyse.rank_stats import RankStats
 
 
@@ -56,7 +56,7 @@ def parse_benchmark_db(benchmarking_db: Path) -> BenchmarkSummaryResults:
     Returns:
         BenchmarkSummaryResults: A dataclass containing all benchmarking results contained in the db.
     """
-    db_connector = DBConnector(benchmarking_db)
+    db_connector = BenchmarkDBManager(benchmarking_db)
     gene_benchmarking_results, disease_benchmarking_results, variant_benchmarking_results = (
         None,
         None,
diff --git a/src/pheval/analyse/parse_corpus.py b/src/pheval/analyse/parse_corpus.py
index de2844e3f..af7a7e6f4 100644
--- a/src/pheval/analyse/parse_corpus.py
+++ b/src/pheval/analyse/parse_corpus.py
@@ -1,13 +1,13 @@
 from pathlib import Path
 from typing import List
 
+from pheval.analyse.benchmark_db_manager import BenchmarkDBManager
 from pheval.analyse.benchmark_generator import (
     BenchmarkRunOutputGenerator,
     DiseaseBenchmarkRunOutputGenerator,
     GeneBenchmarkRunOutputGenerator,
     VariantBenchmarkRunOutputGenerator,
 )
-from pheval.analyse.get_connection import DBConnector
 from pheval.utils.file_utils import all_files
 from pheval.utils.phenopacket_utils import (
     GenomicVariant,
@@ -72,7 +72,7 @@ def __init__(self, benchmark_name: str, phenopacket_dir: Path) -> None:
             phenopacket_dir (Path): Path to the Phenopacket directory.
         """
         self.phenopacket_dir = phenopacket_dir
-        self.conn = DBConnector(benchmark_name).conn
+        self.conn = BenchmarkDBManager(benchmark_name).conn
         self.table_name = phenopacket_dir.parents[0].name
 
     def _create_gene_table(self) -> None:
diff --git a/src/pheval/analyse/rank_stats.py b/src/pheval/analyse/rank_stats.py
index 755b44fb6..53a17a87d 100644
--- a/src/pheval/analyse/rank_stats.py
+++ b/src/pheval/analyse/rank_stats.py
@@ -6,8 +6,8 @@
 from duckdb import DuckDBPyConnection
 from sklearn.metrics import ndcg_score
 
+from pheval.analyse.benchmark_db_manager import BenchmarkDBManager
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
-from pheval.analyse.get_connection import DBConnector
 
 
 @dataclass
@@ -43,7 +43,7 @@ def add_ranks(self, benchmark_name: str, table_name: str, column_name: str) -> N
             table_name (str): Name of the table to add ranks from.
             column_name (str): Name of the column to add ranks from.:
         """
-        conn = DBConnector(benchmark_name).conn
+        conn = BenchmarkDBManager(benchmark_name).conn
         self.top = self._execute_count_query(conn, table_name, column_name, " = 1")
         self.top3 = self._execute_count_query(conn, table_name, column_name, " BETWEEN 1 AND 3")
         self.top5 = self._execute_count_query(conn, table_name, column_name, " BETWEEN 1 AND 5")
@@ -336,7 +336,7 @@ def __init__(self, benchmark_name: str, table_name: str):
 
         self.table_name = table_name
         self.benchmark_name = benchmark_name
-        conn = DBConnector(benchmark_name).conn
+        conn = BenchmarkDBManager(benchmark_name).conn
         conn.execute(
             f'CREATE TABLE IF NOT EXISTS "{self.table_name}" ('
             f"results_directory_path VARCHAR,"
@@ -397,7 +397,7 @@ def add_statistics_entry(
             rank_stats (RankStats): RankStats object for the run.
             binary_classification (BinaryClassificationStats): BinaryClassificationStats object for the run.
         """
-        conn = DBConnector(self.benchmark_name).conn
+        conn = BenchmarkDBManager(self.benchmark_name).conn
         conn.execute(
             f' INSERT INTO "{self.table_name}" VALUES ( '
             f"'{run_identifier}',"
diff --git a/src/pheval/analyse/variant_prioritisation_analysis.py b/src/pheval/analyse/variant_prioritisation_analysis.py
index ae9afdc77..e4c1e3490 100644
--- a/src/pheval/analyse/variant_prioritisation_analysis.py
+++ b/src/pheval/analyse/variant_prioritisation_analysis.py
@@ -1,8 +1,8 @@
 from pathlib import Path
 
+from pheval.analyse.benchmark_db_manager import BenchmarkDBManager
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
-from pheval.analyse.get_connection import DBConnector
 from pheval.analyse.rank_stats import RankStats
 from pheval.analyse.run_data_parser import RunConfig
 from pheval.post_processing.post_processing import RankedPhEvalVariantResult
@@ -15,7 +15,7 @@ class AssessVariantPrioritisation:
 
     def __init__(
         self,
-        db_connection: DBConnector,
+        db_connection: BenchmarkDBManager,
         table_name: str,
         column: str,
         threshold: float,
@@ -25,7 +25,7 @@ def __init__(
         Initialise AssessVariantPrioritisation class
 
         Args:
-            db_connection (DBConnector): DB connection.
+            db_connection (BenchmarkDBManager): DB connection.
             table_name (str): Table name.
             column (str): Column name.
             threshold (float): Threshold for scores
@@ -214,7 +214,7 @@ def benchmark_variant_prioritisation(
         including ranks and rank statistics for the benchmarked directory.
     """
     variant_binary_classification_stats = BinaryClassificationStats()
-    db_connection = DBConnector(benchmark_name)
+    db_connection = BenchmarkDBManager(benchmark_name)
     variant_benchmarker = AssessVariantPrioritisation(
         db_connection,
         f"{run.phenopacket_dir.parents[0].name}" f"_variant",
diff --git a/tests/test_analysis.py b/tests/test_analysis.py
index 27c8fb375..3633d0723 100644
--- a/tests/test_analysis.py
+++ b/tests/test_analysis.py
@@ -5,10 +5,10 @@
 
 import duckdb
 
+from pheval.analyse.benchmark_db_manager import BenchmarkDBManager
 from pheval.analyse.binary_classification_stats import BinaryClassificationStats
 from pheval.analyse.disease_prioritisation_analysis import AssessDiseasePrioritisation
 from pheval.analyse.gene_prioritisation_analysis import AssessGenePrioritisation
-from pheval.analyse.get_connection import DBConnector
 from pheval.analyse.variant_prioritisation_analysis import AssessVariantPrioritisation
 from pheval.post_processing.post_processing import (
     RankedPhEvalDiseaseResult,
@@ -47,12 +47,12 @@ def tearDownClass(cls):
 
     def setUp(self):
         patcher = patch(
-            "pheval.analyse.get_connection.DBConnector.get_connection",
+            "pheval.analyse.benchmark_db_manager.BenchmarkDBManager.get_connection",
             return_value=self.db_connection,
         )
         self.mock_get_connection = patcher.start()
         self.addCleanup(patcher.stop)
-        self.db_connector = DBConnector("null")
+        self.db_connector = BenchmarkDBManager(None)
         self.assess_gene_prioritisation = AssessGenePrioritisation(
             db_connection=self.db_connector,
             table_name="test_table_gene",
@@ -187,12 +187,12 @@ def tearDownClass(cls):
 
     def setUp(self):
         patcher = patch(
-            "pheval.analyse.get_connection.DBConnector.get_connection",
+            "pheval.analyse.benchmark_db_manager.BenchmarkDBManager.get_connection",
             return_value=self.db_connection,
         )
         self.mock_get_connection = patcher.start()
         self.addCleanup(patcher.stop)
-        self.db_connector = DBConnector("None")
+        self.db_connector = BenchmarkDBManager("None")
         self.assess_variant_prioritisation = AssessVariantPrioritisation(
             db_connection=self.db_connector,
             table_name="test_table_variant",
@@ -356,12 +356,12 @@ def tearDownClass(cls):
 
     def setUp(self):
         patcher = patch(
-            "pheval.analyse.get_connection.DBConnector.get_connection",
+            "pheval.analyse.benchmark_db_manager.BenchmarkDBManager.get_connection",
             return_value=self.db_connection,
         )
         self.mock_get_connection = patcher.start()
         self.addCleanup(patcher.stop)
-        self.db_connector = DBConnector("None")
+        self.db_connector = BenchmarkDBManager("None")
         self.assess_disease_prioritisation = AssessDiseasePrioritisation(
             db_connection=self.db_connector,
             table_name="test_table_disease",
diff --git a/tests/test_generate_summary_outputs.py b/tests/test_generate_summary_outputs.py
index c2ec3530a..cc9d93df0 100644
--- a/tests/test_generate_summary_outputs.py
+++ b/tests/test_generate_summary_outputs.py
@@ -3,8 +3,8 @@
 
 import duckdb
 
+from pheval.analyse.benchmark_db_manager import BenchmarkDBManager
 from pheval.analyse.generate_summary_outputs import create_comparison_table, get_new_table_name
-from pheval.analyse.get_connection import DBConnector
 
 
 class TestGetNewTableName(unittest.TestCase):
@@ -38,12 +38,12 @@ def tearDownClass(cls):
 
     def setUp(self):
         patcher = patch(
-            "pheval.analyse.get_connection.DBConnector.get_connection",
+            "pheval.analyse.benchmark_db_manager.BenchmarkDBManager.get_connection",
             return_value=self.db_connection,
         )
         self.mock_get_connection = patcher.start()
         self.addCleanup(patcher.stop)
-        self.db_connector = DBConnector("None")
+        self.db_connector = BenchmarkDBManager("None")
 
     def test_create_comparison_table(self):
         create_comparison_table(
diff --git a/tests/test_rank_stats.py b/tests/test_rank_stats.py
index d5e92b323..fb9bbfdc4 100644
--- a/tests/test_rank_stats.py
+++ b/tests/test_rank_stats.py
@@ -41,7 +41,7 @@ def setUp(self) -> None:
         )
 
     @patch(
-        "pheval.analyse.get_connection.DBConnector.get_connection",
+        "pheval.analyse.benchmark_db_manager.BenchmarkDBManager.get_connection",
         return_value=duckdb.connect(":memory:"),
     )
     def test_add_ranks(self, mock_get_connection):

From a87176b6da48f0960b4da7269d9950dc86b3e315 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 27 Aug 2024 15:23:57 +0100
Subject: [PATCH 73/81] Add Executing a Benchmark

---
 mkdocs.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mkdocs.yml b/mkdocs.yml
index 02c6a32a1..aa7ae70f2 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -56,6 +56,7 @@ nav:
       - "styleguide.md"
       - "CODE_OF_CONDUCT.md"
   - Plugins: "plugins.md"
+  - Executing a Benchmark: "executing_a_benchmark.md"
   - "roadmap.md"
 
 

From f52c6ea3dca3b6985f16d87d99a3367bf4eb2c3f Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Tue, 27 Aug 2024 15:24:07 +0100
Subject: [PATCH 74/81] Add docs for executing a benchmark

---
 docs/executing_a_benchmark.md | 107 ++++++++++++++++++++++++++++++++++
 1 file changed, 107 insertions(+)
 create mode 100644 docs/executing_a_benchmark.md

diff --git a/docs/executing_a_benchmark.md b/docs/executing_a_benchmark.md
new file mode 100644
index 000000000..d968241a1
--- /dev/null
+++ b/docs/executing_a_benchmark.md
@@ -0,0 +1,107 @@
+# Executing a Benchmark
+
+PhEval is designed for benchmarking algorithms across various datasets. To execute a benchmark using PhEval, you need to: 
+
+1. Execute your runner, generating the PhEval standardised TSV outputs for gene/variant/disease prioritisation.
+2. Configure the benchmarking parameters.
+3. Run the benchmark.
+
+PhEval will generate various performance reports, allowing you to easily compare the effectiveness of different algorithms.
+
+## After the Runner Execution
+
+After executing a run, you may be left with an output directory structure like so:
+
+```tree
+.
+├── pheval_disease_results
+│   ├── patient_1-pheval_disease_result.tsv
+├── pheval_gene_results
+│   ├── patient_1-pheval_gene_result.tsv
+├── pheval_variant_results
+│   ├── patient_1-pheval_variant_result.tsv
+├── raw_results
+│   ├── patient_1.json
+├── results.yml
+└── tool_input_commands
+    └── tool_input_commands.txt
+```
+Whether the `pheval_disease_results`, `pheval_gene_results`, and `pheval_variant_results` directories are populated depends on what is specified in the `config.yaml` for the runner execution. The results in these directories are what the benchmarking consumes to produce the statistical comparison reports.
+
+## Benchmarking Configuration File
+
+To configure the benchmarking parameters, a YAML configuration file should be created and supplied to the CLI command.
+
+An outline of the configuration file structure follows below:
+
+```yaml
+benchmark_name: exomiser_14_benchmark
+runs:
+  - run_identifier: run_identifier_1
+    results_dir: /path/to/results_dir_1
+    phenopacket_dir: /path/to/phenopacket_dir
+    gene_analysis: True
+    variant_analysis: False
+    disease_analysis: True
+    threshold:
+    score_order: descending
+  - run_identifier: run_identifier_2
+    results_dir: /path/to/results_dir_2
+    phenopacket_dir: /path/to/phenopacket_dir
+    gene_analysis: True
+    variant_analysis: True
+    disease_analysis: True
+    threshold:
+    score_order: descending
+plot_customisation:
+  gene_plots:
+    plot_type: bar_cumulative
+    rank_plot_title: 
+    roc_curve_title: 
+    precision_recall_title: 
+  disease_plots:
+    plot_type: bar_cumulative
+    rank_plot_title:
+    roc_curve_title: 
+    precision_recall_title: 
+  variant_plots:
+    plot_type: bar_cumulative
+    rank_plot_title: 
+    roc_curve_title: 
+    precision_recall_title: 
+
+```
+
+The `benchmark_name` is used to name the DuckDB database that will contain all the ranking and binary classification statistics, as well as comparisons between runs. The name provided should not contain any whitespace or special characters.
+
+### Runs section
+
+The `runs` section specifies which run configurations should be included in the benchmarking. For each run configuration you will need to populate the following parameters:
+
+- `run_identifier`: The identifier associated with the run - this should be meaningful, as it will be used in the naming of tables and plots.
+- `results_dir`: The full path to the root directory where the directories `pheval_gene_results`/`pheval_variant_results`/`pheval_disease_results` can be found.
+- `phenopacket_dir`: The full path to the phenopacket directory used during the runner execution.
+- `gene_analysis`: Boolean specifying whether to perform benchmarking for gene prioritisation analysis.
+- `variant_analysis`: Boolean specifying whether to perform benchmarking for variant prioritisation analysis.
+- `disease_analysis`: Boolean specifying whether to perform benchmarking for disease prioritisation analysis.
+- `threshold`: OPTIONAL score threshold to consider for inclusion of results. 
+- `score_order`: Ordering of results for ranking. Either ascending or descending.
+
+### Plot customisation section
+
+The `plot_customisation` section specifies any additional customisation to the plots output from the benchmarking. Here you can specify title names for all the plots output, as well as the plot type for displaying the summary ranking stats. This section is split by the plots output from the gene, variant and disease prioritisation benchmarking. The parameters in this section do not need to be populated - however, if left blank, generic titles will be used by default. The parameters are as follows:
+
+- `plot_type`: The plot type output for the summary rank stats plot. This can be one of `bar_cumulative`, `bar_non_cumulative` or `bar_stacked`.
+- `rank_plot_title`: The customised title for the summary rank stats plot.
+- `roc_curve_title`: The customised title for the ROC curve plot.
+- `precision_recall_title`: The customised title for the precision-recall curve plot.
+
+## Executing the benchmark
+
+After configuring the benchmarking YAML, executing the benchmark is relatively simple.
+
+```bash
+pheval-utils generate-benchmark-stats --run-yaml benchmarking_config.yaml
+```
+
+

From 43b21b2bc82f2a912c9f658e3510035e59e23350 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Wed, 28 Aug 2024 15:55:44 +0100
Subject: [PATCH 75/81] remove @classmethod decorator

---
 src/pheval/analyse/run_data_parser.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/pheval/analyse/run_data_parser.py b/src/pheval/analyse/run_data_parser.py
index 6c81e6c93..ba0ce5d69 100644
--- a/src/pheval/analyse/run_data_parser.py
+++ b/src/pheval/analyse/run_data_parser.py
@@ -29,9 +29,8 @@ class RunConfig(BaseModel):
     threshold: Optional[float]
     score_order: Optional[str]
 
-    @classmethod
     @root_validator(pre=True)
-    def handle_blank_fields(cls, values: dict) -> dict:
+    def handle_blank_fields(cls, values: dict) -> dict:  # noqa: N805
         """
         Root validator to handle fields that may be explicitly set to None.
 
@@ -45,6 +44,7 @@ def handle_blank_fields(cls, values: dict) -> dict:
         """
         if values.get("threshold") is None:
             values["threshold"] = 0
+            print("setting default threshold")
         if values.get("score_order") is None:
             values["score_order"] = "descending"
         return values
@@ -66,9 +66,8 @@ class SinglePlotCustomisation(BaseModel):
     roc_curve_title: Optional[str]
     precision_recall_title: Optional[str]
 
-    @classmethod
     @root_validator(pre=True)
-    def handle_blank_fields(cls, values: dict) -> dict:
+    def handle_blank_fields(cls, values: dict) -> dict:  # noqa: N805
         """
         Root validator to handle fields that may be explicitly set to None.
 

From 9f09cf812851f459ef77fd6b24726a679c509988 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 5 Sep 2024 12:07:03 +0100
Subject: [PATCH 76/81] remove constants.py

---
 src/pheval/constants.py | 8 --------
 1 file changed, 8 deletions(-)
 delete mode 100644 src/pheval/constants.py

diff --git a/src/pheval/constants.py b/src/pheval/constants.py
deleted file mode 100644
index 7435dd119..000000000
--- a/src/pheval/constants.py
+++ /dev/null
@@ -1,8 +0,0 @@
-PHEVAL_RESULTS_DIRECTORY_SUFFIX = "_results"
-GENE_PRIORITISATION_TYPE_STR = "gene"
-GENE_PLOT_Y_LABEL = "Disease-causing genes (%)"
-VARIANT_PRIORITISATION_TYPE_STR = "variant"
-VARIANT_PLOT_Y_LABEL = "Disease-causing variants (%)"
-DISEASE_PRIORITISATION_TYPE_STR = "disease"
-DISEASE_PLOT_Y_LABEL = "Known diseases (%)"
-RANK_COMPARISON_SUFFIX = "_rank_comparison"

From dd79006b925e23ef6585020f1982487f46b1bae1 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 5 Sep 2024 12:07:17 +0100
Subject: [PATCH 77/81] keeping strings local to their usage

---
 src/pheval/analyse/benchmark_generator.py     | 21 ++++++-------------
 src/pheval/analyse/generate_plots.py          |  3 +--
 .../analyse/generate_summary_outputs.py       |  3 +--
 3 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/src/pheval/analyse/benchmark_generator.py b/src/pheval/analyse/benchmark_generator.py
index 1b8b897ae..5a0201936 100644
--- a/src/pheval/analyse/benchmark_generator.py
+++ b/src/pheval/analyse/benchmark_generator.py
@@ -6,15 +6,6 @@
 from pheval.analyse.gene_prioritisation_analysis import benchmark_gene_prioritisation
 from pheval.analyse.run_data_parser import RunConfig, SinglePlotCustomisation
 from pheval.analyse.variant_prioritisation_analysis import benchmark_variant_prioritisation
-from pheval.constants import (
-    DISEASE_PLOT_Y_LABEL,
-    DISEASE_PRIORITISATION_TYPE_STR,
-    GENE_PLOT_Y_LABEL,
-    GENE_PRIORITISATION_TYPE_STR,
-    VARIANT_PLOT_Y_LABEL,
-    VARIANT_PRIORITISATION_TYPE_STR,
-)
-
 
 @dataclass
 class BenchmarkRunOutputGenerator:
@@ -61,8 +52,8 @@ class GeneBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
     """
 
     plot_customisation: SinglePlotCustomisation = None
-    prioritisation_type_string: str = GENE_PRIORITISATION_TYPE_STR
-    y_label: str = GENE_PLOT_Y_LABEL
+    prioritisation_type_string: str = "gene"
+    y_label: str = "Disease-causing genes (%)"
     generate_benchmark_run_results: Callable[[str, RunConfig, str, float], BenchmarkRunResults] = (
         benchmark_gene_prioritisation
     )
@@ -94,8 +85,8 @@ class VariantBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
     """
 
     plot_customisation: SinglePlotCustomisation = None
-    prioritisation_type_string: str = VARIANT_PRIORITISATION_TYPE_STR
-    y_label: str = VARIANT_PLOT_Y_LABEL
+    prioritisation_type_string: str = "variant"
+    y_label: str = "Disease-causing variants (%)"
     generate_benchmark_run_results: Callable[[str, RunConfig, str, float], BenchmarkRunResults] = (
         benchmark_variant_prioritisation
     )
@@ -126,8 +117,8 @@ class DiseaseBenchmarkRunOutputGenerator(BenchmarkRunOutputGenerator):
     """
 
     plot_customisation: SinglePlotCustomisation = None
-    prioritisation_type_string: str = DISEASE_PRIORITISATION_TYPE_STR
-    y_label: str = DISEASE_PLOT_Y_LABEL
+    prioritisation_type_string: str = "disease"
+    y_label: str = "Known diseases (%)"
     generate_benchmark_run_results: Callable[[str, RunConfig, str, float], BenchmarkRunResults] = (
         benchmark_disease_prioritisation
     )
diff --git a/src/pheval/analyse/generate_plots.py b/src/pheval/analyse/generate_plots.py
index 4d680b450..89cc1a436 100644
--- a/src/pheval/analyse/generate_plots.py
+++ b/src/pheval/analyse/generate_plots.py
@@ -16,7 +16,6 @@
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.parse_benchmark_summary import parse_benchmark_db
 from pheval.analyse.run_data_parser import parse_run_config
-from pheval.constants import PHEVAL_RESULTS_DIRECTORY_SUFFIX
 
 
 def trim_corpus_results_directory_suffix(corpus_results_directory: Path) -> Path:
@@ -29,7 +28,7 @@ def trim_corpus_results_directory_suffix(corpus_results_directory: Path) -> Path
     Returns:
         Path: The Path object with the suffix removed from the directory name.
     """
-    return Path(str(corpus_results_directory).replace(PHEVAL_RESULTS_DIRECTORY_SUFFIX, ""))
+    return Path(str(corpus_results_directory).replace("_results", ""))
 
 
 class PlotGenerator:
diff --git a/src/pheval/analyse/generate_summary_outputs.py b/src/pheval/analyse/generate_summary_outputs.py
index 7efe616f2..ccdd57272 100644
--- a/src/pheval/analyse/generate_summary_outputs.py
+++ b/src/pheval/analyse/generate_summary_outputs.py
@@ -5,7 +5,6 @@
 from pheval.analyse.benchmark_generator import BenchmarkRunOutputGenerator
 from pheval.analyse.benchmarking_data import BenchmarkRunResults
 from pheval.analyse.generate_plots import generate_plots
-from pheval.constants import RANK_COMPARISON_SUFFIX
 
 
 def get_new_table_name(run_identifier_1: str, run_identifier_2: str, output_prefix: str) -> str:
@@ -19,7 +18,7 @@ def get_new_table_name(run_identifier_1: str, run_identifier_2: str, output_pref
         The new table name.
     """
     return (
-        f"{run_identifier_1}_vs_" f"{run_identifier_2}_" f"{output_prefix}{RANK_COMPARISON_SUFFIX}"
+        f"{run_identifier_1}_vs_" f"{run_identifier_2}_" f"{output_prefix}_rank_comparison"
     )
 
 

From 2835124e1c5d8226e56d29a4798596e9beb9eed7 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 5 Sep 2024 12:08:21 +0100
Subject: [PATCH 78/81] tox lint

---
 src/pheval/analyse/benchmark_generator.py      | 1 +
 src/pheval/analyse/generate_summary_outputs.py | 4 +---
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/pheval/analyse/benchmark_generator.py b/src/pheval/analyse/benchmark_generator.py
index 5a0201936..ecaeddead 100644
--- a/src/pheval/analyse/benchmark_generator.py
+++ b/src/pheval/analyse/benchmark_generator.py
@@ -7,6 +7,7 @@
 from pheval.analyse.run_data_parser import RunConfig, SinglePlotCustomisation
 from pheval.analyse.variant_prioritisation_analysis import benchmark_variant_prioritisation
 
+
 @dataclass
 class BenchmarkRunOutputGenerator:
     """Base class for recording data required for generating benchmarking outputs.
diff --git a/src/pheval/analyse/generate_summary_outputs.py b/src/pheval/analyse/generate_summary_outputs.py
index ccdd57272..55d5cd693 100644
--- a/src/pheval/analyse/generate_summary_outputs.py
+++ b/src/pheval/analyse/generate_summary_outputs.py
@@ -17,9 +17,7 @@ def get_new_table_name(run_identifier_1: str, run_identifier_2: str, output_pref
     Returns:
         The new table name.
     """
-    return (
-        f"{run_identifier_1}_vs_" f"{run_identifier_2}_" f"{output_prefix}_rank_comparison"
-    )
+    return f"{run_identifier_1}_vs_" f"{run_identifier_2}_" f"{output_prefix}_rank_comparison"
 
 
 def create_comparison_table(

From 788f363a0e02a1a75ddaa8fb76f386aa33294fda Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 5 Sep 2024 16:18:30 +0100
Subject: [PATCH 79/81] refactor benchmark command

---
 resources/Makefile.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resources/Makefile.j2 b/resources/Makefile.j2
index 0e9441e45..6da142439 100644
--- a/resources/Makefile.j2
+++ b/resources/Makefile.j2
@@ -96,7 +96,7 @@ $(ROOT_DIR)/results/run_data.txt:
 	touch $@
 
 $(ROOT_DIR)/results/gene_rank_stats.svg: $(ROOT_DIR)/results/run_data.txt
-	pheval-utils benchmark-comparison -r $< -o $(ROOT_DIR)/$(shell dirname $@)/results --gene-analysis -y bar_cumulative
+	pheval-utils generate-benchmark-stats -r $< -o $(ROOT_DIR)/$(shell dirname $@)/results --gene-analysis -y bar_cumulative
 	mv $(ROOT_DIR)/gene_rank_stats.svg $@
 
 .PHONY: pheval-report

From 5a1e67736848a57cbf6dbdb9f5306e232b3a94e2 Mon Sep 17 00:00:00 2001
From: Yasemin Bridges <y.bridges@qmul.ac.uk>
Date: Thu, 5 Sep 2024 16:22:43 +0100
Subject: [PATCH 80/81] Revert "refactor benchmark command"

This reverts commit 788f363a0e02a1a75ddaa8fb76f386aa33294fda.
---
 resources/Makefile.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resources/Makefile.j2 b/resources/Makefile.j2
index 6da142439..0e9441e45 100644
--- a/resources/Makefile.j2
+++ b/resources/Makefile.j2
@@ -96,7 +96,7 @@ $(ROOT_DIR)/results/run_data.txt:
 	touch $@
 
 $(ROOT_DIR)/results/gene_rank_stats.svg: $(ROOT_DIR)/results/run_data.txt
-	pheval-utils generate-benchmark-stats -r $< -o $(ROOT_DIR)/$(shell dirname $@)/results --gene-analysis -y bar_cumulative
+	pheval-utils benchmark-comparison -r $< -o $(ROOT_DIR)/$(shell dirname $@)/results --gene-analysis -y bar_cumulative
 	mv $(ROOT_DIR)/gene_rank_stats.svg $@
 
 .PHONY: pheval-report

From dfe23e323568964856385481808fc5e7b3bc34ac Mon Sep 17 00:00:00 2001
From: souzadevinicius <souzadevinicius@gmail.com>
Date: Fri, 6 Sep 2024 13:31:35 +0100
Subject: [PATCH 81/81] Refactoring the end-to-end test to align with the
 refactored pheval methods.

---
 Makefile                          | 49 ++++++++++++++++++++++++-------
 resources/Makefile.j2             | 49 ++++++++++++++++++++++++-------
 tests/test_pipeline_workflow.yaml | 11 ++++---
 3 files changed, 83 insertions(+), 26 deletions(-)

diff --git a/Makefile b/Makefile
index 599944d6f..af5ea4875 100644
--- a/Makefile
+++ b/Makefile
@@ -57,15 +57,6 @@ $(TMP_DATA)/semsim/%.sql:
 	wget $(SEMSIM_BASE_URL)/$*.sql -O $@
 
 
-$(ROOT_DIR)/results/run_data.txt:
-	touch $@
-
-$(ROOT_DIR)/results/gene_rank_stats.svg: $(ROOT_DIR)/results/run_data.txt
-	pheval-utils benchmark-comparison -r $< -o $(ROOT_DIR)/$(shell dirname $@)/results --gene-analysis -y bar_cumulative
-	mv $(ROOT_DIR)/gene_rank_stats.svg $@
-
-.PHONY: pheval-report
-pheval-report: $(ROOT_DIR)/results/gene_rank_stats.svg
 
 
 $(ROOT_DIR)/results/template-1.0.0/results.yml: configurations/template-1.0.0/config.yaml corpora/lirical/default/corpus.yml
@@ -88,10 +79,48 @@ $(ROOT_DIR)/results/template-1.0.0/results.yml: configurations/template-1.0.0/co
 	 --output-dir $(shell dirname $@)
 
 	touch $@
-	echo -e "$(ROOT_DIR)/corpora/lirical/default/phenopackets\t$(shell dirname $@)" >> results/run_data.txt
 
 .PHONY: pheval-run
 pheval-run: $(ROOT_DIR)/results/template-1.0.0/results.yml
+
+
+$(ROOT_DIR)/results/template-1.0.0/run_data.yaml:
+	printf '%s\n' \
+	"benchmark_name: fake_predictor_benchmark" \
+	"runs:" \
+	"  - run_identifier: run_identifier_1" \
+	"    results_dir: $(shell dirname $@)" \
+	"    phenopacket_dir: $(ROOT_DIR)/corpora/lirical/default/phenopackets" \
+	"    gene_analysis: True" \
+	"    variant_analysis: False" \
+	"    disease_analysis: False" \
+	"    threshold:" \
+	"    score_order: descending" \
+	"plot_customisation:" \
+	"  gene_plots:" \
+	"    plot_type: bar_cumulative" \
+	"    rank_plot_title:" \
+	"    roc_curve_title: " \
+	"    precision_recall_title: " \
+	"  disease_plots:" \
+	"    plot_type: bar_cumulative" \
+	"    rank_plot_title:" \
+	"    roc_curve_title: " \
+	"    precision_recall_title: " \
+	"  variant_plots:" \
+	"    plot_type: bar_cumulative" \
+	"    rank_plot_title: " \
+	"    roc_curve_title: " \
+	"    precision_recall_title: " \
+	> $@
+
+$(ROOT_DIR)/results/template-1.0.0/gene_rank_stats.svg: $(ROOT_DIR)/results/template-1.0.0/run_data.yaml
+	pheval-utils generate-benchmark-stats -r $<
+
+.PHONY: pheval-report
+pheval-report: $(ROOT_DIR)/results/template-1.0.0/gene_rank_stats.svg
+
+
 corpora/lirical/default/corpus.yml:
 	test -d $(ROOT_DIR)/corpora/lirical/default/ || mkdir -p $(ROOT_DIR)/corpora/lirical/default/
 
diff --git a/resources/Makefile.j2 b/resources/Makefile.j2
index 0e9441e45..6f894dc7a 100644
--- a/resources/Makefile.j2
+++ b/resources/Makefile.j2
@@ -92,15 +92,6 @@ $(TMP_DATA)/semsim/%.sql:
 	wget $(SEMSIM_BASE_URL)/$*.sql -O $@
 
 
-$(ROOT_DIR)/results/run_data.txt:
-	touch $@
-
-$(ROOT_DIR)/results/gene_rank_stats.svg: $(ROOT_DIR)/results/run_data.txt
-	pheval-utils benchmark-comparison -r $< -o $(ROOT_DIR)/$(shell dirname $@)/results --gene-analysis -y bar_cumulative
-	mv $(ROOT_DIR)/gene_rank_stats.svg $@
-
-.PHONY: pheval-report
-pheval-report: $(ROOT_DIR)/results/gene_rank_stats.svg
 
 {% for run in runs %}
 $(ROOT_DIR)/results/{{ run.configuration }}/results.yml: configurations/{{ run.configuration }}/config.yaml corpora/{{ run.corpus }}/{{ run.corpusvariant }}/corpus.yml
@@ -125,10 +116,48 @@ $(ROOT_DIR)/results/{{ run.configuration }}/results.yml: configurations/{{ run.c
 	 --output-dir $(shell dirname $@)
 
 	touch $@
-	echo -e "$(ROOT_DIR)/corpora/{{ run.corpus }}/default/phenopackets\t$(shell dirname $@)" >> results/run_data.txt
 
 .PHONY: pheval-run
 pheval-run: $(ROOT_DIR)/results/{{ run.configuration }}/results.yml
+
+
+$(ROOT_DIR)/results/{{ run.configuration }}/run_data.yaml:  # benchmark config for this run; corpus path follows the run's own corpus/corpusvariant
+	printf '%s\n' \
+	"benchmark_name: fake_predictor_benchmark" \
+	"runs:" \
+	"  - run_identifier: run_identifier_1" \
+	"    results_dir: $(shell dirname $@)" \
+	"    phenopacket_dir: $(ROOT_DIR)/corpora/{{ run.corpus }}/{{ run.corpusvariant }}/phenopackets" \
+	"    gene_analysis: True" \
+	"    variant_analysis: False" \
+	"    disease_analysis: False" \
+	"    threshold:" \
+	"    score_order: descending" \
+	"plot_customisation:" \
+	"  gene_plots:" \
+	"    plot_type: bar_cumulative" \
+	"    rank_plot_title:" \
+	"    roc_curve_title: " \
+	"    precision_recall_title: " \
+	"  disease_plots:" \
+	"    plot_type: bar_cumulative" \
+	"    rank_plot_title:" \
+	"    roc_curve_title: " \
+	"    precision_recall_title: " \
+	"  variant_plots:" \
+	"    plot_type: bar_cumulative" \
+	"    rank_plot_title: " \
+	"    roc_curve_title: " \
+	"    precision_recall_title: " \
+	> $@
+
+$(ROOT_DIR)/results/{{ run.configuration }}/gene_rank_stats.svg: $(ROOT_DIR)/results/{{ run.configuration }}/run_data.yaml
+	pheval-utils generate-benchmark-stats -r $<
+
+.PHONY: pheval-report
+pheval-report: $(ROOT_DIR)/results/{{ run.configuration }}/gene_rank_stats.svg
+
+
 {% endfor %}
 
 
diff --git a/tests/test_pipeline_workflow.yaml b/tests/test_pipeline_workflow.yaml
index 0aadf8780..6ed1c5dab 100644
--- a/tests/test_pipeline_workflow.yaml
+++ b/tests/test_pipeline_workflow.yaml
@@ -1,13 +1,12 @@
 - name: Run PhEval pipeline
   command: make pheval
   files:
-  - path: results/run_data.txt
+  - path: results/template-1.0.0/run_data.yaml
     contains:
-      - "corpora/lirical/default/phenopackets"
-  - path: gene_roc_curve.svg
-  - path: gene_precision_recall_curve.svg
-  - path: results/gene_rank_stats.svg
-
+      - "fake_predictor_benchmark"
+  - path: fake_predictor_benchmark_gene_rank_stats.svg
+  - path: fake_predictor_benchmark_gene_roc_curve.svg
+  - path: fake_predictor_benchmark_gene_pr_curve.svg
   stdout:
     contains:
       - "running with fake predictor"
\ No newline at end of file